From a8540f62bcaab7f72f706771f1a470bac717df01 Mon Sep 17 00:00:00 2001
From: chenli
Date: Mon, 23 Dec 2024 09:35:37 +0800
Subject: [PATCH] Add support for LoongArch

The LLVM patches come from the "llvm-15.0.7-1.0.4" tag of the llvm repo;
the clang patch comes from the "clang-15.0.7-1.0.3.module+an8.8.0+11135+e398acfc"
tag of the clang repo.
---
 0001-Support-LoongArch.patch                  | 83876 ++++++++++++++++
 ...Add-LoongArch-Support-for-ObjectYAML.patch |  1983 +
 ...port-compiler-rt-and-fix-some-issues.patch | 20199 ++++
 0103-Support-LoongArch.patch                  | 24842 +++++
 llvm-compat.spec                              |    16 +-
 5 files changed, 130912 insertions(+), 4 deletions(-)
 create mode 100644 0001-Support-LoongArch.patch
 create mode 100644 0002-Add-LoongArch-Support-for-ObjectYAML.patch
 create mode 100644 0003-LoongArch-support-compiler-rt-and-fix-some-issues.patch
 create mode 100644 0103-Support-LoongArch.patch

diff --git a/0001-Support-LoongArch.patch b/0001-Support-LoongArch.patch
new file mode 100644
index 0000000..f8d3aa3
--- /dev/null
+++ b/0001-Support-LoongArch.patch
@@ -0,0 +1,83876 @@
+diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt
+index db207e332..bae2cebaf 100644
+--- a/llvm/CMakeLists.txt
++++ b/llvm/CMakeLists.txt
+@@ -373,6 +373,7 @@ set(LLVM_ALL_TARGETS
+   BPF
+   Hexagon
+   Lanai
++  LoongArch
+   Mips
+   MSP430
+   NVPTX
+diff --git a/llvm/cmake/config-ix.cmake b/llvm/cmake/config-ix.cmake
+index 7e657fd15..b4b9b748c 100644
+--- a/llvm/cmake/config-ix.cmake
++++ b/llvm/cmake/config-ix.cmake
+@@ -498,6 +498,8 @@ elseif (LLVM_NATIVE_ARCH MATCHES "riscv64")
+   set(LLVM_NATIVE_ARCH RISCV)
+ elseif (LLVM_NATIVE_ARCH STREQUAL "m68k")
+   set(LLVM_NATIVE_ARCH M68k)
++elseif (LLVM_NATIVE_ARCH MATCHES "loongarch")
++  set(LLVM_NATIVE_ARCH LoongArch)
+ else ()
+   message(FATAL_ERROR "Unknown architecture ${LLVM_NATIVE_ARCH}")
+ endif ()
+diff --git a/llvm/cmake/config.guess b/llvm/cmake/config.guess
+index 60d3f588d..255257d40 100644
+--- a/llvm/cmake/config.guess
++++ b/llvm/cmake/config.guess
+@@ -1021,6 +1021,9 @@ EOF
+     x86_64:Linux:*:*)
+         echo x86_64-unknown-linux-gnu
+         exit ;;
++    loongarch64:Linux:*:*)
++        echo loongarch64-unknown-linux-gnu
++        exit ;;
+     xtensa*:Linux:*:*)
+         echo ${UNAME_MACHINE}-unknown-linux-gnu
+         exit ;;
+diff --git a/llvm/include/llvm/ADT/Triple.h b/llvm/include/llvm/ADT/Triple.h
+index ba4584dc6..dbd0aeaab 100644
+--- a/llvm/include/llvm/ADT/Triple.h
++++ b/llvm/include/llvm/ADT/Triple.h
+@@ -232,6 +232,7 @@ public:
+     GNUX32,
+     GNUILP32,
+     CODE16,
++    GNUABILPX32,
+     EABI,
+     EABIHF,
+     Android,
+@@ -823,6 +824,16 @@ public:
+                : PointerWidth == 64;
+   }
+
++  /// Tests whether the target is LoongArch 32-bit
++  bool isLoongArch32() const {
++    return getArch() == Triple::loongarch32;
++  }
++
++  /// Tests whether the target is LoongArch 64-bit.
++  bool isLoongArch64() const {
++    return getArch() == Triple::loongarch64;
++  }
++
+   /// Tests whether the target is LoongArch (32- and 64-bit).
+   bool isLoongArch() const {
+     return getArch() == Triple::loongarch32 || getArch() == Triple::loongarch64;
+diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h
+index 99e7a9868..43f09b493 100644
+--- a/llvm/include/llvm/BinaryFormat/ELF.h
++++ b/llvm/include/llvm/BinaryFormat/ELF.h
+@@ -902,22 +902,13 @@ enum {
+
+ // LoongArch Specific e_flags
+ enum : unsigned {
+-  // Reference: https://github.com/loongson/LoongArch-Documentation.
+-  // The last commit hash (main branch) is
+-  // 99016636af64d02dee05e39974d4c1e55875c45b.
+-  // Note that there is an open PR
+-  // https://github.com/loongson/LoongArch-Documentation/pull/47
+-  // talking about using 0x1, 0x2, 0x3 for ILP32S/F/D and use EI_CLASS to
+-  // distinguish LP64 and ILP32. If this PR get merged, we will update
+-  // the definition here.
+-  // Base ABI Types.
+-  EF_LOONGARCH_BASE_ABI_LP64S = 0x1, // LP64 soft-float ABI
+-  EF_LOONGARCH_BASE_ABI_LP64F = 0x2, // LP64 single-float ABI
+-  EF_LOONGARCH_BASE_ABI_LP64D = 0x3, // LP64 double-float ABI
+-  EF_LOONGARCH_BASE_ABI_ILP32S = 0x5, // ILP32 soft-float ABI
+-  EF_LOONGARCH_BASE_ABI_ILP32F = 0x6, // ILP32 single-float ABI
+-  EF_LOONGARCH_BASE_ABI_ILP32D = 0x7, // ILP32 double-float ABI
+-  EF_LOONGARCH_BASE_ABI_MASK = 0x7, // Mask for selecting base ABI
++  // FIXME: Change these when all ABIs definition were finalized.
++  // See current definitions:
++  // https://loongson.github.io/LoongArch-Documentation/LoongArch-ELF-ABI-EN.html#_e_flags_identifies_abi_type_and_version
++  EF_LARCH_ABI = 0x0003,
++  EF_LARCH_ABI_LP32 = 0x0001,
++  EF_LARCH_ABI_LPX32 = 0x0002,
++  EF_LARCH_ABI_LP64 = 0x0003,
+ };
+
+ // ELF Relocation types for LoongArch
+diff --git a/llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h b/llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h
+index c5c2780bc..5620df079 100644
+--- a/llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h
++++ b/llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h
+@@ -330,6 +330,42 @@ public:
+       JITTargetAddress PointersBlockTargetAddress, unsigned NumStubs);
+ };
+
++// @brief LoongArch64 support.
++class OrcLoongArch64 {
++public:
++  static constexpr unsigned PointerSize = 8;
++  static constexpr unsigned TrampolineSize = 40;
++  static constexpr unsigned StubSize = 32;
++  static constexpr unsigned StubToPointerMaxDisplacement = 1 << 31;
++  static constexpr unsigned ResolverCodeSize = 0x120;
++  /// Write the resolver code into the given memory. The user is
++  /// responsible for allocating the memory and setting permissions.
++  ///
++  /// ReentryFnAddr should be the address of a function whose signature matches
++  /// void* (*)(void *TrampolineAddr, void *ReentryCtxAddr). The ReentryCtxAddr
++  /// argument of writeResolverCode will be passed as the second argument to
++  /// the function at ReentryFnAddr.
++  static void writeResolverCode(char *ResolverWorkingMem,
++                                JITTargetAddress ResolverTargetAddress,
++                                JITTargetAddress ReentryFnAddr,
++                                JITTargetAddress ReentryCtxAddr);
++
++  /// Write the requested number of trampolines into the given memory,
++  /// which must be big enough to hold 1 pointer, plus NumTrampolines
++  /// trampolines.
++  static void writeTrampolines(char *TrampolineBlockWorkingMem,
++                               JITTargetAddress TrampolineBlockTargetAddress,
++                               JITTargetAddress ResolverFnAddr,
++                               unsigned NumTrampolines);
++  /// Write NumStubs indirect stubs to working memory at StubsBlockWorkingMem.
++  /// Stubs will be written as if linked at StubsBlockTargetAddress, with the
++  /// Nth stub using the Nth pointer in memory starting at
++  /// PointersBlockTargetAddress.
++  static void writeIndirectStubsBlock(
++      char *StubsBlockWorkingMem, JITTargetAddress StubsBlockTargetAddress,
++      JITTargetAddress PointersBlockTargetAddress, unsigned NumStubs);
++};
++
+ // @brief riscv64 support.
+ //
+ // RISC-V 64 supports lazy JITing.
+diff --git a/llvm/include/llvm/IR/CMakeLists.txt b/llvm/include/llvm/IR/CMakeLists.txt
+index 5151f9125..468d66379 100644
+--- a/llvm/include/llvm/IR/CMakeLists.txt
++++ b/llvm/include/llvm/IR/CMakeLists.txt
+@@ -10,6 +10,7 @@ tablegen(LLVM IntrinsicsARM.h -gen-intrinsic-enums -intrinsic-prefix=arm)
+ tablegen(LLVM IntrinsicsBPF.h -gen-intrinsic-enums -intrinsic-prefix=bpf)
+ tablegen(LLVM IntrinsicsDirectX.h -gen-intrinsic-enums -intrinsic-prefix=dx)
+ tablegen(LLVM IntrinsicsHexagon.h -gen-intrinsic-enums -intrinsic-prefix=hexagon)
++tablegen(LLVM IntrinsicsLoongArch.h -gen-intrinsic-enums -intrinsic-prefix=loongarch)
+ tablegen(LLVM IntrinsicsMips.h -gen-intrinsic-enums -intrinsic-prefix=mips)
+ tablegen(LLVM IntrinsicsNVPTX.h -gen-intrinsic-enums -intrinsic-prefix=nvvm)
+ tablegen(LLVM IntrinsicsPowerPC.h -gen-intrinsic-enums -intrinsic-prefix=ppc)
+diff --git a/llvm/include/llvm/IR/InlineAsm.h b/llvm/include/llvm/IR/InlineAsm.h
+index 0a8d27aad..3f78eb41f 100644
+--- a/llvm/include/llvm/IR/InlineAsm.h
++++ b/llvm/include/llvm/IR/InlineAsm.h
+@@ -269,6 +269,7 @@ public:
+     Constraint_Uy,
+     Constraint_X,
+     Constraint_Z,
++    Constraint_ZB,
+     Constraint_ZC,
+     Constraint_Zy,
+
+diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
+index d46fa4fbf..76f2d66b7 100644
+--- a/llvm/include/llvm/IR/Intrinsics.td
++++ b/llvm/include/llvm/IR/Intrinsics.td
+@@ -2053,3 +2053,4 @@ include "llvm/IR/IntrinsicsRISCV.td"
+ include "llvm/IR/IntrinsicsSPIRV.td"
+ include "llvm/IR/IntrinsicsVE.td"
+ include "llvm/IR/IntrinsicsDirectX.td"
++include "llvm/IR/IntrinsicsLoongArch.td"
+diff --git a/llvm/include/llvm/IR/IntrinsicsLoongArch.td b/llvm/include/llvm/IR/IntrinsicsLoongArch.td
+new file mode 100644
+index 000000000..42c4e371d
+--- /dev/null
++++ b/llvm/include/llvm/IR/IntrinsicsLoongArch.td
+@@ -0,0 +1,3657 @@
++//===- IntrinsicsLoongArch.td - Defines LoongArch intrinsics ---------*- tablegen -*-===//
++//
++// The LLVM Compiler Infrastructure
++//
++// This file is distributed under the University of Illinois Open Source
++// License. See LICENSE.TXT for details.
++//
++//===----------------------------------------------------------------------===//
++//
++// This file defines all of the LoongArch-specific intrinsics.
++//
++//===----------------------------------------------------------------------===//
++
++let TargetPrefix = "loongarch" in { // All intrinsics start with "llvm.loongarch.".
++ ++//===----------------------------------------------------------------------===// ++// LoongArch LSX ++ ++def int_loongarch_lsx_vclo_b : ClangBuiltin<"__builtin_lsx_vclo_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vclo_h : ClangBuiltin<"__builtin_lsx_vclo_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vclo_w : ClangBuiltin<"__builtin_lsx_vclo_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vclo_d : ClangBuiltin<"__builtin_lsx_vclo_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vflogb_s : ClangBuiltin<"__builtin_lsx_vflogb_s">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vflogb_d : ClangBuiltin<"__builtin_lsx_vflogb_d">, ++ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vpickve2gr_b : ClangBuiltin<"__builtin_lsx_vpickve2gr_b">, ++ Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vpickve2gr_h : ClangBuiltin<"__builtin_lsx_vpickve2gr_h">, ++ Intrinsic<[llvm_i32_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vpickve2gr_w : ClangBuiltin<"__builtin_lsx_vpickve2gr_w">, ++ Intrinsic<[llvm_i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vpickve2gr_d : ClangBuiltin<"__builtin_lsx_vpickve2gr_d">, ++ Intrinsic<[llvm_i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vpickve2gr_bu : ClangBuiltin<"__builtin_lsx_vpickve2gr_bu">, ++ Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vpickve2gr_hu : ClangBuiltin<"__builtin_lsx_vpickve2gr_hu">, ++ Intrinsic<[llvm_i32_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vpickve2gr_wu : ClangBuiltin<"__builtin_lsx_vpickve2gr_wu">, ++ Intrinsic<[llvm_i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vpickve2gr_du : ClangBuiltin<"__builtin_lsx_vpickve2gr_du">, ++ Intrinsic<[llvm_i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vreplvei_b : ClangBuiltin<"__builtin_lsx_vreplvei_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vreplvei_h : ClangBuiltin<"__builtin_lsx_vreplvei_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vreplvei_w : ClangBuiltin<"__builtin_lsx_vreplvei_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vreplvei_d : ClangBuiltin<"__builtin_lsx_vreplvei_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vmskltz_b : ClangBuiltin<"__builtin_lsx_vmskltz_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmskltz_h : ClangBuiltin<"__builtin_lsx_vmskltz_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmskltz_w : ClangBuiltin<"__builtin_lsx_vmskltz_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmskltz_d : ClangBuiltin<"__builtin_lsx_vmskltz_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfmadd_s : ClangBuiltin<"__builtin_lsx_vfmadd_s">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfmadd_d : 
ClangBuiltin<"__builtin_lsx_vfmadd_d">, ++ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfmsub_s : ClangBuiltin<"__builtin_lsx_vfmsub_s">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfmsub_d : ClangBuiltin<"__builtin_lsx_vfmsub_d">, ++ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfnmadd_s : ClangBuiltin<"__builtin_lsx_vfnmadd_s">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfnmadd_d : ClangBuiltin<"__builtin_lsx_vfnmadd_d">, ++ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfnmsub_s : ClangBuiltin<"__builtin_lsx_vfnmsub_s">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfnmsub_d : ClangBuiltin<"__builtin_lsx_vfnmsub_d">, ++ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfcmp_caf_s : ClangBuiltin<"__builtin_lsx_vfcmp_caf_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfcmp_caf_d : ClangBuiltin<"__builtin_lsx_vfcmp_caf_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfcmp_cor_s : ClangBuiltin<"__builtin_lsx_vfcmp_cor_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfcmp_cor_d : ClangBuiltin<"__builtin_lsx_vfcmp_cor_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfcmp_cun_s : ClangBuiltin<"__builtin_lsx_vfcmp_cun_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfcmp_cun_d : ClangBuiltin<"__builtin_lsx_vfcmp_cun_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfcmp_cune_s : ClangBuiltin<"__builtin_lsx_vfcmp_cune_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfcmp_cune_d : ClangBuiltin<"__builtin_lsx_vfcmp_cune_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfcmp_cueq_s : ClangBuiltin<"__builtin_lsx_vfcmp_cueq_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfcmp_cueq_d : ClangBuiltin<"__builtin_lsx_vfcmp_cueq_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfcmp_ceq_s : ClangBuiltin<"__builtin_lsx_vfcmp_ceq_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfcmp_ceq_d : ClangBuiltin<"__builtin_lsx_vfcmp_ceq_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfcmp_cne_s : ClangBuiltin<"__builtin_lsx_vfcmp_cne_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfcmp_cne_d : ClangBuiltin<"__builtin_lsx_vfcmp_cne_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfcmp_clt_s : ClangBuiltin<"__builtin_lsx_vfcmp_clt_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def 
int_loongarch_lsx_vfcmp_clt_d : ClangBuiltin<"__builtin_lsx_vfcmp_clt_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfcmp_cult_s : ClangBuiltin<"__builtin_lsx_vfcmp_cult_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfcmp_cult_d : ClangBuiltin<"__builtin_lsx_vfcmp_cult_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfcmp_cle_s : ClangBuiltin<"__builtin_lsx_vfcmp_cle_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfcmp_cle_d : ClangBuiltin<"__builtin_lsx_vfcmp_cle_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfcmp_cule_s : ClangBuiltin<"__builtin_lsx_vfcmp_cule_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfcmp_cule_d : ClangBuiltin<"__builtin_lsx_vfcmp_cule_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfcmp_saf_s : ClangBuiltin<"__builtin_lsx_vfcmp_saf_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfcmp_saf_d : ClangBuiltin<"__builtin_lsx_vfcmp_saf_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfcmp_sor_s : ClangBuiltin<"__builtin_lsx_vfcmp_sor_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfcmp_sor_d : ClangBuiltin<"__builtin_lsx_vfcmp_sor_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfcmp_sun_s : ClangBuiltin<"__builtin_lsx_vfcmp_sun_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfcmp_sun_d : ClangBuiltin<"__builtin_lsx_vfcmp_sun_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfcmp_sune_s : ClangBuiltin<"__builtin_lsx_vfcmp_sune_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfcmp_sune_d : ClangBuiltin<"__builtin_lsx_vfcmp_sune_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfcmp_sueq_s : ClangBuiltin<"__builtin_lsx_vfcmp_sueq_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfcmp_sueq_d : ClangBuiltin<"__builtin_lsx_vfcmp_sueq_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfcmp_seq_s : ClangBuiltin<"__builtin_lsx_vfcmp_seq_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfcmp_seq_d : ClangBuiltin<"__builtin_lsx_vfcmp_seq_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfcmp_sne_s : ClangBuiltin<"__builtin_lsx_vfcmp_sne_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfcmp_sne_d : ClangBuiltin<"__builtin_lsx_vfcmp_sne_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfcmp_slt_s : ClangBuiltin<"__builtin_lsx_vfcmp_slt_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfcmp_slt_d : 
ClangBuiltin<"__builtin_lsx_vfcmp_slt_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfcmp_sult_s : ClangBuiltin<"__builtin_lsx_vfcmp_sult_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfcmp_sult_d : ClangBuiltin<"__builtin_lsx_vfcmp_sult_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfcmp_sle_s : ClangBuiltin<"__builtin_lsx_vfcmp_sle_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfcmp_sle_d : ClangBuiltin<"__builtin_lsx_vfcmp_sle_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfcmp_sule_s : ClangBuiltin<"__builtin_lsx_vfcmp_sule_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfcmp_sule_d : ClangBuiltin<"__builtin_lsx_vfcmp_sule_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vbitsel_v : ClangBuiltin<"__builtin_lsx_vbitsel_v">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vshuf_b : ClangBuiltin<"__builtin_lsx_vshuf_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vldrepl_b : ClangBuiltin<"__builtin_lsx_vldrepl_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>; ++def int_loongarch_lsx_vldrepl_h : ClangBuiltin<"__builtin_lsx_vldrepl_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>; ++def int_loongarch_lsx_vldrepl_w : ClangBuiltin<"__builtin_lsx_vldrepl_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>; ++def int_loongarch_lsx_vldrepl_d : ClangBuiltin<"__builtin_lsx_vldrepl_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>; ++ ++def int_loongarch_lsx_vstelm_b : ClangBuiltin<"__builtin_lsx_vstelm_b">, ++ Intrinsic<[], [llvm_v16i8_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrArgMemOnly]>; ++def int_loongarch_lsx_vstelm_h : ClangBuiltin<"__builtin_lsx_vstelm_h">, ++ Intrinsic<[], [llvm_v8i16_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrArgMemOnly]>; ++def int_loongarch_lsx_vstelm_w : ClangBuiltin<"__builtin_lsx_vstelm_w">, ++ Intrinsic<[], [llvm_v4i32_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrArgMemOnly]>; ++def int_loongarch_lsx_vstelm_d : ClangBuiltin<"__builtin_lsx_vstelm_d">, ++ Intrinsic<[], [llvm_v2i64_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrArgMemOnly]>; ++ ++def int_loongarch_lsx_vldx : ClangBuiltin<"__builtin_lsx_vldx">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_ptr_ty, llvm_i64_ty], ++ [IntrReadMem, IntrArgMemOnly]>; ++ ++def int_loongarch_lsx_vstx : ClangBuiltin<"__builtin_lsx_vstx">, ++ Intrinsic<[], [llvm_v16i8_ty, llvm_ptr_ty, llvm_i64_ty], ++ [IntrArgMemOnly]>; ++ ++def int_loongarch_lsx_vaddwev_d_w : ClangBuiltin<"__builtin_lsx_vaddwev_d_w">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vaddwev_w_h : ClangBuiltin<"__builtin_lsx_vaddwev_w_h">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vaddwev_h_b : ClangBuiltin<"__builtin_lsx_vaddwev_h_b">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def 
int_loongarch_lsx_vaddwev_q_d : ClangBuiltin<"__builtin_lsx_vaddwev_q_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vsubwev_d_w : ClangBuiltin<"__builtin_lsx_vsubwev_d_w">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsubwev_w_h : ClangBuiltin<"__builtin_lsx_vsubwev_w_h">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsubwev_h_b : ClangBuiltin<"__builtin_lsx_vsubwev_h_b">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsubwev_q_d : ClangBuiltin<"__builtin_lsx_vsubwev_q_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++ ++def int_loongarch_lsx_vaddwod_d_w : ClangBuiltin<"__builtin_lsx_vaddwod_d_w">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vaddwod_w_h : ClangBuiltin<"__builtin_lsx_vaddwod_w_h">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vaddwod_h_b : ClangBuiltin<"__builtin_lsx_vaddwod_h_b">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vaddwod_q_d : ClangBuiltin<"__builtin_lsx_vaddwod_q_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vsubwod_d_w : ClangBuiltin<"__builtin_lsx_vsubwod_d_w">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsubwod_w_h : ClangBuiltin<"__builtin_lsx_vsubwod_w_h">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsubwod_h_b : ClangBuiltin<"__builtin_lsx_vsubwod_h_b">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsubwod_q_d : ClangBuiltin<"__builtin_lsx_vsubwod_q_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vaddwev_d_wu : ClangBuiltin<"__builtin_lsx_vaddwev_d_wu">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vaddwev_w_hu : ClangBuiltin<"__builtin_lsx_vaddwev_w_hu">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vaddwev_h_bu : ClangBuiltin<"__builtin_lsx_vaddwev_h_bu">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vaddwev_q_du : ClangBuiltin<"__builtin_lsx_vaddwev_q_du">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vsubwev_d_wu : ClangBuiltin<"__builtin_lsx_vsubwev_d_wu">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsubwev_w_hu : ClangBuiltin<"__builtin_lsx_vsubwev_w_hu">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsubwev_h_bu : ClangBuiltin<"__builtin_lsx_vsubwev_h_bu">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsubwev_q_du : ClangBuiltin<"__builtin_lsx_vsubwev_q_du">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vaddwod_d_wu : ClangBuiltin<"__builtin_lsx_vaddwod_d_wu">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vaddwod_w_hu : 
ClangBuiltin<"__builtin_lsx_vaddwod_w_hu">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vaddwod_h_bu : ClangBuiltin<"__builtin_lsx_vaddwod_h_bu">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vaddwod_q_du : ClangBuiltin<"__builtin_lsx_vaddwod_q_du">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vsubwod_d_wu : ClangBuiltin<"__builtin_lsx_vsubwod_d_wu">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsubwod_w_hu : ClangBuiltin<"__builtin_lsx_vsubwod_w_hu">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsubwod_h_bu : ClangBuiltin<"__builtin_lsx_vsubwod_h_bu">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsubwod_q_du : ClangBuiltin<"__builtin_lsx_vsubwod_q_du">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vaddwev_d_wu_w : ClangBuiltin<"__builtin_lsx_vaddwev_d_wu_w">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vaddwev_w_hu_h : ClangBuiltin<"__builtin_lsx_vaddwev_w_hu_h">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vaddwev_h_bu_b : ClangBuiltin<"__builtin_lsx_vaddwev_h_bu_b">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vaddwev_q_du_d : ClangBuiltin<"__builtin_lsx_vaddwev_q_du_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vaddwod_d_wu_w : ClangBuiltin<"__builtin_lsx_vaddwod_d_wu_w">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vaddwod_w_hu_h : ClangBuiltin<"__builtin_lsx_vaddwod_w_hu_h">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vaddwod_h_bu_b : ClangBuiltin<"__builtin_lsx_vaddwod_h_bu_b">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vaddwod_q_du_d : ClangBuiltin<"__builtin_lsx_vaddwod_q_du_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vhaddw_qu_du : ClangBuiltin<"__builtin_lsx_vhaddw_qu_du">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vhsubw_qu_du : ClangBuiltin<"__builtin_lsx_vhsubw_qu_du">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vhaddw_q_d : ClangBuiltin<"__builtin_lsx_vhaddw_q_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vhsubw_q_d : ClangBuiltin<"__builtin_lsx_vhsubw_q_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vmuh_b : ClangBuiltin<"__builtin_lsx_vmuh_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmuh_h : ClangBuiltin<"__builtin_lsx_vmuh_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmuh_w : ClangBuiltin<"__builtin_lsx_vmuh_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmuh_d : ClangBuiltin<"__builtin_lsx_vmuh_d">, ++ 
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vmuh_bu : ClangBuiltin<"__builtin_lsx_vmuh_bu">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmuh_hu : ClangBuiltin<"__builtin_lsx_vmuh_hu">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmuh_wu : ClangBuiltin<"__builtin_lsx_vmuh_wu">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmuh_du : ClangBuiltin<"__builtin_lsx_vmuh_du">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vmulwev_d_w : ClangBuiltin<"__builtin_lsx_vmulwev_d_w">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmulwev_w_h : ClangBuiltin<"__builtin_lsx_vmulwev_w_h">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmulwev_h_b : ClangBuiltin<"__builtin_lsx_vmulwev_h_b">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmulwev_q_d : ClangBuiltin<"__builtin_lsx_vmulwev_q_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vmulwod_d_w : ClangBuiltin<"__builtin_lsx_vmulwod_d_w">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmulwod_w_h : ClangBuiltin<"__builtin_lsx_vmulwod_w_h">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmulwod_h_b : ClangBuiltin<"__builtin_lsx_vmulwod_h_b">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmulwod_q_d : ClangBuiltin<"__builtin_lsx_vmulwod_q_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vmulwev_d_wu : ClangBuiltin<"__builtin_lsx_vmulwev_d_wu">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmulwev_w_hu : ClangBuiltin<"__builtin_lsx_vmulwev_w_hu">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmulwev_h_bu : ClangBuiltin<"__builtin_lsx_vmulwev_h_bu">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmulwev_q_du : ClangBuiltin<"__builtin_lsx_vmulwev_q_du">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vmulwod_d_wu : ClangBuiltin<"__builtin_lsx_vmulwod_d_wu">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmulwod_w_hu : ClangBuiltin<"__builtin_lsx_vmulwod_w_hu">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmulwod_h_bu : ClangBuiltin<"__builtin_lsx_vmulwod_h_bu">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmulwod_q_du : ClangBuiltin<"__builtin_lsx_vmulwod_q_du">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vmulwev_d_wu_w : ClangBuiltin<"__builtin_lsx_vmulwev_d_wu_w">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmulwev_w_hu_h : ClangBuiltin<"__builtin_lsx_vmulwev_w_hu_h">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def 
int_loongarch_lsx_vmulwev_h_bu_b : ClangBuiltin<"__builtin_lsx_vmulwev_h_bu_b">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmulwev_q_du_d : ClangBuiltin<"__builtin_lsx_vmulwev_q_du_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vmulwod_d_wu_w : ClangBuiltin<"__builtin_lsx_vmulwod_d_wu_w">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmulwod_w_hu_h : ClangBuiltin<"__builtin_lsx_vmulwod_w_hu_h">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmulwod_h_bu_b : ClangBuiltin<"__builtin_lsx_vmulwod_h_bu_b">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmulwod_q_du_d : ClangBuiltin<"__builtin_lsx_vmulwod_q_du_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vmaddwev_d_w : ClangBuiltin<"__builtin_lsx_vmaddwev_d_w">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmaddwev_w_h : ClangBuiltin<"__builtin_lsx_vmaddwev_w_h">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmaddwev_h_b : ClangBuiltin<"__builtin_lsx_vmaddwev_h_b">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmaddwev_q_d : ClangBuiltin<"__builtin_lsx_vmaddwev_q_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vmaddwod_d_w : ClangBuiltin<"__builtin_lsx_vmaddwod_d_w">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmaddwod_w_h : ClangBuiltin<"__builtin_lsx_vmaddwod_w_h">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmaddwod_h_b : ClangBuiltin<"__builtin_lsx_vmaddwod_h_b">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmaddwod_q_d : ClangBuiltin<"__builtin_lsx_vmaddwod_q_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vmaddwev_d_wu : ClangBuiltin<"__builtin_lsx_vmaddwev_d_wu">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmaddwev_w_hu : ClangBuiltin<"__builtin_lsx_vmaddwev_w_hu">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmaddwev_h_bu : ClangBuiltin<"__builtin_lsx_vmaddwev_h_bu">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmaddwev_q_du : ClangBuiltin<"__builtin_lsx_vmaddwev_q_du">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vmaddwod_d_wu : ClangBuiltin<"__builtin_lsx_vmaddwod_d_wu">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmaddwod_w_hu : ClangBuiltin<"__builtin_lsx_vmaddwod_w_hu">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmaddwod_h_bu : ClangBuiltin<"__builtin_lsx_vmaddwod_h_bu">, ++ Intrinsic<[llvm_v8i16_ty], 
[llvm_v8i16_ty, llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmaddwod_q_du : ClangBuiltin<"__builtin_lsx_vmaddwod_q_du">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vmaddwev_d_wu_w : ClangBuiltin<"__builtin_lsx_vmaddwev_d_wu_w">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmaddwev_w_hu_h : ClangBuiltin<"__builtin_lsx_vmaddwev_w_hu_h">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmaddwev_h_bu_b : ClangBuiltin<"__builtin_lsx_vmaddwev_h_bu_b">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmaddwev_q_du_d : ClangBuiltin<"__builtin_lsx_vmaddwev_q_du_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vmaddwod_d_wu_w : ClangBuiltin<"__builtin_lsx_vmaddwod_d_wu_w">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmaddwod_w_hu_h : ClangBuiltin<"__builtin_lsx_vmaddwod_w_hu_h">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmaddwod_h_bu_b : ClangBuiltin<"__builtin_lsx_vmaddwod_h_bu_b">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmaddwod_q_du_d : ClangBuiltin<"__builtin_lsx_vmaddwod_q_du_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vsrln_b_h : ClangBuiltin<"__builtin_lsx_vsrln_b_h">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrln_h_w : ClangBuiltin<"__builtin_lsx_vsrln_h_w">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrln_w_d : ClangBuiltin<"__builtin_lsx_vsrln_w_d">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vsran_b_h : ClangBuiltin<"__builtin_lsx_vsran_b_h">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsran_h_w : ClangBuiltin<"__builtin_lsx_vsran_h_w">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsran_w_d : ClangBuiltin<"__builtin_lsx_vsran_w_d">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vsrlrn_b_h : ClangBuiltin<"__builtin_lsx_vsrlrn_b_h">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrlrn_h_w : ClangBuiltin<"__builtin_lsx_vsrlrn_h_w">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrlrn_w_d : ClangBuiltin<"__builtin_lsx_vsrlrn_w_d">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vsrarn_b_h : ClangBuiltin<"__builtin_lsx_vsrarn_b_h">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrarn_h_w : ClangBuiltin<"__builtin_lsx_vsrarn_h_w">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrarn_w_d : ClangBuiltin<"__builtin_lsx_vsrarn_w_d">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def 
int_loongarch_lsx_vssrln_b_h : ClangBuiltin<"__builtin_lsx_vssrln_b_h">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrln_h_w : ClangBuiltin<"__builtin_lsx_vssrln_h_w">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrln_w_d : ClangBuiltin<"__builtin_lsx_vssrln_w_d">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vssran_b_h : ClangBuiltin<"__builtin_lsx_vssran_b_h">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssran_h_w : ClangBuiltin<"__builtin_lsx_vssran_h_w">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssran_w_d : ClangBuiltin<"__builtin_lsx_vssran_w_d">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vssrlrn_b_h : ClangBuiltin<"__builtin_lsx_vssrlrn_b_h">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrlrn_h_w : ClangBuiltin<"__builtin_lsx_vssrlrn_h_w">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrlrn_w_d : ClangBuiltin<"__builtin_lsx_vssrlrn_w_d">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vssrarn_b_h : ClangBuiltin<"__builtin_lsx_vssrarn_b_h">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrarn_h_w : ClangBuiltin<"__builtin_lsx_vssrarn_h_w">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrarn_w_d : ClangBuiltin<"__builtin_lsx_vssrarn_w_d">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vssrln_bu_h : ClangBuiltin<"__builtin_lsx_vssrln_bu_h">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrln_hu_w : ClangBuiltin<"__builtin_lsx_vssrln_hu_w">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrln_wu_d : ClangBuiltin<"__builtin_lsx_vssrln_wu_d">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vssran_bu_h : ClangBuiltin<"__builtin_lsx_vssran_bu_h">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssran_hu_w : ClangBuiltin<"__builtin_lsx_vssran_hu_w">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssran_wu_d : ClangBuiltin<"__builtin_lsx_vssran_wu_d">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vssrlrn_bu_h : ClangBuiltin<"__builtin_lsx_vssrlrn_bu_h">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrlrn_hu_w : ClangBuiltin<"__builtin_lsx_vssrlrn_hu_w">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrlrn_wu_d : ClangBuiltin<"__builtin_lsx_vssrlrn_wu_d">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vssrarn_bu_h : ClangBuiltin<"__builtin_lsx_vssrarn_bu_h">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrarn_hu_w : ClangBuiltin<"__builtin_lsx_vssrarn_hu_w">, ++ 
Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrarn_wu_d : ClangBuiltin<"__builtin_lsx_vssrarn_wu_d">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vandn_v : ClangBuiltin<"__builtin_lsx_vandn_v">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vorn_v : ClangBuiltin<"__builtin_lsx_vorn_v">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfrstp_b : ClangBuiltin<"__builtin_lsx_vfrstp_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty], ++ [IntrNoMem]>; ++def int_loongarch_lsx_vfrstp_h : ClangBuiltin<"__builtin_lsx_vfrstp_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty], ++ [IntrNoMem]>; ++ ++def int_loongarch_lsx_vadd_q : ClangBuiltin<"__builtin_lsx_vadd_q">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsub_q : ClangBuiltin<"__builtin_lsx_vsub_q">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vsigncov_b : ClangBuiltin<"__builtin_lsx_vsigncov_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], ++ [IntrNoMem]>; ++def int_loongarch_lsx_vsigncov_h : ClangBuiltin<"__builtin_lsx_vsigncov_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], ++ [IntrNoMem]>; ++def int_loongarch_lsx_vsigncov_w : ClangBuiltin<"__builtin_lsx_vsigncov_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], ++ [IntrNoMem]>; ++def int_loongarch_lsx_vsigncov_d : ClangBuiltin<"__builtin_lsx_vsigncov_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], ++ [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfcvt_h_s : ClangBuiltin<"__builtin_lsx_vfcvt_h_s">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfcvt_s_d : ClangBuiltin<"__builtin_lsx_vfcvt_s_d">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vffint_s_l : ClangBuiltin<"__builtin_lsx_vffint_s_l">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vftint_w_d : ClangBuiltin<"__builtin_lsx_vftint_w_d">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vftintrz_w_d : ClangBuiltin<"__builtin_lsx_vftintrz_w_d">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vftintrp_w_d : ClangBuiltin<"__builtin_lsx_vftintrp_w_d">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vftintrm_w_d : ClangBuiltin<"__builtin_lsx_vftintrm_w_d">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vftintrne_w_d : ClangBuiltin<"__builtin_lsx_vftintrne_w_d">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vbsrl_v : ClangBuiltin<"__builtin_lsx_vbsrl_v">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vbsll_v : ClangBuiltin<"__builtin_lsx_vbsll_v">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfrstpi_b : ClangBuiltin<"__builtin_lsx_vfrstpi_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def 
int_loongarch_lsx_vfrstpi_h : ClangBuiltin<"__builtin_lsx_vfrstpi_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vneg_b : ClangBuiltin<"__builtin_lsx_vneg_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vneg_h : ClangBuiltin<"__builtin_lsx_vneg_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vneg_w : ClangBuiltin<"__builtin_lsx_vneg_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vneg_d : ClangBuiltin<"__builtin_lsx_vneg_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vmskgez_b : ClangBuiltin<"__builtin_lsx_vmskgez_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmsknz_b : ClangBuiltin<"__builtin_lsx_vmsknz_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfrintrm_s : ClangBuiltin<"__builtin_lsx_vfrintrm_s">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfrintrm_d : ClangBuiltin<"__builtin_lsx_vfrintrm_d">, ++ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfrintrp_s : ClangBuiltin<"__builtin_lsx_vfrintrp_s">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfrintrp_d : ClangBuiltin<"__builtin_lsx_vfrintrp_d">, ++ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfrintrz_s : ClangBuiltin<"__builtin_lsx_vfrintrz_s">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfrintrz_d : ClangBuiltin<"__builtin_lsx_vfrintrz_d">, ++ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfrintrne_s : ClangBuiltin<"__builtin_lsx_vfrintrne_s">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfrintrne_d : ClangBuiltin<"__builtin_lsx_vfrintrne_d">, ++ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vffinth_d_w : ClangBuiltin<"__builtin_lsx_vffinth_d_w">, ++ Intrinsic<[llvm_v2f64_ty], [llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vffintl_d_w : ClangBuiltin<"__builtin_lsx_vffintl_d_w">, ++ Intrinsic<[llvm_v2f64_ty], [llvm_v4i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vftintrm_w_s : ClangBuiltin<"__builtin_lsx_vftintrm_w_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vftintrm_l_d : ClangBuiltin<"__builtin_lsx_vftintrm_l_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vftintrp_w_s : ClangBuiltin<"__builtin_lsx_vftintrp_w_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vftintrp_l_d : ClangBuiltin<"__builtin_lsx_vftintrp_l_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vftintrz_w_s : ClangBuiltin<"__builtin_lsx_vftintrz_w_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vftintrz_l_d : ClangBuiltin<"__builtin_lsx_vftintrz_l_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vftintrne_w_s : ClangBuiltin<"__builtin_lsx_vftintrne_w_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vftintrne_l_d : ClangBuiltin<"__builtin_lsx_vftintrne_l_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty], 
[IntrNoMem]>; ++ ++def int_loongarch_lsx_vftinth_l_s : ClangBuiltin<"__builtin_lsx_vftinth_l_s">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vftintl_l_s : ClangBuiltin<"__builtin_lsx_vftintl_l_s">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4f32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vftintrmh_l_s : ClangBuiltin<"__builtin_lsx_vftintrmh_l_s">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vftintrml_l_s : ClangBuiltin<"__builtin_lsx_vftintrml_l_s">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4f32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vftintrph_l_s : ClangBuiltin<"__builtin_lsx_vftintrph_l_s">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vftintrpl_l_s : ClangBuiltin<"__builtin_lsx_vftintrpl_l_s">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4f32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vftintrzh_l_s : ClangBuiltin<"__builtin_lsx_vftintrzh_l_s">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vftintrzl_l_s : ClangBuiltin<"__builtin_lsx_vftintrzl_l_s">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4f32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vftintrneh_l_s : ClangBuiltin<"__builtin_lsx_vftintrneh_l_s">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vftintrnel_l_s : ClangBuiltin<"__builtin_lsx_vftintrnel_l_s">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4f32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vexth_d_w : ClangBuiltin<"__builtin_lsx_vexth_d_w">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vexth_w_h : ClangBuiltin<"__builtin_lsx_vexth_w_h">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vexth_h_b : ClangBuiltin<"__builtin_lsx_vexth_h_b">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vexth_q_d : ClangBuiltin<"__builtin_lsx_vexth_q_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vexth_du_wu : ClangBuiltin<"__builtin_lsx_vexth_du_wu">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vexth_wu_hu : ClangBuiltin<"__builtin_lsx_vexth_wu_hu">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vexth_hu_bu : ClangBuiltin<"__builtin_lsx_vexth_hu_bu">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vexth_qu_du : ClangBuiltin<"__builtin_lsx_vexth_qu_du">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvexth_du_wu : ClangBuiltin<"__builtin_lasx_xvexth_du_wu">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvexth_wu_hu : ClangBuiltin<"__builtin_lasx_xvexth_wu_hu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvexth_hu_bu : ClangBuiltin<"__builtin_lasx_xvexth_hu_bu">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvexth_qu_du : ClangBuiltin<"__builtin_lasx_xvexth_qu_du">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vsllwil_d_w : ClangBuiltin<"__builtin_lsx_vsllwil_d_w">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsllwil_w_h : ClangBuiltin<"__builtin_lsx_vsllwil_w_h">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsllwil_h_b : 
ClangBuiltin<"__builtin_lsx_vsllwil_h_b">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vextl_q_d : ClangBuiltin<"__builtin_lsx_vextl_q_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vsllwil_du_wu : ClangBuiltin<"__builtin_lsx_vsllwil_du_wu">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsllwil_wu_hu : ClangBuiltin<"__builtin_lsx_vsllwil_wu_hu">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsllwil_hu_bu : ClangBuiltin<"__builtin_lsx_vsllwil_hu_bu">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vextl_qu_du : ClangBuiltin<"__builtin_lsx_vextl_qu_du">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vbitclri_b : ClangBuiltin<"__builtin_lsx_vbitclri_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vbitclri_h : ClangBuiltin<"__builtin_lsx_vbitclri_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vbitclri_w : ClangBuiltin<"__builtin_lsx_vbitclri_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vbitclri_d : ClangBuiltin<"__builtin_lsx_vbitclri_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vbitseti_b : ClangBuiltin<"__builtin_lsx_vbitseti_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vbitseti_h : ClangBuiltin<"__builtin_lsx_vbitseti_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vbitseti_w : ClangBuiltin<"__builtin_lsx_vbitseti_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vbitseti_d : ClangBuiltin<"__builtin_lsx_vbitseti_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vbitrevi_b : ClangBuiltin<"__builtin_lsx_vbitrevi_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vbitrevi_h : ClangBuiltin<"__builtin_lsx_vbitrevi_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vbitrevi_w : ClangBuiltin<"__builtin_lsx_vbitrevi_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vbitrevi_d : ClangBuiltin<"__builtin_lsx_vbitrevi_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vssrlrni_b_h : ClangBuiltin<"__builtin_lsx_vssrlrni_b_h">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrlrni_h_w : ClangBuiltin<"__builtin_lsx_vssrlrni_h_w">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrlrni_w_d : ClangBuiltin<"__builtin_lsx_vssrlrni_w_d">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrlrni_d_q : ClangBuiltin<"__builtin_lsx_vssrlrni_d_q">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vsrani_b_h : ClangBuiltin<"__builtin_lsx_vsrani_b_h">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, 
llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrani_h_w : ClangBuiltin<"__builtin_lsx_vsrani_h_w">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrani_w_d : ClangBuiltin<"__builtin_lsx_vsrani_w_d">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrani_d_q : ClangBuiltin<"__builtin_lsx_vsrani_d_q">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vextrins_b : ClangBuiltin<"__builtin_lsx_vextrins_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vextrins_h : ClangBuiltin<"__builtin_lsx_vextrins_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vextrins_w : ClangBuiltin<"__builtin_lsx_vextrins_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vextrins_d : ClangBuiltin<"__builtin_lsx_vextrins_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vbitseli_b : ClangBuiltin<"__builtin_lsx_vbitseli_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vandi_b : ClangBuiltin<"__builtin_lsx_vandi_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vori_b : ClangBuiltin<"__builtin_lsx_vori_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vxori_b : ClangBuiltin<"__builtin_lsx_vxori_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vnori_b : ClangBuiltin<"__builtin_lsx_vnori_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vldi : ClangBuiltin<"__builtin_lsx_vldi">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vrepli_b : ClangBuiltin<"__builtin_lsx_vrepli_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vrepli_h : ClangBuiltin<"__builtin_lsx_vrepli_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vrepli_w : ClangBuiltin<"__builtin_lsx_vrepli_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vrepli_d : ClangBuiltin<"__builtin_lsx_vrepli_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vpermi_w : ClangBuiltin<"__builtin_lsx_vpermi_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vsadd_b : ClangBuiltin<"__builtin_lsx_vsadd_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lsx_vsadd_h : ClangBuiltin<"__builtin_lsx_vsadd_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lsx_vsadd_w : ClangBuiltin<"__builtin_lsx_vsadd_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lsx_vsadd_d : ClangBuiltin<"__builtin_lsx_vsadd_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], ++ [Commutative, IntrNoMem]>; ++ ++def int_loongarch_lsx_vssub_b : 
ClangBuiltin<"__builtin_lsx_vssub_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssub_h : ClangBuiltin<"__builtin_lsx_vssub_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssub_w : ClangBuiltin<"__builtin_lsx_vssub_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssub_d : ClangBuiltin<"__builtin_lsx_vssub_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vsadd_bu : ClangBuiltin<"__builtin_lsx_vsadd_bu">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lsx_vsadd_hu : ClangBuiltin<"__builtin_lsx_vsadd_hu">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lsx_vsadd_wu : ClangBuiltin<"__builtin_lsx_vsadd_wu">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lsx_vsadd_du : ClangBuiltin<"__builtin_lsx_vsadd_du">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], ++ [Commutative, IntrNoMem]>; ++ ++def int_loongarch_lsx_vssub_bu : ClangBuiltin<"__builtin_lsx_vssub_bu">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssub_hu : ClangBuiltin<"__builtin_lsx_vssub_hu">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssub_wu : ClangBuiltin<"__builtin_lsx_vssub_wu">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssub_du : ClangBuiltin<"__builtin_lsx_vssub_du">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vhaddw_h_b : ClangBuiltin<"__builtin_lsx_vhaddw_h_b">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vhaddw_w_h : ClangBuiltin<"__builtin_lsx_vhaddw_w_h">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vhaddw_d_w : ClangBuiltin<"__builtin_lsx_vhaddw_d_w">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vhsubw_h_b : ClangBuiltin<"__builtin_lsx_vhsubw_h_b">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vhsubw_w_h : ClangBuiltin<"__builtin_lsx_vhsubw_w_h">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vhsubw_d_w : ClangBuiltin<"__builtin_lsx_vhsubw_d_w">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vhaddw_hu_bu : ClangBuiltin<"__builtin_lsx_vhaddw_hu_bu">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vhaddw_wu_hu : ClangBuiltin<"__builtin_lsx_vhaddw_wu_hu">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vhaddw_du_wu : ClangBuiltin<"__builtin_lsx_vhaddw_du_wu">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vhsubw_hu_bu : ClangBuiltin<"__builtin_lsx_vhsubw_hu_bu">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vhsubw_wu_hu : ClangBuiltin<"__builtin_lsx_vhsubw_wu_hu">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, 
llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vhsubw_du_wu : ClangBuiltin<"__builtin_lsx_vhsubw_du_wu">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vadda_b : ClangBuiltin<"__builtin_lsx_vadda_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lsx_vadda_h : ClangBuiltin<"__builtin_lsx_vadda_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lsx_vadda_w : ClangBuiltin<"__builtin_lsx_vadda_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lsx_vadda_d : ClangBuiltin<"__builtin_lsx_vadda_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], ++ [Commutative, IntrNoMem]>; ++ ++def int_loongarch_lsx_vabsd_b : ClangBuiltin<"__builtin_lsx_vabsd_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vabsd_h : ClangBuiltin<"__builtin_lsx_vabsd_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vabsd_w : ClangBuiltin<"__builtin_lsx_vabsd_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vabsd_d : ClangBuiltin<"__builtin_lsx_vabsd_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vabsd_bu : ClangBuiltin<"__builtin_lsx_vabsd_bu">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vabsd_hu : ClangBuiltin<"__builtin_lsx_vabsd_hu">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vabsd_wu : ClangBuiltin<"__builtin_lsx_vabsd_wu">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vabsd_du : ClangBuiltin<"__builtin_lsx_vabsd_du">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vavg_b : ClangBuiltin<"__builtin_lsx_vavg_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lsx_vavg_h : ClangBuiltin<"__builtin_lsx_vavg_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lsx_vavg_w : ClangBuiltin<"__builtin_lsx_vavg_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lsx_vavg_d : ClangBuiltin<"__builtin_lsx_vavg_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], ++ [Commutative, IntrNoMem]>; ++ ++def int_loongarch_lsx_vavg_bu : ClangBuiltin<"__builtin_lsx_vavg_bu">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lsx_vavg_hu : ClangBuiltin<"__builtin_lsx_vavg_hu">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lsx_vavg_wu : ClangBuiltin<"__builtin_lsx_vavg_wu">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lsx_vavg_du : ClangBuiltin<"__builtin_lsx_vavg_du">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], ++ [Commutative, IntrNoMem]>; ++ ++def int_loongarch_lsx_vavgr_b : ClangBuiltin<"__builtin_lsx_vavgr_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], ++ [Commutative, IntrNoMem]>; ++def 
int_loongarch_lsx_vavgr_h : ClangBuiltin<"__builtin_lsx_vavgr_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lsx_vavgr_w : ClangBuiltin<"__builtin_lsx_vavgr_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lsx_vavgr_d : ClangBuiltin<"__builtin_lsx_vavgr_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], ++ [Commutative, IntrNoMem]>; ++ ++def int_loongarch_lsx_vavgr_bu : ClangBuiltin<"__builtin_lsx_vavgr_bu">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lsx_vavgr_hu : ClangBuiltin<"__builtin_lsx_vavgr_hu">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lsx_vavgr_wu : ClangBuiltin<"__builtin_lsx_vavgr_wu">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lsx_vavgr_du : ClangBuiltin<"__builtin_lsx_vavgr_du">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], ++ [Commutative, IntrNoMem]>; ++ ++def int_loongarch_lsx_vsrlr_b : ClangBuiltin<"__builtin_lsx_vsrlr_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrlr_h : ClangBuiltin<"__builtin_lsx_vsrlr_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrlr_w : ClangBuiltin<"__builtin_lsx_vsrlr_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrlr_d : ClangBuiltin<"__builtin_lsx_vsrlr_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vsrar_b : ClangBuiltin<"__builtin_lsx_vsrar_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrar_h : ClangBuiltin<"__builtin_lsx_vsrar_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrar_w : ClangBuiltin<"__builtin_lsx_vsrar_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrar_d : ClangBuiltin<"__builtin_lsx_vsrar_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfmax_s : ClangBuiltin<"__builtin_lsx_vfmax_s">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfmax_d : ClangBuiltin<"__builtin_lsx_vfmax_d">, ++ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfmin_s : ClangBuiltin<"__builtin_lsx_vfmin_s">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfmin_d : ClangBuiltin<"__builtin_lsx_vfmin_d">, ++ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfmaxa_s : ClangBuiltin<"__builtin_lsx_vfmaxa_s">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfmaxa_d : ClangBuiltin<"__builtin_lsx_vfmaxa_d">, ++ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfmina_s : ClangBuiltin<"__builtin_lsx_vfmina_s">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfmina_d : ClangBuiltin<"__builtin_lsx_vfmina_d">, ++ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, 
llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfclass_s : ClangBuiltin<"__builtin_lsx_vfclass_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfclass_d : ClangBuiltin<"__builtin_lsx_vfclass_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfrecip_s : ClangBuiltin<"__builtin_lsx_vfrecip_s">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfrecip_d : ClangBuiltin<"__builtin_lsx_vfrecip_d">, ++ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfrsqrt_s : ClangBuiltin<"__builtin_lsx_vfrsqrt_s">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfrsqrt_d : ClangBuiltin<"__builtin_lsx_vfrsqrt_d">, ++ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfcvtl_s_h : ClangBuiltin<"__builtin_lsx_vfcvtl_s_h">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfcvtl_d_s : ClangBuiltin<"__builtin_lsx_vfcvtl_d_s">, ++ Intrinsic<[llvm_v2f64_ty], [llvm_v4f32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfcvth_s_h : ClangBuiltin<"__builtin_lsx_vfcvth_s_h">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfcvth_d_s : ClangBuiltin<"__builtin_lsx_vfcvth_d_s">, ++ Intrinsic<[llvm_v2f64_ty], [llvm_v4f32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vftint_w_s : ClangBuiltin<"__builtin_lsx_vftint_w_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vftint_l_d : ClangBuiltin<"__builtin_lsx_vftint_l_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vftint_wu_s : ClangBuiltin<"__builtin_lsx_vftint_wu_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vftint_lu_d : ClangBuiltin<"__builtin_lsx_vftint_lu_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vsrlri_b : ClangBuiltin<"__builtin_lsx_vsrlri_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrlri_h : ClangBuiltin<"__builtin_lsx_vsrlri_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrlri_w : ClangBuiltin<"__builtin_lsx_vsrlri_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrlri_d : ClangBuiltin<"__builtin_lsx_vsrlri_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vsrari_b : ClangBuiltin<"__builtin_lsx_vsrari_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrari_h : ClangBuiltin<"__builtin_lsx_vsrari_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrari_w : ClangBuiltin<"__builtin_lsx_vsrari_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrari_d : ClangBuiltin<"__builtin_lsx_vsrari_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vsat_b : ClangBuiltin<"__builtin_lsx_vsat_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsat_h : ClangBuiltin<"__builtin_lsx_vsat_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsat_w : 
ClangBuiltin<"__builtin_lsx_vsat_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsat_d : ClangBuiltin<"__builtin_lsx_vsat_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vsat_bu : ClangBuiltin<"__builtin_lsx_vsat_bu">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsat_hu : ClangBuiltin<"__builtin_lsx_vsat_hu">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsat_wu : ClangBuiltin<"__builtin_lsx_vsat_wu">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsat_du : ClangBuiltin<"__builtin_lsx_vsat_du">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vsrlni_b_h : ClangBuiltin<"__builtin_lsx_vsrlni_b_h">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrlni_h_w : ClangBuiltin<"__builtin_lsx_vsrlni_h_w">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrlni_w_d : ClangBuiltin<"__builtin_lsx_vsrlni_w_d">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrlni_d_q : ClangBuiltin<"__builtin_lsx_vsrlni_d_q">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vsrlrni_b_h : ClangBuiltin<"__builtin_lsx_vsrlrni_b_h">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrlrni_h_w : ClangBuiltin<"__builtin_lsx_vsrlrni_h_w">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrlrni_w_d : ClangBuiltin<"__builtin_lsx_vsrlrni_w_d">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrlrni_d_q : ClangBuiltin<"__builtin_lsx_vsrlrni_d_q">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vssrlni_b_h : ClangBuiltin<"__builtin_lsx_vssrlni_b_h">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrlni_h_w : ClangBuiltin<"__builtin_lsx_vssrlni_h_w">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrlni_w_d : ClangBuiltin<"__builtin_lsx_vssrlni_w_d">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrlni_d_q : ClangBuiltin<"__builtin_lsx_vssrlni_d_q">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vssrlrni_bu_h : ClangBuiltin<"__builtin_lsx_vssrlrni_bu_h">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrlrni_hu_w : ClangBuiltin<"__builtin_lsx_vssrlrni_hu_w">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrlrni_wu_d : ClangBuiltin<"__builtin_lsx_vssrlrni_wu_d">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrlrni_du_q : ClangBuiltin<"__builtin_lsx_vssrlrni_du_q">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, 
llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vsrarni_b_h : ClangBuiltin<"__builtin_lsx_vsrarni_b_h">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrarni_h_w : ClangBuiltin<"__builtin_lsx_vsrarni_h_w">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrarni_w_d : ClangBuiltin<"__builtin_lsx_vsrarni_w_d">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrarni_d_q : ClangBuiltin<"__builtin_lsx_vsrarni_d_q">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vssrani_b_h : ClangBuiltin<"__builtin_lsx_vssrani_b_h">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrani_h_w : ClangBuiltin<"__builtin_lsx_vssrani_h_w">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrani_w_d : ClangBuiltin<"__builtin_lsx_vssrani_w_d">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrani_d_q : ClangBuiltin<"__builtin_lsx_vssrani_d_q">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vssrani_bu_h : ClangBuiltin<"__builtin_lsx_vssrani_bu_h">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrani_hu_w : ClangBuiltin<"__builtin_lsx_vssrani_hu_w">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrani_wu_d : ClangBuiltin<"__builtin_lsx_vssrani_wu_d">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrani_du_q : ClangBuiltin<"__builtin_lsx_vssrani_du_q">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vssrarni_b_h : ClangBuiltin<"__builtin_lsx_vssrarni_b_h">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrarni_h_w : ClangBuiltin<"__builtin_lsx_vssrarni_h_w">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrarni_w_d : ClangBuiltin<"__builtin_lsx_vssrarni_w_d">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrarni_d_q : ClangBuiltin<"__builtin_lsx_vssrarni_d_q">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vssrarni_bu_h : ClangBuiltin<"__builtin_lsx_vssrarni_bu_h">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrarni_hu_w : ClangBuiltin<"__builtin_lsx_vssrarni_hu_w">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrarni_wu_d : ClangBuiltin<"__builtin_lsx_vssrarni_wu_d">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrarni_du_q : ClangBuiltin<"__builtin_lsx_vssrarni_du_q">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vssrlni_bu_h : 
ClangBuiltin<"__builtin_lsx_vssrlni_bu_h">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrlni_hu_w : ClangBuiltin<"__builtin_lsx_vssrlni_hu_w">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrlni_wu_d : ClangBuiltin<"__builtin_lsx_vssrlni_wu_d">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrlni_du_q : ClangBuiltin<"__builtin_lsx_vssrlni_du_q">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vseq_b : ClangBuiltin<"__builtin_lsx_vseq_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vseq_h : ClangBuiltin<"__builtin_lsx_vseq_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vseq_w : ClangBuiltin<"__builtin_lsx_vseq_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vseq_d : ClangBuiltin<"__builtin_lsx_vseq_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vsle_b : ClangBuiltin<"__builtin_lsx_vsle_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsle_h : ClangBuiltin<"__builtin_lsx_vsle_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsle_w : ClangBuiltin<"__builtin_lsx_vsle_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsle_d : ClangBuiltin<"__builtin_lsx_vsle_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vsle_bu : ClangBuiltin<"__builtin_lsx_vsle_bu">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsle_hu : ClangBuiltin<"__builtin_lsx_vsle_hu">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsle_wu : ClangBuiltin<"__builtin_lsx_vsle_wu">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsle_du : ClangBuiltin<"__builtin_lsx_vsle_du">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vslt_b : ClangBuiltin<"__builtin_lsx_vslt_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vslt_h : ClangBuiltin<"__builtin_lsx_vslt_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vslt_w : ClangBuiltin<"__builtin_lsx_vslt_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vslt_d : ClangBuiltin<"__builtin_lsx_vslt_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vslt_bu : ClangBuiltin<"__builtin_lsx_vslt_bu">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vslt_hu : ClangBuiltin<"__builtin_lsx_vslt_hu">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vslt_wu : ClangBuiltin<"__builtin_lsx_vslt_wu">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vslt_du : ClangBuiltin<"__builtin_lsx_vslt_du">, ++ 
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vadd_b : ClangBuiltin<"__builtin_lsx_vadd_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lsx_vadd_h : ClangBuiltin<"__builtin_lsx_vadd_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lsx_vadd_w : ClangBuiltin<"__builtin_lsx_vadd_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lsx_vadd_d : ClangBuiltin<"__builtin_lsx_vadd_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], ++ [Commutative, IntrNoMem]>; ++ ++def int_loongarch_lsx_vsub_b : ClangBuiltin<"__builtin_lsx_vsub_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsub_h : ClangBuiltin<"__builtin_lsx_vsub_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsub_w : ClangBuiltin<"__builtin_lsx_vsub_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsub_d : ClangBuiltin<"__builtin_lsx_vsub_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vmax_b : ClangBuiltin<"__builtin_lsx_vmax_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmax_h : ClangBuiltin<"__builtin_lsx_vmax_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmax_w : ClangBuiltin<"__builtin_lsx_vmax_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmax_d : ClangBuiltin<"__builtin_lsx_vmax_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vmin_b : ClangBuiltin<"__builtin_lsx_vmin_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmin_h : ClangBuiltin<"__builtin_lsx_vmin_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmin_w : ClangBuiltin<"__builtin_lsx_vmin_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmin_d : ClangBuiltin<"__builtin_lsx_vmin_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vmax_bu : ClangBuiltin<"__builtin_lsx_vmax_bu">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmax_hu : ClangBuiltin<"__builtin_lsx_vmax_hu">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmax_wu : ClangBuiltin<"__builtin_lsx_vmax_wu">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmax_du : ClangBuiltin<"__builtin_lsx_vmax_du">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vmin_bu : ClangBuiltin<"__builtin_lsx_vmin_bu">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmin_hu : ClangBuiltin<"__builtin_lsx_vmin_hu">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmin_wu : ClangBuiltin<"__builtin_lsx_vmin_wu">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; 
++def int_loongarch_lsx_vmin_du : ClangBuiltin<"__builtin_lsx_vmin_du">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vmul_b : ClangBuiltin<"__builtin_lsx_vmul_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmul_h : ClangBuiltin<"__builtin_lsx_vmul_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmul_w : ClangBuiltin<"__builtin_lsx_vmul_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmul_d : ClangBuiltin<"__builtin_lsx_vmul_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vmadd_b : ClangBuiltin<"__builtin_lsx_vmadd_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty], ++ [IntrNoMem]>; ++def int_loongarch_lsx_vmadd_h : ClangBuiltin<"__builtin_lsx_vmadd_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty], ++ [IntrNoMem]>; ++def int_loongarch_lsx_vmadd_w : ClangBuiltin<"__builtin_lsx_vmadd_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty], ++ [IntrNoMem]>; ++def int_loongarch_lsx_vmadd_d : ClangBuiltin<"__builtin_lsx_vmadd_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], ++ [IntrNoMem]>; ++ ++def int_loongarch_lsx_vmsub_b : ClangBuiltin<"__builtin_lsx_vmsub_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty], ++ [IntrNoMem]>; ++def int_loongarch_lsx_vmsub_h : ClangBuiltin<"__builtin_lsx_vmsub_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty], ++ [IntrNoMem]>; ++def int_loongarch_lsx_vmsub_w : ClangBuiltin<"__builtin_lsx_vmsub_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty], ++ [IntrNoMem]>; ++def int_loongarch_lsx_vmsub_d : ClangBuiltin<"__builtin_lsx_vmsub_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], ++ [IntrNoMem]>; ++ ++def int_loongarch_lsx_vdiv_b : ClangBuiltin<"__builtin_lsx_vdiv_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vdiv_h : ClangBuiltin<"__builtin_lsx_vdiv_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vdiv_w : ClangBuiltin<"__builtin_lsx_vdiv_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vdiv_d : ClangBuiltin<"__builtin_lsx_vdiv_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vmod_b : ClangBuiltin<"__builtin_lsx_vmod_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmod_h : ClangBuiltin<"__builtin_lsx_vmod_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmod_w : ClangBuiltin<"__builtin_lsx_vmod_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmod_d : ClangBuiltin<"__builtin_lsx_vmod_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vdiv_bu : ClangBuiltin<"__builtin_lsx_vdiv_bu">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vdiv_hu : ClangBuiltin<"__builtin_lsx_vdiv_hu">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], 
[IntrNoMem]>; ++def int_loongarch_lsx_vdiv_wu : ClangBuiltin<"__builtin_lsx_vdiv_wu">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vdiv_du : ClangBuiltin<"__builtin_lsx_vdiv_du">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vsll_b : ClangBuiltin<"__builtin_lsx_vsll_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsll_h : ClangBuiltin<"__builtin_lsx_vsll_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsll_w : ClangBuiltin<"__builtin_lsx_vsll_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsll_d : ClangBuiltin<"__builtin_lsx_vsll_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vsrl_b : ClangBuiltin<"__builtin_lsx_vsrl_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrl_h : ClangBuiltin<"__builtin_lsx_vsrl_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrl_w : ClangBuiltin<"__builtin_lsx_vsrl_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrl_d : ClangBuiltin<"__builtin_lsx_vsrl_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vbitclr_b : ClangBuiltin<"__builtin_lsx_vbitclr_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vbitclr_h : ClangBuiltin<"__builtin_lsx_vbitclr_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vbitclr_w : ClangBuiltin<"__builtin_lsx_vbitclr_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vbitclr_d : ClangBuiltin<"__builtin_lsx_vbitclr_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vbitset_b : ClangBuiltin<"__builtin_lsx_vbitset_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vbitset_h : ClangBuiltin<"__builtin_lsx_vbitset_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vbitset_w : ClangBuiltin<"__builtin_lsx_vbitset_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vbitset_d : ClangBuiltin<"__builtin_lsx_vbitset_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vpackev_b : ClangBuiltin<"__builtin_lsx_vpackev_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vpackev_h : ClangBuiltin<"__builtin_lsx_vpackev_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vpackev_w : ClangBuiltin<"__builtin_lsx_vpackev_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vpackev_d : ClangBuiltin<"__builtin_lsx_vpackev_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vpackod_b : ClangBuiltin<"__builtin_lsx_vpackod_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vpackod_h : 
ClangBuiltin<"__builtin_lsx_vpackod_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vpackod_w : ClangBuiltin<"__builtin_lsx_vpackod_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vpackod_d : ClangBuiltin<"__builtin_lsx_vpackod_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vilvl_b : ClangBuiltin<"__builtin_lsx_vilvl_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vilvl_h : ClangBuiltin<"__builtin_lsx_vilvl_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vilvl_w : ClangBuiltin<"__builtin_lsx_vilvl_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vilvl_d : ClangBuiltin<"__builtin_lsx_vilvl_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vilvh_b : ClangBuiltin<"__builtin_lsx_vilvh_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vilvh_h : ClangBuiltin<"__builtin_lsx_vilvh_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vilvh_w : ClangBuiltin<"__builtin_lsx_vilvh_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vilvh_d : ClangBuiltin<"__builtin_lsx_vilvh_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vpickev_b : ClangBuiltin<"__builtin_lsx_vpickev_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vpickev_h : ClangBuiltin<"__builtin_lsx_vpickev_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vpickev_w : ClangBuiltin<"__builtin_lsx_vpickev_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vpickev_d : ClangBuiltin<"__builtin_lsx_vpickev_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vand_v : ClangBuiltin<"__builtin_lsx_vand_v">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vor_v : ClangBuiltin<"__builtin_lsx_vor_v">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vbitrev_b : ClangBuiltin<"__builtin_lsx_vbitrev_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vbitrev_h : ClangBuiltin<"__builtin_lsx_vbitrev_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vbitrev_w : ClangBuiltin<"__builtin_lsx_vbitrev_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vbitrev_d : ClangBuiltin<"__builtin_lsx_vbitrev_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vmod_bu : ClangBuiltin<"__builtin_lsx_vmod_bu">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmod_hu : ClangBuiltin<"__builtin_lsx_vmod_hu">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmod_wu : ClangBuiltin<"__builtin_lsx_vmod_wu">, ++ Intrinsic<[llvm_v4i32_ty], 
[llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmod_du : ClangBuiltin<"__builtin_lsx_vmod_du">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vpickod_b : ClangBuiltin<"__builtin_lsx_vpickod_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vpickod_h : ClangBuiltin<"__builtin_lsx_vpickod_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vpickod_w : ClangBuiltin<"__builtin_lsx_vpickod_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vpickod_d : ClangBuiltin<"__builtin_lsx_vpickod_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vreplve_b : ClangBuiltin<"__builtin_lsx_vreplve_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vreplve_h : ClangBuiltin<"__builtin_lsx_vreplve_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vreplve_w : ClangBuiltin<"__builtin_lsx_vreplve_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vreplve_d : ClangBuiltin<"__builtin_lsx_vreplve_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vsra_b : ClangBuiltin<"__builtin_lsx_vsra_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsra_h : ClangBuiltin<"__builtin_lsx_vsra_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsra_w : ClangBuiltin<"__builtin_lsx_vsra_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsra_d : ClangBuiltin<"__builtin_lsx_vsra_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vxor_v : ClangBuiltin<"__builtin_lsx_vxor_v">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vnor_v : ClangBuiltin<"__builtin_lsx_vnor_v">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfadd_s : ClangBuiltin<"__builtin_lsx_vfadd_s">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfadd_d : ClangBuiltin<"__builtin_lsx_vfadd_d">, ++ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfsub_s : ClangBuiltin<"__builtin_lsx_vfsub_s">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfsub_d : ClangBuiltin<"__builtin_lsx_vfsub_d">, ++ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfmul_s : ClangBuiltin<"__builtin_lsx_vfmul_s">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfmul_d : ClangBuiltin<"__builtin_lsx_vfmul_d">, ++ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vshuf_h : ClangBuiltin<"__builtin_lsx_vshuf_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty], ++ [IntrNoMem]>; ++def int_loongarch_lsx_vshuf_w : ClangBuiltin<"__builtin_lsx_vshuf_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty], ++ [IntrNoMem]>; ++def 
int_loongarch_lsx_vshuf_d : ClangBuiltin<"__builtin_lsx_vshuf_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], ++ [IntrNoMem]>; ++ ++def int_loongarch_lsx_vseqi_b : ClangBuiltin<"__builtin_lsx_vseqi_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vseqi_h : ClangBuiltin<"__builtin_lsx_vseqi_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vseqi_w : ClangBuiltin<"__builtin_lsx_vseqi_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vseqi_d : ClangBuiltin<"__builtin_lsx_vseqi_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vslei_b : ClangBuiltin<"__builtin_lsx_vslei_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vslei_h : ClangBuiltin<"__builtin_lsx_vslei_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vslei_w : ClangBuiltin<"__builtin_lsx_vslei_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vslei_d : ClangBuiltin<"__builtin_lsx_vslei_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vslei_bu : ClangBuiltin<"__builtin_lsx_vslei_bu">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vslei_hu : ClangBuiltin<"__builtin_lsx_vslei_hu">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vslei_wu : ClangBuiltin<"__builtin_lsx_vslei_wu">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vslei_du : ClangBuiltin<"__builtin_lsx_vslei_du">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vslti_b : ClangBuiltin<"__builtin_lsx_vslti_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vslti_h : ClangBuiltin<"__builtin_lsx_vslti_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vslti_w : ClangBuiltin<"__builtin_lsx_vslti_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vslti_d : ClangBuiltin<"__builtin_lsx_vslti_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vslti_bu : ClangBuiltin<"__builtin_lsx_vslti_bu">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vslti_hu : ClangBuiltin<"__builtin_lsx_vslti_hu">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vslti_wu : ClangBuiltin<"__builtin_lsx_vslti_wu">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vslti_du : ClangBuiltin<"__builtin_lsx_vslti_du">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vaddi_bu : ClangBuiltin<"__builtin_lsx_vaddi_bu">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lsx_vaddi_hu : ClangBuiltin<"__builtin_lsx_vaddi_hu">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lsx_vaddi_wu : ClangBuiltin<"__builtin_lsx_vaddi_wu">, ++ 
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lsx_vaddi_du : ClangBuiltin<"__builtin_lsx_vaddi_du">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], ++ [Commutative, IntrNoMem]>; ++ ++def int_loongarch_lsx_vsubi_bu : ClangBuiltin<"__builtin_lsx_vsubi_bu">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsubi_hu : ClangBuiltin<"__builtin_lsx_vsubi_hu">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsubi_wu : ClangBuiltin<"__builtin_lsx_vsubi_wu">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsubi_du : ClangBuiltin<"__builtin_lsx_vsubi_du">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vmaxi_b : ClangBuiltin<"__builtin_lsx_vmaxi_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmaxi_h : ClangBuiltin<"__builtin_lsx_vmaxi_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmaxi_w : ClangBuiltin<"__builtin_lsx_vmaxi_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmaxi_d : ClangBuiltin<"__builtin_lsx_vmaxi_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vmini_b : ClangBuiltin<"__builtin_lsx_vmini_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmini_h : ClangBuiltin<"__builtin_lsx_vmini_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmini_w : ClangBuiltin<"__builtin_lsx_vmini_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmini_d : ClangBuiltin<"__builtin_lsx_vmini_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vmaxi_bu : ClangBuiltin<"__builtin_lsx_vmaxi_bu">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmaxi_hu : ClangBuiltin<"__builtin_lsx_vmaxi_hu">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmaxi_wu : ClangBuiltin<"__builtin_lsx_vmaxi_wu">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmaxi_du : ClangBuiltin<"__builtin_lsx_vmaxi_du">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vmini_bu : ClangBuiltin<"__builtin_lsx_vmini_bu">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmini_hu : ClangBuiltin<"__builtin_lsx_vmini_hu">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmini_wu : ClangBuiltin<"__builtin_lsx_vmini_wu">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmini_du : ClangBuiltin<"__builtin_lsx_vmini_du">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vclz_b : ClangBuiltin<"__builtin_lsx_vclz_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vclz_h : ClangBuiltin<"__builtin_lsx_vclz_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vclz_w : 
ClangBuiltin<"__builtin_lsx_vclz_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vclz_d : ClangBuiltin<"__builtin_lsx_vclz_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vpcnt_b : ClangBuiltin<"__builtin_lsx_vpcnt_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vpcnt_h : ClangBuiltin<"__builtin_lsx_vpcnt_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vpcnt_w : ClangBuiltin<"__builtin_lsx_vpcnt_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vpcnt_d : ClangBuiltin<"__builtin_lsx_vpcnt_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfsqrt_s : ClangBuiltin<"__builtin_lsx_vfsqrt_s">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfsqrt_d : ClangBuiltin<"__builtin_lsx_vfsqrt_d">, ++ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfrint_s : ClangBuiltin<"__builtin_lsx_vfrint_s">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfrint_d : ClangBuiltin<"__builtin_lsx_vfrint_d">, ++ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vffint_s_w : ClangBuiltin<"__builtin_lsx_vffint_s_w">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vffint_d_l : ClangBuiltin<"__builtin_lsx_vffint_d_l">, ++ Intrinsic<[llvm_v2f64_ty], [llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vffint_s_wu : ClangBuiltin<"__builtin_lsx_vffint_s_wu">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vffint_d_lu : ClangBuiltin<"__builtin_lsx_vffint_d_lu">, ++ Intrinsic<[llvm_v2f64_ty], [llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vftintrz_wu_s : ClangBuiltin<"__builtin_lsx_vftintrz_wu_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vftintrz_lu_d : ClangBuiltin<"__builtin_lsx_vftintrz_lu_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vreplgr2vr_b : ClangBuiltin<"__builtin_lsx_vreplgr2vr_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vreplgr2vr_h : ClangBuiltin<"__builtin_lsx_vreplgr2vr_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vreplgr2vr_w : ClangBuiltin<"__builtin_lsx_vreplgr2vr_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vreplgr2vr_d : ClangBuiltin<"__builtin_lsx_vreplgr2vr_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vinsgr2vr_b : ClangBuiltin<"__builtin_lsx_vinsgr2vr_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty, llvm_i32_ty], ++ [IntrNoMem]>; ++def int_loongarch_lsx_vinsgr2vr_h : ClangBuiltin<"__builtin_lsx_vinsgr2vr_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty, llvm_i32_ty], ++ [IntrNoMem]>; ++def int_loongarch_lsx_vinsgr2vr_w : ClangBuiltin<"__builtin_lsx_vinsgr2vr_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty, llvm_i32_ty], ++ [IntrNoMem]>; ++def int_loongarch_lsx_vinsgr2vr_d : ClangBuiltin<"__builtin_lsx_vinsgr2vr_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i64_ty, llvm_i32_ty], ++ [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfdiv_s : ClangBuiltin<"__builtin_lsx_vfdiv_s">, ++ Intrinsic<[llvm_v4f32_ty], 
[llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfdiv_d : ClangBuiltin<"__builtin_lsx_vfdiv_d">, ++ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vslli_b : ClangBuiltin<"__builtin_lsx_vslli_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vslli_h : ClangBuiltin<"__builtin_lsx_vslli_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vslli_w : ClangBuiltin<"__builtin_lsx_vslli_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vslli_d : ClangBuiltin<"__builtin_lsx_vslli_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vsrli_b : ClangBuiltin<"__builtin_lsx_vsrli_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrli_h : ClangBuiltin<"__builtin_lsx_vsrli_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrli_w : ClangBuiltin<"__builtin_lsx_vsrli_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrli_d : ClangBuiltin<"__builtin_lsx_vsrli_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vsrai_b : ClangBuiltin<"__builtin_lsx_vsrai_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrai_h : ClangBuiltin<"__builtin_lsx_vsrai_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrai_w : ClangBuiltin<"__builtin_lsx_vsrai_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrai_d : ClangBuiltin<"__builtin_lsx_vsrai_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vshuf4i_b : ClangBuiltin<"__builtin_lsx_vshuf4i_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vshuf4i_h : ClangBuiltin<"__builtin_lsx_vshuf4i_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vshuf4i_w : ClangBuiltin<"__builtin_lsx_vshuf4i_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vshuf4i_d : ClangBuiltin<"__builtin_lsx_vshuf4i_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vrotr_b : ClangBuiltin<"__builtin_lsx_vrotr_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vrotr_h : ClangBuiltin<"__builtin_lsx_vrotr_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vrotr_w : ClangBuiltin<"__builtin_lsx_vrotr_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vrotr_d : ClangBuiltin<"__builtin_lsx_vrotr_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vrotri_b : ClangBuiltin<"__builtin_lsx_vrotri_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vrotri_h : ClangBuiltin<"__builtin_lsx_vrotri_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vrotri_w : 
ClangBuiltin<"__builtin_lsx_vrotri_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vrotri_d : ClangBuiltin<"__builtin_lsx_vrotri_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vld : ClangBuiltin<"__builtin_lsx_vld">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_ptr_ty, llvm_i32_ty], ++ [IntrReadMem, IntrArgMemOnly]>; ++ ++def int_loongarch_lsx_vst : ClangBuiltin<"__builtin_lsx_vst">, ++ Intrinsic<[], [llvm_v16i8_ty, llvm_ptr_ty, llvm_i32_ty], ++ [IntrArgMemOnly]>; ++ ++def int_loongarch_lsx_bz_v : ClangBuiltin<"__builtin_lsx_bz_v">, ++ Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_bnz_v : ClangBuiltin<"__builtin_lsx_bnz_v">, ++ Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_bz_b : ClangBuiltin<"__builtin_lsx_bz_b">, ++ Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_bz_h : ClangBuiltin<"__builtin_lsx_bz_h">, ++ Intrinsic<[llvm_i32_ty], [llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_bz_w : ClangBuiltin<"__builtin_lsx_bz_w">, ++ Intrinsic<[llvm_i32_ty], [llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_bz_d : ClangBuiltin<"__builtin_lsx_bz_d">, ++ Intrinsic<[llvm_i32_ty], [llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_bnz_b : ClangBuiltin<"__builtin_lsx_bnz_b">, ++ Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_bnz_h : ClangBuiltin<"__builtin_lsx_bnz_h">, ++ Intrinsic<[llvm_i32_ty], [llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_bnz_w : ClangBuiltin<"__builtin_lsx_bnz_w">, ++ Intrinsic<[llvm_i32_ty], [llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_bnz_d : ClangBuiltin<"__builtin_lsx_bnz_d">, ++ Intrinsic<[llvm_i32_ty], [llvm_v2i64_ty], [IntrNoMem]>; ++ ++//===----------------------------------------------------------------------===// ++//LoongArch LASX ++ ++def int_loongarch_lasx_xvfmadd_s : ClangBuiltin<"__builtin_lasx_xvfmadd_s">, ++ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty], ++ [IntrNoMem]>; ++def int_loongarch_lasx_xvfmadd_d : ClangBuiltin<"__builtin_lasx_xvfmadd_d">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty], ++ [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfmsub_s : ClangBuiltin<"__builtin_lasx_xvfmsub_s">, ++ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty], ++ [IntrNoMem]>; ++def int_loongarch_lasx_xvfmsub_d : ClangBuiltin<"__builtin_lasx_xvfmsub_d">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty], ++ [IntrNoMem]>; ++ ++ ++def int_loongarch_lasx_xvfnmadd_s : ClangBuiltin<"__builtin_lasx_xvfnmadd_s">, ++ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty], ++ [IntrNoMem]>; ++def int_loongarch_lasx_xvfnmadd_d : ClangBuiltin<"__builtin_lasx_xvfnmadd_d">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty], ++ [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfnmsub_s : ClangBuiltin<"__builtin_lasx_xvfnmsub_s">, ++ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty], ++ [IntrNoMem]>; ++def int_loongarch_lasx_xvfnmsub_d : ClangBuiltin<"__builtin_lasx_xvfnmsub_d">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty], ++ [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvclo_b : ClangBuiltin<"__builtin_lasx_xvclo_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvclo_h : 
ClangBuiltin<"__builtin_lasx_xvclo_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvclo_w : ClangBuiltin<"__builtin_lasx_xvclo_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvclo_d : ClangBuiltin<"__builtin_lasx_xvclo_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvflogb_s : ClangBuiltin<"__builtin_lasx_xvflogb_s">, ++ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvflogb_d : ClangBuiltin<"__builtin_lasx_xvflogb_d">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvpickve2gr_w : ClangBuiltin<"__builtin_lasx_xvpickve2gr_w">, ++ Intrinsic<[llvm_i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvpickve2gr_d : ClangBuiltin<"__builtin_lasx_xvpickve2gr_d">, ++ Intrinsic<[llvm_i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvpickve2gr_wu : ClangBuiltin<"__builtin_lasx_xvpickve2gr_wu">, ++ Intrinsic<[llvm_i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvpickve2gr_du : ClangBuiltin<"__builtin_lasx_xvpickve2gr_du">, ++ Intrinsic<[llvm_i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvmskltz_b : ClangBuiltin<"__builtin_lasx_xvmskltz_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmskltz_h : ClangBuiltin<"__builtin_lasx_xvmskltz_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmskltz_w : ClangBuiltin<"__builtin_lasx_xvmskltz_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmskltz_d : ClangBuiltin<"__builtin_lasx_xvmskltz_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfcmp_caf_s : ClangBuiltin<"__builtin_lasx_xvfcmp_caf_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfcmp_caf_d : ClangBuiltin<"__builtin_lasx_xvfcmp_caf_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfcmp_cor_s : ClangBuiltin<"__builtin_lasx_xvfcmp_cor_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfcmp_cor_d : ClangBuiltin<"__builtin_lasx_xvfcmp_cor_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfcmp_cun_s : ClangBuiltin<"__builtin_lasx_xvfcmp_cun_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfcmp_cun_d : ClangBuiltin<"__builtin_lasx_xvfcmp_cun_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfcmp_cune_s : ClangBuiltin<"__builtin_lasx_xvfcmp_cune_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfcmp_cune_d : ClangBuiltin<"__builtin_lasx_xvfcmp_cune_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfcmp_cueq_s : ClangBuiltin<"__builtin_lasx_xvfcmp_cueq_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfcmp_cueq_d : ClangBuiltin<"__builtin_lasx_xvfcmp_cueq_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfcmp_ceq_s : 
ClangBuiltin<"__builtin_lasx_xvfcmp_ceq_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfcmp_ceq_d : ClangBuiltin<"__builtin_lasx_xvfcmp_ceq_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfcmp_cne_s : ClangBuiltin<"__builtin_lasx_xvfcmp_cne_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfcmp_cne_d : ClangBuiltin<"__builtin_lasx_xvfcmp_cne_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfcmp_clt_s : ClangBuiltin<"__builtin_lasx_xvfcmp_clt_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfcmp_clt_d : ClangBuiltin<"__builtin_lasx_xvfcmp_clt_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfcmp_cult_s : ClangBuiltin<"__builtin_lasx_xvfcmp_cult_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfcmp_cult_d : ClangBuiltin<"__builtin_lasx_xvfcmp_cult_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfcmp_cle_s : ClangBuiltin<"__builtin_lasx_xvfcmp_cle_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfcmp_cle_d : ClangBuiltin<"__builtin_lasx_xvfcmp_cle_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfcmp_cule_s : ClangBuiltin<"__builtin_lasx_xvfcmp_cule_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfcmp_cule_d : ClangBuiltin<"__builtin_lasx_xvfcmp_cule_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfcmp_saf_s : ClangBuiltin<"__builtin_lasx_xvfcmp_saf_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfcmp_saf_d : ClangBuiltin<"__builtin_lasx_xvfcmp_saf_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfcmp_sor_s : ClangBuiltin<"__builtin_lasx_xvfcmp_sor_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfcmp_sor_d : ClangBuiltin<"__builtin_lasx_xvfcmp_sor_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfcmp_sun_s : ClangBuiltin<"__builtin_lasx_xvfcmp_sun_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfcmp_sun_d : ClangBuiltin<"__builtin_lasx_xvfcmp_sun_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfcmp_sune_s : ClangBuiltin<"__builtin_lasx_xvfcmp_sune_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfcmp_sune_d : ClangBuiltin<"__builtin_lasx_xvfcmp_sune_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfcmp_sueq_s : ClangBuiltin<"__builtin_lasx_xvfcmp_sueq_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfcmp_sueq_d : ClangBuiltin<"__builtin_lasx_xvfcmp_sueq_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ 
++def int_loongarch_lasx_xvfcmp_seq_s : ClangBuiltin<"__builtin_lasx_xvfcmp_seq_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfcmp_seq_d : ClangBuiltin<"__builtin_lasx_xvfcmp_seq_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfcmp_sne_s : ClangBuiltin<"__builtin_lasx_xvfcmp_sne_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfcmp_sne_d : ClangBuiltin<"__builtin_lasx_xvfcmp_sne_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfcmp_slt_s : ClangBuiltin<"__builtin_lasx_xvfcmp_slt_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfcmp_slt_d : ClangBuiltin<"__builtin_lasx_xvfcmp_slt_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfcmp_sult_s : ClangBuiltin<"__builtin_lasx_xvfcmp_sult_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfcmp_sult_d : ClangBuiltin<"__builtin_lasx_xvfcmp_sult_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfcmp_sle_s : ClangBuiltin<"__builtin_lasx_xvfcmp_sle_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfcmp_sle_d : ClangBuiltin<"__builtin_lasx_xvfcmp_sle_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfcmp_sule_s : ClangBuiltin<"__builtin_lasx_xvfcmp_sule_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfcmp_sule_d : ClangBuiltin<"__builtin_lasx_xvfcmp_sule_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvbitsel_v : ClangBuiltin<"__builtin_lasx_xvbitsel_v">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvshuf_b : ClangBuiltin<"__builtin_lasx_xvshuf_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvldrepl_b : ClangBuiltin<"__builtin_lasx_xvldrepl_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>; ++def int_loongarch_lasx_xvldrepl_h : ClangBuiltin<"__builtin_lasx_xvldrepl_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>; ++def int_loongarch_lasx_xvldrepl_w : ClangBuiltin<"__builtin_lasx_xvldrepl_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>; ++def int_loongarch_lasx_xvldrepl_d : ClangBuiltin<"__builtin_lasx_xvldrepl_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>; ++ ++def int_loongarch_lasx_xvstelm_b : ClangBuiltin<"__builtin_lasx_xvstelm_b">, ++ Intrinsic<[], [llvm_v32i8_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrArgMemOnly]>; ++def int_loongarch_lasx_xvstelm_h : ClangBuiltin<"__builtin_lasx_xvstelm_h">, ++ Intrinsic<[], [llvm_v16i16_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrArgMemOnly]>; ++def int_loongarch_lasx_xvstelm_w : ClangBuiltin<"__builtin_lasx_xvstelm_w">, ++ Intrinsic<[], [llvm_v8i32_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrArgMemOnly]>; ++def int_loongarch_lasx_xvstelm_d : 
ClangBuiltin<"__builtin_lasx_xvstelm_d">, ++ Intrinsic<[], [llvm_v4i64_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrArgMemOnly]>; ++ ++def int_loongarch_lasx_xvldx : ClangBuiltin<"__builtin_lasx_xvldx">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_ptr_ty, llvm_i64_ty], ++ [IntrReadMem, IntrArgMemOnly]>; ++ ++def int_loongarch_lasx_xvstx : ClangBuiltin<"__builtin_lasx_xvstx">, ++ Intrinsic<[], [llvm_v32i8_ty, llvm_ptr_ty, llvm_i64_ty], ++ [IntrArgMemOnly]>; ++ ++def int_loongarch_lasx_xvaddwev_d_w : ClangBuiltin<"__builtin_lasx_xvaddwev_d_w">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvaddwev_w_h : ClangBuiltin<"__builtin_lasx_xvaddwev_w_h">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvaddwev_h_b : ClangBuiltin<"__builtin_lasx_xvaddwev_h_b">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvaddwev_q_d : ClangBuiltin<"__builtin_lasx_xvaddwev_q_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvsubwev_d_w : ClangBuiltin<"__builtin_lasx_xvsubwev_d_w">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsubwev_w_h : ClangBuiltin<"__builtin_lasx_xvsubwev_w_h">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsubwev_h_b : ClangBuiltin<"__builtin_lasx_xvsubwev_h_b">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsubwev_q_d : ClangBuiltin<"__builtin_lasx_xvsubwev_q_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvaddwod_d_w : ClangBuiltin<"__builtin_lasx_xvaddwod_d_w">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvaddwod_w_h : ClangBuiltin<"__builtin_lasx_xvaddwod_w_h">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvaddwod_h_b : ClangBuiltin<"__builtin_lasx_xvaddwod_h_b">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvaddwod_q_d : ClangBuiltin<"__builtin_lasx_xvaddwod_q_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvsubwod_d_w : ClangBuiltin<"__builtin_lasx_xvsubwod_d_w">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsubwod_w_h : ClangBuiltin<"__builtin_lasx_xvsubwod_w_h">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsubwod_h_b : ClangBuiltin<"__builtin_lasx_xvsubwod_h_b">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsubwod_q_d : ClangBuiltin<"__builtin_lasx_xvsubwod_q_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvaddwev_d_wu : ClangBuiltin<"__builtin_lasx_xvaddwev_d_wu">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvaddwev_w_hu : ClangBuiltin<"__builtin_lasx_xvaddwev_w_hu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvaddwev_h_bu : ClangBuiltin<"__builtin_lasx_xvaddwev_h_bu">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; 
++def int_loongarch_lasx_xvaddwev_q_du : ClangBuiltin<"__builtin_lasx_xvaddwev_q_du">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvsubwev_d_wu : ClangBuiltin<"__builtin_lasx_xvsubwev_d_wu">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsubwev_w_hu : ClangBuiltin<"__builtin_lasx_xvsubwev_w_hu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsubwev_h_bu : ClangBuiltin<"__builtin_lasx_xvsubwev_h_bu">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsubwev_q_du : ClangBuiltin<"__builtin_lasx_xvsubwev_q_du">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvaddwod_d_wu : ClangBuiltin<"__builtin_lasx_xvaddwod_d_wu">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvaddwod_w_hu : ClangBuiltin<"__builtin_lasx_xvaddwod_w_hu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvaddwod_h_bu : ClangBuiltin<"__builtin_lasx_xvaddwod_h_bu">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvaddwod_q_du : ClangBuiltin<"__builtin_lasx_xvaddwod_q_du">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvsubwod_d_wu : ClangBuiltin<"__builtin_lasx_xvsubwod_d_wu">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsubwod_w_hu : ClangBuiltin<"__builtin_lasx_xvsubwod_w_hu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsubwod_h_bu : ClangBuiltin<"__builtin_lasx_xvsubwod_h_bu">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsubwod_q_du : ClangBuiltin<"__builtin_lasx_xvsubwod_q_du">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvaddwev_d_wu_w : ClangBuiltin<"__builtin_lasx_xvaddwev_d_wu_w">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvaddwev_w_hu_h : ClangBuiltin<"__builtin_lasx_xvaddwev_w_hu_h">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvaddwev_h_bu_b : ClangBuiltin<"__builtin_lasx_xvaddwev_h_bu_b">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvaddwev_q_du_d : ClangBuiltin<"__builtin_lasx_xvaddwev_q_du_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvaddwod_d_wu_w : ClangBuiltin<"__builtin_lasx_xvaddwod_d_wu_w">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvaddwod_w_hu_h : ClangBuiltin<"__builtin_lasx_xvaddwod_w_hu_h">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvaddwod_h_bu_b : ClangBuiltin<"__builtin_lasx_xvaddwod_h_bu_b">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvaddwod_q_du_d : ClangBuiltin<"__builtin_lasx_xvaddwod_q_du_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvhaddw_qu_du : 
ClangBuiltin<"__builtin_lasx_xvhaddw_qu_du">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvhsubw_qu_du : ClangBuiltin<"__builtin_lasx_xvhsubw_qu_du">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvhaddw_q_d : ClangBuiltin<"__builtin_lasx_xvhaddw_q_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvhsubw_q_d : ClangBuiltin<"__builtin_lasx_xvhsubw_q_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvmuh_b : ClangBuiltin<"__builtin_lasx_xvmuh_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmuh_h : ClangBuiltin<"__builtin_lasx_xvmuh_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmuh_w : ClangBuiltin<"__builtin_lasx_xvmuh_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmuh_d : ClangBuiltin<"__builtin_lasx_xvmuh_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvmuh_bu : ClangBuiltin<"__builtin_lasx_xvmuh_bu">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmuh_hu : ClangBuiltin<"__builtin_lasx_xvmuh_hu">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmuh_wu : ClangBuiltin<"__builtin_lasx_xvmuh_wu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmuh_du : ClangBuiltin<"__builtin_lasx_xvmuh_du">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvmulwev_d_w : ClangBuiltin<"__builtin_lasx_xvmulwev_d_w">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmulwev_w_h : ClangBuiltin<"__builtin_lasx_xvmulwev_w_h">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmulwev_h_b : ClangBuiltin<"__builtin_lasx_xvmulwev_h_b">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmulwev_q_d : ClangBuiltin<"__builtin_lasx_xvmulwev_q_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvmulwod_d_w : ClangBuiltin<"__builtin_lasx_xvmulwod_d_w">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmulwod_w_h : ClangBuiltin<"__builtin_lasx_xvmulwod_w_h">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmulwod_h_b : ClangBuiltin<"__builtin_lasx_xvmulwod_h_b">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmulwod_q_d : ClangBuiltin<"__builtin_lasx_xvmulwod_q_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvmulwev_d_wu : ClangBuiltin<"__builtin_lasx_xvmulwev_d_wu">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmulwev_w_hu : ClangBuiltin<"__builtin_lasx_xvmulwev_w_hu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmulwev_h_bu : ClangBuiltin<"__builtin_lasx_xvmulwev_h_bu">, ++ 
Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmulwev_q_du : ClangBuiltin<"__builtin_lasx_xvmulwev_q_du">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvmulwod_d_wu : ClangBuiltin<"__builtin_lasx_xvmulwod_d_wu">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmulwod_w_hu : ClangBuiltin<"__builtin_lasx_xvmulwod_w_hu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmulwod_h_bu : ClangBuiltin<"__builtin_lasx_xvmulwod_h_bu">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmulwod_q_du : ClangBuiltin<"__builtin_lasx_xvmulwod_q_du">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvmulwev_d_wu_w : ClangBuiltin<"__builtin_lasx_xvmulwev_d_wu_w">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmulwev_w_hu_h : ClangBuiltin<"__builtin_lasx_xvmulwev_w_hu_h">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmulwev_h_bu_b : ClangBuiltin<"__builtin_lasx_xvmulwev_h_bu_b">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmulwev_q_du_d : ClangBuiltin<"__builtin_lasx_xvmulwev_q_du_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvmulwod_d_wu_w : ClangBuiltin<"__builtin_lasx_xvmulwod_d_wu_w">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmulwod_w_hu_h : ClangBuiltin<"__builtin_lasx_xvmulwod_w_hu_h">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmulwod_h_bu_b : ClangBuiltin<"__builtin_lasx_xvmulwod_h_bu_b">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmulwod_q_du_d : ClangBuiltin<"__builtin_lasx_xvmulwod_q_du_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvmaddwev_d_w : ClangBuiltin<"__builtin_lasx_xvmaddwev_d_w">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmaddwev_w_h : ClangBuiltin<"__builtin_lasx_xvmaddwev_w_h">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmaddwev_h_b : ClangBuiltin<"__builtin_lasx_xvmaddwev_h_b">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmaddwev_q_d : ClangBuiltin<"__builtin_lasx_xvmaddwev_q_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvmaddwod_d_w : ClangBuiltin<"__builtin_lasx_xvmaddwod_d_w">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmaddwod_w_h : ClangBuiltin<"__builtin_lasx_xvmaddwod_w_h">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmaddwod_h_b : ClangBuiltin<"__builtin_lasx_xvmaddwod_h_b">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmaddwod_q_d : 
ClangBuiltin<"__builtin_lasx_xvmaddwod_q_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvmaddwev_d_wu : ClangBuiltin<"__builtin_lasx_xvmaddwev_d_wu">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmaddwev_w_hu : ClangBuiltin<"__builtin_lasx_xvmaddwev_w_hu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmaddwev_h_bu : ClangBuiltin<"__builtin_lasx_xvmaddwev_h_bu">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmaddwev_q_du : ClangBuiltin<"__builtin_lasx_xvmaddwev_q_du">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvmaddwod_d_wu : ClangBuiltin<"__builtin_lasx_xvmaddwod_d_wu">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmaddwod_w_hu : ClangBuiltin<"__builtin_lasx_xvmaddwod_w_hu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmaddwod_h_bu : ClangBuiltin<"__builtin_lasx_xvmaddwod_h_bu">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmaddwod_q_du : ClangBuiltin<"__builtin_lasx_xvmaddwod_q_du">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvmaddwev_d_wu_w : ClangBuiltin<"__builtin_lasx_xvmaddwev_d_wu_w">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmaddwev_w_hu_h : ClangBuiltin<"__builtin_lasx_xvmaddwev_w_hu_h">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmaddwev_h_bu_b : ClangBuiltin<"__builtin_lasx_xvmaddwev_h_bu_b">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmaddwev_q_du_d : ClangBuiltin<"__builtin_lasx_xvmaddwev_q_du_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvmaddwod_d_wu_w : ClangBuiltin<"__builtin_lasx_xvmaddwod_d_wu_w">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmaddwod_w_hu_h : ClangBuiltin<"__builtin_lasx_xvmaddwod_w_hu_h">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmaddwod_h_bu_b : ClangBuiltin<"__builtin_lasx_xvmaddwod_h_bu_b">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmaddwod_q_du_d : ClangBuiltin<"__builtin_lasx_xvmaddwod_q_du_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvsrln_b_h : ClangBuiltin<"__builtin_lasx_xvsrln_b_h">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrln_h_w : ClangBuiltin<"__builtin_lasx_xvsrln_h_w">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrln_w_d : ClangBuiltin<"__builtin_lasx_xvsrln_w_d">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; 
++ ++def int_loongarch_lasx_xvsran_b_h : ClangBuiltin<"__builtin_lasx_xvsran_b_h">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsran_h_w : ClangBuiltin<"__builtin_lasx_xvsran_h_w">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsran_w_d : ClangBuiltin<"__builtin_lasx_xvsran_w_d">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvsrlrn_b_h : ClangBuiltin<"__builtin_lasx_xvsrlrn_b_h">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrlrn_h_w : ClangBuiltin<"__builtin_lasx_xvsrlrn_h_w">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrlrn_w_d : ClangBuiltin<"__builtin_lasx_xvsrlrn_w_d">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvsrarn_b_h : ClangBuiltin<"__builtin_lasx_xvsrarn_b_h">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrarn_h_w : ClangBuiltin<"__builtin_lasx_xvsrarn_h_w">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrarn_w_d : ClangBuiltin<"__builtin_lasx_xvsrarn_w_d">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvssrln_b_h : ClangBuiltin<"__builtin_lasx_xvssrln_b_h">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrln_h_w : ClangBuiltin<"__builtin_lasx_xvssrln_h_w">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrln_w_d : ClangBuiltin<"__builtin_lasx_xvssrln_w_d">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvssran_b_h : ClangBuiltin<"__builtin_lasx_xvssran_b_h">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssran_h_w : ClangBuiltin<"__builtin_lasx_xvssran_h_w">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssran_w_d : ClangBuiltin<"__builtin_lasx_xvssran_w_d">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvssrlrn_b_h : ClangBuiltin<"__builtin_lasx_xvssrlrn_b_h">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrlrn_h_w : ClangBuiltin<"__builtin_lasx_xvssrlrn_h_w">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrlrn_w_d : ClangBuiltin<"__builtin_lasx_xvssrlrn_w_d">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvssrarn_b_h : ClangBuiltin<"__builtin_lasx_xvssrarn_b_h">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrarn_h_w : ClangBuiltin<"__builtin_lasx_xvssrarn_h_w">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrarn_w_d : ClangBuiltin<"__builtin_lasx_xvssrarn_w_d">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvssrln_bu_h : ClangBuiltin<"__builtin_lasx_xvssrln_bu_h">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; 
++def int_loongarch_lasx_xvssrln_hu_w : ClangBuiltin<"__builtin_lasx_xvssrln_hu_w">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrln_wu_d : ClangBuiltin<"__builtin_lasx_xvssrln_wu_d">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvssran_bu_h : ClangBuiltin<"__builtin_lasx_xvssran_bu_h">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssran_hu_w : ClangBuiltin<"__builtin_lasx_xvssran_hu_w">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssran_wu_d : ClangBuiltin<"__builtin_lasx_xvssran_wu_d">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvssrlrn_bu_h : ClangBuiltin<"__builtin_lasx_xvssrlrn_bu_h">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrlrn_hu_w : ClangBuiltin<"__builtin_lasx_xvssrlrn_hu_w">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrlrn_wu_d : ClangBuiltin<"__builtin_lasx_xvssrlrn_wu_d">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvssrarn_bu_h : ClangBuiltin<"__builtin_lasx_xvssrarn_bu_h">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrarn_hu_w : ClangBuiltin<"__builtin_lasx_xvssrarn_hu_w">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrarn_wu_d : ClangBuiltin<"__builtin_lasx_xvssrarn_wu_d">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvandn_v : ClangBuiltin<"__builtin_lasx_xvandn_v">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvorn_v : ClangBuiltin<"__builtin_lasx_xvorn_v">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfrstp_b : ClangBuiltin<"__builtin_lasx_xvfrstp_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_v32i8_ty], ++ [IntrNoMem]>; ++def int_loongarch_lasx_xvfrstp_h : ClangBuiltin<"__builtin_lasx_xvfrstp_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_v16i16_ty], ++ [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvadd_q : ClangBuiltin<"__builtin_lasx_xvadd_q">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsub_q : ClangBuiltin<"__builtin_lasx_xvsub_q">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvsigncov_b : ClangBuiltin<"__builtin_lasx_xvsigncov_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], ++ [IntrNoMem]>; ++def int_loongarch_lasx_xvsigncov_h : ClangBuiltin<"__builtin_lasx_xvsigncov_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], ++ [IntrNoMem]>; ++def int_loongarch_lasx_xvsigncov_w : ClangBuiltin<"__builtin_lasx_xvsigncov_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], ++ [IntrNoMem]>; ++def int_loongarch_lasx_xvsigncov_d : ClangBuiltin<"__builtin_lasx_xvsigncov_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], ++ [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfcvt_h_s : ClangBuiltin<"__builtin_lasx_xvfcvt_h_s">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v8f32_ty, 
llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfcvt_s_d : ClangBuiltin<"__builtin_lasx_xvfcvt_s_d">, ++ Intrinsic<[llvm_v8f32_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvffint_s_l : ClangBuiltin<"__builtin_lasx_xvffint_s_l">, ++ Intrinsic<[llvm_v8f32_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvftint_w_d : ClangBuiltin<"__builtin_lasx_xvftint_w_d">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvftintrz_w_d : ClangBuiltin<"__builtin_lasx_xvftintrz_w_d">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvftintrp_w_d : ClangBuiltin<"__builtin_lasx_xvftintrp_w_d">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvftintrm_w_d : ClangBuiltin<"__builtin_lasx_xvftintrm_w_d">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvftintrne_w_d : ClangBuiltin<"__builtin_lasx_xvftintrne_w_d">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvbsrl_v : ClangBuiltin<"__builtin_lasx_xvbsrl_v">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvbsll_v : ClangBuiltin<"__builtin_lasx_xvbsll_v">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfrstpi_b : ClangBuiltin<"__builtin_lasx_xvfrstpi_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfrstpi_h : ClangBuiltin<"__builtin_lasx_xvfrstpi_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvneg_b : ClangBuiltin<"__builtin_lasx_xvneg_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvneg_h : ClangBuiltin<"__builtin_lasx_xvneg_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvneg_w : ClangBuiltin<"__builtin_lasx_xvneg_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvneg_d : ClangBuiltin<"__builtin_lasx_xvneg_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvmskgez_b : ClangBuiltin<"__builtin_lasx_xvmskgez_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmsknz_b : ClangBuiltin<"__builtin_lasx_xvmsknz_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfrintrm_s : ClangBuiltin<"__builtin_lasx_xvfrintrm_s">, ++ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfrintrm_d : ClangBuiltin<"__builtin_lasx_xvfrintrm_d">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfrintrp_s : ClangBuiltin<"__builtin_lasx_xvfrintrp_s">, ++ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfrintrp_d : ClangBuiltin<"__builtin_lasx_xvfrintrp_d">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfrintrz_s : ClangBuiltin<"__builtin_lasx_xvfrintrz_s">, ++ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfrintrz_d : ClangBuiltin<"__builtin_lasx_xvfrintrz_d">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty], [IntrNoMem]>; ++ ++def 
int_loongarch_lasx_xvfrintrne_s : ClangBuiltin<"__builtin_lasx_xvfrintrne_s">, ++ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfrintrne_d : ClangBuiltin<"__builtin_lasx_xvfrintrne_d">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvffinth_d_w : ClangBuiltin<"__builtin_lasx_xvffinth_d_w">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvffintl_d_w : ClangBuiltin<"__builtin_lasx_xvffintl_d_w">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v8i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvftintrm_w_s : ClangBuiltin<"__builtin_lasx_xvftintrm_w_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvftintrm_l_d : ClangBuiltin<"__builtin_lasx_xvftintrm_l_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvftintrp_w_s : ClangBuiltin<"__builtin_lasx_xvftintrp_w_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvftintrp_l_d : ClangBuiltin<"__builtin_lasx_xvftintrp_l_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvftintrz_w_s : ClangBuiltin<"__builtin_lasx_xvftintrz_w_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvftintrz_l_d : ClangBuiltin<"__builtin_lasx_xvftintrz_l_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvftintrne_w_s : ClangBuiltin<"__builtin_lasx_xvftintrne_w_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvftintrne_l_d : ClangBuiltin<"__builtin_lasx_xvftintrne_l_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvftinth_l_s : ClangBuiltin<"__builtin_lasx_xvftinth_l_s">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvftintl_l_s : ClangBuiltin<"__builtin_lasx_xvftintl_l_s">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8f32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvftintrmh_l_s : ClangBuiltin<"__builtin_lasx_xvftintrmh_l_s">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvftintrml_l_s : ClangBuiltin<"__builtin_lasx_xvftintrml_l_s">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8f32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvftintrph_l_s : ClangBuiltin<"__builtin_lasx_xvftintrph_l_s">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvftintrpl_l_s : ClangBuiltin<"__builtin_lasx_xvftintrpl_l_s">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8f32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvftintrzh_l_s : ClangBuiltin<"__builtin_lasx_xvftintrzh_l_s">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvftintrzl_l_s : ClangBuiltin<"__builtin_lasx_xvftintrzl_l_s">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8f32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvftintrneh_l_s : ClangBuiltin<"__builtin_lasx_xvftintrneh_l_s">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvftintrnel_l_s : ClangBuiltin<"__builtin_lasx_xvftintrnel_l_s">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8f32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvexth_d_w : ClangBuiltin<"__builtin_lasx_xvexth_d_w">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvexth_w_h : ClangBuiltin<"__builtin_lasx_xvexth_w_h">, ++ Intrinsic<[llvm_v8i32_ty], 
[llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvexth_h_b : ClangBuiltin<"__builtin_lasx_xvexth_h_b">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvexth_q_d : ClangBuiltin<"__builtin_lasx_xvexth_q_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvsllwil_d_w : ClangBuiltin<"__builtin_lasx_xvsllwil_d_w">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsllwil_w_h : ClangBuiltin<"__builtin_lasx_xvsllwil_w_h">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsllwil_h_b : ClangBuiltin<"__builtin_lasx_xvsllwil_h_b">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvsllwil_du_wu : ClangBuiltin<"__builtin_lasx_xvsllwil_du_wu">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsllwil_wu_hu : ClangBuiltin<"__builtin_lasx_xvsllwil_wu_hu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsllwil_hu_bu : ClangBuiltin<"__builtin_lasx_xvsllwil_hu_bu">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvbitclri_b : ClangBuiltin<"__builtin_lasx_xvbitclri_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvbitclri_h : ClangBuiltin<"__builtin_lasx_xvbitclri_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvbitclri_w : ClangBuiltin<"__builtin_lasx_xvbitclri_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvbitclri_d : ClangBuiltin<"__builtin_lasx_xvbitclri_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvbitseti_b : ClangBuiltin<"__builtin_lasx_xvbitseti_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvbitseti_h : ClangBuiltin<"__builtin_lasx_xvbitseti_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvbitseti_w : ClangBuiltin<"__builtin_lasx_xvbitseti_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvbitseti_d : ClangBuiltin<"__builtin_lasx_xvbitseti_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvbitrevi_b : ClangBuiltin<"__builtin_lasx_xvbitrevi_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvbitrevi_h : ClangBuiltin<"__builtin_lasx_xvbitrevi_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvbitrevi_w : ClangBuiltin<"__builtin_lasx_xvbitrevi_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvbitrevi_d : ClangBuiltin<"__builtin_lasx_xvbitrevi_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvssrlrni_b_h : ClangBuiltin<"__builtin_lasx_xvssrlrni_b_h">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrlrni_h_w : ClangBuiltin<"__builtin_lasx_xvssrlrni_h_w">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; 
++def int_loongarch_lasx_xvssrlrni_w_d : ClangBuiltin<"__builtin_lasx_xvssrlrni_w_d">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrlrni_d_q : ClangBuiltin<"__builtin_lasx_xvssrlrni_d_q">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvsrani_b_h : ClangBuiltin<"__builtin_lasx_xvsrani_b_h">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrani_h_w : ClangBuiltin<"__builtin_lasx_xvsrani_h_w">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrani_w_d : ClangBuiltin<"__builtin_lasx_xvsrani_w_d">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrani_d_q : ClangBuiltin<"__builtin_lasx_xvsrani_d_q">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvextrins_b : ClangBuiltin<"__builtin_lasx_xvextrins_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvextrins_h : ClangBuiltin<"__builtin_lasx_xvextrins_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvextrins_w : ClangBuiltin<"__builtin_lasx_xvextrins_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvextrins_d : ClangBuiltin<"__builtin_lasx_xvextrins_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvbitseli_b : ClangBuiltin<"__builtin_lasx_xvbitseli_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvandi_b : ClangBuiltin<"__builtin_lasx_xvandi_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvori_b : ClangBuiltin<"__builtin_lasx_xvori_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvxori_b : ClangBuiltin<"__builtin_lasx_xvxori_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvnori_b : ClangBuiltin<"__builtin_lasx_xvnori_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvldi : ClangBuiltin<"__builtin_lasx_xvldi">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvrepli_b : ClangBuiltin<"__builtin_lasx_xvrepli_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvrepli_h : ClangBuiltin<"__builtin_lasx_xvrepli_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvrepli_w : ClangBuiltin<"__builtin_lasx_xvrepli_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvrepli_d : ClangBuiltin<"__builtin_lasx_xvrepli_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvpermi_w : ClangBuiltin<"__builtin_lasx_xvpermi_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvsadd_b : ClangBuiltin<"__builtin_lasx_xvsadd_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], ++ 
[Commutative, IntrNoMem]>; ++def int_loongarch_lasx_xvsadd_h : ClangBuiltin<"__builtin_lasx_xvsadd_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lasx_xvsadd_w : ClangBuiltin<"__builtin_lasx_xvsadd_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lasx_xvsadd_d : ClangBuiltin<"__builtin_lasx_xvsadd_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], ++ [Commutative, IntrNoMem]>; ++ ++def int_loongarch_lasx_xvssub_b : ClangBuiltin<"__builtin_lasx_xvssub_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssub_h : ClangBuiltin<"__builtin_lasx_xvssub_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssub_w : ClangBuiltin<"__builtin_lasx_xvssub_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssub_d : ClangBuiltin<"__builtin_lasx_xvssub_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvsadd_bu : ClangBuiltin<"__builtin_lasx_xvsadd_bu">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lasx_xvsadd_hu : ClangBuiltin<"__builtin_lasx_xvsadd_hu">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lasx_xvsadd_wu : ClangBuiltin<"__builtin_lasx_xvsadd_wu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lasx_xvsadd_du : ClangBuiltin<"__builtin_lasx_xvsadd_du">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], ++ [Commutative, IntrNoMem]>; ++ ++def int_loongarch_lasx_xvssub_bu : ClangBuiltin<"__builtin_lasx_xvssub_bu">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssub_hu : ClangBuiltin<"__builtin_lasx_xvssub_hu">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssub_wu : ClangBuiltin<"__builtin_lasx_xvssub_wu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssub_du : ClangBuiltin<"__builtin_lasx_xvssub_du">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvhaddw_h_b : ClangBuiltin<"__builtin_lasx_xvhaddw_h_b">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvhaddw_w_h : ClangBuiltin<"__builtin_lasx_xvhaddw_w_h">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvhaddw_d_w : ClangBuiltin<"__builtin_lasx_xvhaddw_d_w">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvhsubw_h_b : ClangBuiltin<"__builtin_lasx_xvhsubw_h_b">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvhsubw_w_h : ClangBuiltin<"__builtin_lasx_xvhsubw_w_h">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvhsubw_d_w : ClangBuiltin<"__builtin_lasx_xvhsubw_d_w">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvhaddw_hu_bu : ClangBuiltin<"__builtin_lasx_xvhaddw_hu_bu">, ++ 
Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvhaddw_wu_hu : ClangBuiltin<"__builtin_lasx_xvhaddw_wu_hu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvhaddw_du_wu : ClangBuiltin<"__builtin_lasx_xvhaddw_du_wu">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvhsubw_hu_bu : ClangBuiltin<"__builtin_lasx_xvhsubw_hu_bu">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvhsubw_wu_hu : ClangBuiltin<"__builtin_lasx_xvhsubw_wu_hu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvhsubw_du_wu : ClangBuiltin<"__builtin_lasx_xvhsubw_du_wu">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvadda_b : ClangBuiltin<"__builtin_lasx_xvadda_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lasx_xvadda_h : ClangBuiltin<"__builtin_lasx_xvadda_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lasx_xvadda_w : ClangBuiltin<"__builtin_lasx_xvadda_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lasx_xvadda_d : ClangBuiltin<"__builtin_lasx_xvadda_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], ++ [Commutative, IntrNoMem]>; ++ ++def int_loongarch_lasx_xvabsd_b : ClangBuiltin<"__builtin_lasx_xvabsd_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvabsd_h : ClangBuiltin<"__builtin_lasx_xvabsd_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvabsd_w : ClangBuiltin<"__builtin_lasx_xvabsd_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvabsd_d : ClangBuiltin<"__builtin_lasx_xvabsd_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvabsd_bu : ClangBuiltin<"__builtin_lasx_xvabsd_bu">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvabsd_hu : ClangBuiltin<"__builtin_lasx_xvabsd_hu">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvabsd_wu : ClangBuiltin<"__builtin_lasx_xvabsd_wu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvabsd_du : ClangBuiltin<"__builtin_lasx_xvabsd_du">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvavg_b : ClangBuiltin<"__builtin_lasx_xvavg_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lasx_xvavg_h : ClangBuiltin<"__builtin_lasx_xvavg_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lasx_xvavg_w : ClangBuiltin<"__builtin_lasx_xvavg_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lasx_xvavg_d : ClangBuiltin<"__builtin_lasx_xvavg_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], ++ [Commutative, IntrNoMem]>; ++ ++def int_loongarch_lasx_xvavg_bu : 
ClangBuiltin<"__builtin_lasx_xvavg_bu">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lasx_xvavg_hu : ClangBuiltin<"__builtin_lasx_xvavg_hu">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lasx_xvavg_wu : ClangBuiltin<"__builtin_lasx_xvavg_wu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lasx_xvavg_du : ClangBuiltin<"__builtin_lasx_xvavg_du">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], ++ [Commutative, IntrNoMem]>; ++ ++def int_loongarch_lasx_xvavgr_b : ClangBuiltin<"__builtin_lasx_xvavgr_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lasx_xvavgr_h : ClangBuiltin<"__builtin_lasx_xvavgr_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lasx_xvavgr_w : ClangBuiltin<"__builtin_lasx_xvavgr_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lasx_xvavgr_d : ClangBuiltin<"__builtin_lasx_xvavgr_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], ++ [Commutative, IntrNoMem]>; ++ ++def int_loongarch_lasx_xvavgr_bu : ClangBuiltin<"__builtin_lasx_xvavgr_bu">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lasx_xvavgr_hu : ClangBuiltin<"__builtin_lasx_xvavgr_hu">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lasx_xvavgr_wu : ClangBuiltin<"__builtin_lasx_xvavgr_wu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lasx_xvavgr_du : ClangBuiltin<"__builtin_lasx_xvavgr_du">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], ++ [Commutative, IntrNoMem]>; ++ ++def int_loongarch_lasx_xvsrlr_b : ClangBuiltin<"__builtin_lasx_xvsrlr_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrlr_h : ClangBuiltin<"__builtin_lasx_xvsrlr_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrlr_w : ClangBuiltin<"__builtin_lasx_xvsrlr_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrlr_d : ClangBuiltin<"__builtin_lasx_xvsrlr_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvsrar_b : ClangBuiltin<"__builtin_lasx_xvsrar_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrar_h : ClangBuiltin<"__builtin_lasx_xvsrar_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrar_w : ClangBuiltin<"__builtin_lasx_xvsrar_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrar_d : ClangBuiltin<"__builtin_lasx_xvsrar_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfmax_s : ClangBuiltin<"__builtin_lasx_xvfmax_s">, ++ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfmax_d : ClangBuiltin<"__builtin_lasx_xvfmax_d">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], 
[IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfmin_s : ClangBuiltin<"__builtin_lasx_xvfmin_s">, ++ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfmin_d : ClangBuiltin<"__builtin_lasx_xvfmin_d">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfmaxa_s : ClangBuiltin<"__builtin_lasx_xvfmaxa_s">, ++ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfmaxa_d : ClangBuiltin<"__builtin_lasx_xvfmaxa_d">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfmina_s : ClangBuiltin<"__builtin_lasx_xvfmina_s">, ++ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfmina_d : ClangBuiltin<"__builtin_lasx_xvfmina_d">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfclass_s : ClangBuiltin<"__builtin_lasx_xvfclass_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfclass_d : ClangBuiltin<"__builtin_lasx_xvfclass_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfrecip_s : ClangBuiltin<"__builtin_lasx_xvfrecip_s">, ++ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfrecip_d : ClangBuiltin<"__builtin_lasx_xvfrecip_d">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfrsqrt_s : ClangBuiltin<"__builtin_lasx_xvfrsqrt_s">, ++ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfrsqrt_d : ClangBuiltin<"__builtin_lasx_xvfrsqrt_d">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfcvtl_s_h : ClangBuiltin<"__builtin_lasx_xvfcvtl_s_h">, ++ Intrinsic<[llvm_v8f32_ty], [llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfcvtl_d_s : ClangBuiltin<"__builtin_lasx_xvfcvtl_d_s">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v8f32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfcvth_s_h : ClangBuiltin<"__builtin_lasx_xvfcvth_s_h">, ++ Intrinsic<[llvm_v8f32_ty], [llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfcvth_d_s : ClangBuiltin<"__builtin_lasx_xvfcvth_d_s">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v8f32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvftint_w_s : ClangBuiltin<"__builtin_lasx_xvftint_w_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvftint_l_d : ClangBuiltin<"__builtin_lasx_xvftint_l_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvftint_wu_s : ClangBuiltin<"__builtin_lasx_xvftint_wu_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvftint_lu_d : ClangBuiltin<"__builtin_lasx_xvftint_lu_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvsrlri_b : ClangBuiltin<"__builtin_lasx_xvsrlri_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrlri_h : ClangBuiltin<"__builtin_lasx_xvsrlri_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrlri_w : ClangBuiltin<"__builtin_lasx_xvsrlri_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrlri_d : ClangBuiltin<"__builtin_lasx_xvsrlri_d">, ++ Intrinsic<[llvm_v4i64_ty], 
[llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvsrari_b : ClangBuiltin<"__builtin_lasx_xvsrari_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrari_h : ClangBuiltin<"__builtin_lasx_xvsrari_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrari_w : ClangBuiltin<"__builtin_lasx_xvsrari_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrari_d : ClangBuiltin<"__builtin_lasx_xvsrari_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvsat_b : ClangBuiltin<"__builtin_lasx_xvsat_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsat_h : ClangBuiltin<"__builtin_lasx_xvsat_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsat_w : ClangBuiltin<"__builtin_lasx_xvsat_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsat_d : ClangBuiltin<"__builtin_lasx_xvsat_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvsat_bu : ClangBuiltin<"__builtin_lasx_xvsat_bu">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsat_hu : ClangBuiltin<"__builtin_lasx_xvsat_hu">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsat_wu : ClangBuiltin<"__builtin_lasx_xvsat_wu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsat_du : ClangBuiltin<"__builtin_lasx_xvsat_du">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvsrlni_b_h : ClangBuiltin<"__builtin_lasx_xvsrlni_b_h">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrlni_h_w : ClangBuiltin<"__builtin_lasx_xvsrlni_h_w">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrlni_w_d : ClangBuiltin<"__builtin_lasx_xvsrlni_w_d">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrlni_d_q : ClangBuiltin<"__builtin_lasx_xvsrlni_d_q">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvssrlni_b_h : ClangBuiltin<"__builtin_lasx_xvssrlni_b_h">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrlni_h_w : ClangBuiltin<"__builtin_lasx_xvssrlni_h_w">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrlni_w_d : ClangBuiltin<"__builtin_lasx_xvssrlni_w_d">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrlni_d_q : ClangBuiltin<"__builtin_lasx_xvssrlni_d_q">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvssrlrni_bu_h : ClangBuiltin<"__builtin_lasx_xvssrlrni_bu_h">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrlrni_hu_w : ClangBuiltin<"__builtin_lasx_xvssrlrni_hu_w">, ++ 
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrlrni_wu_d : ClangBuiltin<"__builtin_lasx_xvssrlrni_wu_d">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrlrni_du_q : ClangBuiltin<"__builtin_lasx_xvssrlrni_du_q">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvsrarni_b_h : ClangBuiltin<"__builtin_lasx_xvsrarni_b_h">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrarni_h_w : ClangBuiltin<"__builtin_lasx_xvsrarni_h_w">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrarni_w_d : ClangBuiltin<"__builtin_lasx_xvsrarni_w_d">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrarni_d_q : ClangBuiltin<"__builtin_lasx_xvsrarni_d_q">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvssrani_b_h : ClangBuiltin<"__builtin_lasx_xvssrani_b_h">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrani_h_w : ClangBuiltin<"__builtin_lasx_xvssrani_h_w">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrani_w_d : ClangBuiltin<"__builtin_lasx_xvssrani_w_d">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrani_d_q : ClangBuiltin<"__builtin_lasx_xvssrani_d_q">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvssrani_bu_h : ClangBuiltin<"__builtin_lasx_xvssrani_bu_h">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrani_hu_w : ClangBuiltin<"__builtin_lasx_xvssrani_hu_w">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrani_wu_d : ClangBuiltin<"__builtin_lasx_xvssrani_wu_d">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrani_du_q : ClangBuiltin<"__builtin_lasx_xvssrani_du_q">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvssrarni_b_h : ClangBuiltin<"__builtin_lasx_xvssrarni_b_h">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrarni_h_w : ClangBuiltin<"__builtin_lasx_xvssrarni_h_w">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrarni_w_d : ClangBuiltin<"__builtin_lasx_xvssrarni_w_d">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrarni_d_q : ClangBuiltin<"__builtin_lasx_xvssrarni_d_q">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvssrarni_bu_h : ClangBuiltin<"__builtin_lasx_xvssrarni_bu_h">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrarni_hu_w : ClangBuiltin<"__builtin_lasx_xvssrarni_hu_w">, ++ 
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrarni_wu_d : ClangBuiltin<"__builtin_lasx_xvssrarni_wu_d">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrarni_du_q : ClangBuiltin<"__builtin_lasx_xvssrarni_du_q">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvssrlni_bu_h : ClangBuiltin<"__builtin_lasx_xvssrlni_bu_h">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrlni_hu_w : ClangBuiltin<"__builtin_lasx_xvssrlni_hu_w">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrlni_wu_d : ClangBuiltin<"__builtin_lasx_xvssrlni_wu_d">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrlni_du_q : ClangBuiltin<"__builtin_lasx_xvssrlni_du_q">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvseq_b : ClangBuiltin<"__builtin_lasx_xvseq_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvseq_h : ClangBuiltin<"__builtin_lasx_xvseq_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvseq_w : ClangBuiltin<"__builtin_lasx_xvseq_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvseq_d : ClangBuiltin<"__builtin_lasx_xvseq_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvsle_b : ClangBuiltin<"__builtin_lasx_xvsle_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsle_h : ClangBuiltin<"__builtin_lasx_xvsle_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsle_w : ClangBuiltin<"__builtin_lasx_xvsle_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsle_d : ClangBuiltin<"__builtin_lasx_xvsle_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvsle_bu : ClangBuiltin<"__builtin_lasx_xvsle_bu">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsle_hu : ClangBuiltin<"__builtin_lasx_xvsle_hu">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsle_wu : ClangBuiltin<"__builtin_lasx_xvsle_wu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsle_du : ClangBuiltin<"__builtin_lasx_xvsle_du">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvslt_b : ClangBuiltin<"__builtin_lasx_xvslt_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvslt_h : ClangBuiltin<"__builtin_lasx_xvslt_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvslt_w : ClangBuiltin<"__builtin_lasx_xvslt_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvslt_d : ClangBuiltin<"__builtin_lasx_xvslt_d">, ++ 
Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvslt_bu : ClangBuiltin<"__builtin_lasx_xvslt_bu">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvslt_hu : ClangBuiltin<"__builtin_lasx_xvslt_hu">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvslt_wu : ClangBuiltin<"__builtin_lasx_xvslt_wu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvslt_du : ClangBuiltin<"__builtin_lasx_xvslt_du">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvadd_b : ClangBuiltin<"__builtin_lasx_xvadd_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lasx_xvadd_h : ClangBuiltin<"__builtin_lasx_xvadd_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lasx_xvadd_w : ClangBuiltin<"__builtin_lasx_xvadd_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lasx_xvadd_d : ClangBuiltin<"__builtin_lasx_xvadd_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], ++ [Commutative, IntrNoMem]>; ++ ++def int_loongarch_lasx_xvsub_b : ClangBuiltin<"__builtin_lasx_xvsub_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsub_h : ClangBuiltin<"__builtin_lasx_xvsub_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsub_w : ClangBuiltin<"__builtin_lasx_xvsub_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsub_d : ClangBuiltin<"__builtin_lasx_xvsub_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvmax_b : ClangBuiltin<"__builtin_lasx_xvmax_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmax_h : ClangBuiltin<"__builtin_lasx_xvmax_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmax_w : ClangBuiltin<"__builtin_lasx_xvmax_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmax_d : ClangBuiltin<"__builtin_lasx_xvmax_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvmin_b : ClangBuiltin<"__builtin_lasx_xvmin_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmin_h : ClangBuiltin<"__builtin_lasx_xvmin_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmin_w : ClangBuiltin<"__builtin_lasx_xvmin_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmin_d : ClangBuiltin<"__builtin_lasx_xvmin_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvmax_bu : ClangBuiltin<"__builtin_lasx_xvmax_bu">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmax_hu : ClangBuiltin<"__builtin_lasx_xvmax_hu">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmax_wu : 
ClangBuiltin<"__builtin_lasx_xvmax_wu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmax_du : ClangBuiltin<"__builtin_lasx_xvmax_du">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvmin_bu : ClangBuiltin<"__builtin_lasx_xvmin_bu">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmin_hu : ClangBuiltin<"__builtin_lasx_xvmin_hu">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmin_wu : ClangBuiltin<"__builtin_lasx_xvmin_wu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmin_du : ClangBuiltin<"__builtin_lasx_xvmin_du">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvmul_b : ClangBuiltin<"__builtin_lasx_xvmul_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmul_h : ClangBuiltin<"__builtin_lasx_xvmul_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmul_w : ClangBuiltin<"__builtin_lasx_xvmul_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmul_d : ClangBuiltin<"__builtin_lasx_xvmul_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvmadd_b : ClangBuiltin<"__builtin_lasx_xvmadd_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_v32i8_ty], ++ [IntrNoMem]>; ++def int_loongarch_lasx_xvmadd_h : ClangBuiltin<"__builtin_lasx_xvmadd_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_v16i16_ty], ++ [IntrNoMem]>; ++def int_loongarch_lasx_xvmadd_w : ClangBuiltin<"__builtin_lasx_xvmadd_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty], ++ [IntrNoMem]>; ++def int_loongarch_lasx_xvmadd_d : ClangBuiltin<"__builtin_lasx_xvmadd_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty], ++ [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvmsub_b : ClangBuiltin<"__builtin_lasx_xvmsub_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_v32i8_ty], ++ [IntrNoMem]>; ++def int_loongarch_lasx_xvmsub_h : ClangBuiltin<"__builtin_lasx_xvmsub_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_v16i16_ty], ++ [IntrNoMem]>; ++def int_loongarch_lasx_xvmsub_w : ClangBuiltin<"__builtin_lasx_xvmsub_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty], ++ [IntrNoMem]>; ++def int_loongarch_lasx_xvmsub_d : ClangBuiltin<"__builtin_lasx_xvmsub_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty], ++ [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvdiv_b : ClangBuiltin<"__builtin_lasx_xvdiv_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvdiv_h : ClangBuiltin<"__builtin_lasx_xvdiv_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvdiv_w : ClangBuiltin<"__builtin_lasx_xvdiv_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvdiv_d : ClangBuiltin<"__builtin_lasx_xvdiv_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvmod_b : 
ClangBuiltin<"__builtin_lasx_xvmod_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmod_h : ClangBuiltin<"__builtin_lasx_xvmod_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmod_w : ClangBuiltin<"__builtin_lasx_xvmod_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmod_d : ClangBuiltin<"__builtin_lasx_xvmod_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvdiv_bu : ClangBuiltin<"__builtin_lasx_xvdiv_bu">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvdiv_hu : ClangBuiltin<"__builtin_lasx_xvdiv_hu">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvdiv_wu : ClangBuiltin<"__builtin_lasx_xvdiv_wu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvdiv_du : ClangBuiltin<"__builtin_lasx_xvdiv_du">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvsll_b : ClangBuiltin<"__builtin_lasx_xvsll_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsll_h : ClangBuiltin<"__builtin_lasx_xvsll_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsll_w : ClangBuiltin<"__builtin_lasx_xvsll_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsll_d : ClangBuiltin<"__builtin_lasx_xvsll_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvsrl_b : ClangBuiltin<"__builtin_lasx_xvsrl_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrl_h : ClangBuiltin<"__builtin_lasx_xvsrl_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrl_w : ClangBuiltin<"__builtin_lasx_xvsrl_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrl_d : ClangBuiltin<"__builtin_lasx_xvsrl_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvbitclr_b : ClangBuiltin<"__builtin_lasx_xvbitclr_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvbitclr_h : ClangBuiltin<"__builtin_lasx_xvbitclr_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvbitclr_w : ClangBuiltin<"__builtin_lasx_xvbitclr_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvbitclr_d : ClangBuiltin<"__builtin_lasx_xvbitclr_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvbitset_b : ClangBuiltin<"__builtin_lasx_xvbitset_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvbitset_h : ClangBuiltin<"__builtin_lasx_xvbitset_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvbitset_w : ClangBuiltin<"__builtin_lasx_xvbitset_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def 
int_loongarch_lasx_xvbitset_d : ClangBuiltin<"__builtin_lasx_xvbitset_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvpackev_b : ClangBuiltin<"__builtin_lasx_xvpackev_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvpackev_h : ClangBuiltin<"__builtin_lasx_xvpackev_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvpackev_w : ClangBuiltin<"__builtin_lasx_xvpackev_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvpackev_d : ClangBuiltin<"__builtin_lasx_xvpackev_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvpackod_b : ClangBuiltin<"__builtin_lasx_xvpackod_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvpackod_h : ClangBuiltin<"__builtin_lasx_xvpackod_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvpackod_w : ClangBuiltin<"__builtin_lasx_xvpackod_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvpackod_d : ClangBuiltin<"__builtin_lasx_xvpackod_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvilvl_b : ClangBuiltin<"__builtin_lasx_xvilvl_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvilvl_h : ClangBuiltin<"__builtin_lasx_xvilvl_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvilvl_w : ClangBuiltin<"__builtin_lasx_xvilvl_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvilvl_d : ClangBuiltin<"__builtin_lasx_xvilvl_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvilvh_b : ClangBuiltin<"__builtin_lasx_xvilvh_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvilvh_h : ClangBuiltin<"__builtin_lasx_xvilvh_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvilvh_w : ClangBuiltin<"__builtin_lasx_xvilvh_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvilvh_d : ClangBuiltin<"__builtin_lasx_xvilvh_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvpickev_b : ClangBuiltin<"__builtin_lasx_xvpickev_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvpickev_h : ClangBuiltin<"__builtin_lasx_xvpickev_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvpickev_w : ClangBuiltin<"__builtin_lasx_xvpickev_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvpickev_d : ClangBuiltin<"__builtin_lasx_xvpickev_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvand_v : ClangBuiltin<"__builtin_lasx_xvand_v">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvor_v : ClangBuiltin<"__builtin_lasx_xvor_v">, ++ Intrinsic<[llvm_v32i8_ty], 
[llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvbitrev_b : ClangBuiltin<"__builtin_lasx_xvbitrev_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvbitrev_h : ClangBuiltin<"__builtin_lasx_xvbitrev_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvbitrev_w : ClangBuiltin<"__builtin_lasx_xvbitrev_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvbitrev_d : ClangBuiltin<"__builtin_lasx_xvbitrev_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvmod_bu : ClangBuiltin<"__builtin_lasx_xvmod_bu">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmod_hu : ClangBuiltin<"__builtin_lasx_xvmod_hu">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmod_wu : ClangBuiltin<"__builtin_lasx_xvmod_wu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmod_du : ClangBuiltin<"__builtin_lasx_xvmod_du">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvpickod_b : ClangBuiltin<"__builtin_lasx_xvpickod_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvpickod_h : ClangBuiltin<"__builtin_lasx_xvpickod_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvpickod_w : ClangBuiltin<"__builtin_lasx_xvpickod_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvpickod_d : ClangBuiltin<"__builtin_lasx_xvpickod_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvreplve_b : ClangBuiltin<"__builtin_lasx_xvreplve_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvreplve_h : ClangBuiltin<"__builtin_lasx_xvreplve_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvreplve_w : ClangBuiltin<"__builtin_lasx_xvreplve_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvreplve_d : ClangBuiltin<"__builtin_lasx_xvreplve_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvsra_b : ClangBuiltin<"__builtin_lasx_xvsra_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsra_h : ClangBuiltin<"__builtin_lasx_xvsra_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsra_w : ClangBuiltin<"__builtin_lasx_xvsra_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsra_d : ClangBuiltin<"__builtin_lasx_xvsra_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvxor_v : ClangBuiltin<"__builtin_lasx_xvxor_v">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvnor_v : ClangBuiltin<"__builtin_lasx_xvnor_v">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfadd_s : ClangBuiltin<"__builtin_lasx_xvfadd_s">, 
++ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfadd_d : ClangBuiltin<"__builtin_lasx_xvfadd_d">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfsub_s : ClangBuiltin<"__builtin_lasx_xvfsub_s">, ++ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfsub_d : ClangBuiltin<"__builtin_lasx_xvfsub_d">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfmul_s : ClangBuiltin<"__builtin_lasx_xvfmul_s">, ++ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfmul_d : ClangBuiltin<"__builtin_lasx_xvfmul_d">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvshuf_h : ClangBuiltin<"__builtin_lasx_xvshuf_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_v16i16_ty], ++ [IntrNoMem]>; ++def int_loongarch_lasx_xvshuf_w : ClangBuiltin<"__builtin_lasx_xvshuf_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty], ++ [IntrNoMem]>; ++def int_loongarch_lasx_xvshuf_d : ClangBuiltin<"__builtin_lasx_xvshuf_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty], ++ [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvseqi_b : ClangBuiltin<"__builtin_lasx_xvseqi_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvseqi_h : ClangBuiltin<"__builtin_lasx_xvseqi_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvseqi_w : ClangBuiltin<"__builtin_lasx_xvseqi_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvseqi_d : ClangBuiltin<"__builtin_lasx_xvseqi_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvslei_b : ClangBuiltin<"__builtin_lasx_xvslei_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvslei_h : ClangBuiltin<"__builtin_lasx_xvslei_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvslei_w : ClangBuiltin<"__builtin_lasx_xvslei_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvslei_d : ClangBuiltin<"__builtin_lasx_xvslei_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvslei_bu : ClangBuiltin<"__builtin_lasx_xvslei_bu">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvslei_hu : ClangBuiltin<"__builtin_lasx_xvslei_hu">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvslei_wu : ClangBuiltin<"__builtin_lasx_xvslei_wu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvslei_du : ClangBuiltin<"__builtin_lasx_xvslei_du">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvslti_b : ClangBuiltin<"__builtin_lasx_xvslti_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvslti_h : ClangBuiltin<"__builtin_lasx_xvslti_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvslti_w : 
ClangBuiltin<"__builtin_lasx_xvslti_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvslti_d : ClangBuiltin<"__builtin_lasx_xvslti_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvslti_bu : ClangBuiltin<"__builtin_lasx_xvslti_bu">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvslti_hu : ClangBuiltin<"__builtin_lasx_xvslti_hu">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvslti_wu : ClangBuiltin<"__builtin_lasx_xvslti_wu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvslti_du : ClangBuiltin<"__builtin_lasx_xvslti_du">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvaddi_bu : ClangBuiltin<"__builtin_lasx_xvaddi_bu">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lasx_xvaddi_hu : ClangBuiltin<"__builtin_lasx_xvaddi_hu">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lasx_xvaddi_wu : ClangBuiltin<"__builtin_lasx_xvaddi_wu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lasx_xvaddi_du : ClangBuiltin<"__builtin_lasx_xvaddi_du">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], ++ [Commutative, IntrNoMem]>; ++ ++def int_loongarch_lasx_xvsubi_bu : ClangBuiltin<"__builtin_lasx_xvsubi_bu">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsubi_hu : ClangBuiltin<"__builtin_lasx_xvsubi_hu">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsubi_wu : ClangBuiltin<"__builtin_lasx_xvsubi_wu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsubi_du : ClangBuiltin<"__builtin_lasx_xvsubi_du">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvmaxi_b : ClangBuiltin<"__builtin_lasx_xvmaxi_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmaxi_h : ClangBuiltin<"__builtin_lasx_xvmaxi_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmaxi_w : ClangBuiltin<"__builtin_lasx_xvmaxi_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmaxi_d : ClangBuiltin<"__builtin_lasx_xvmaxi_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvmini_b : ClangBuiltin<"__builtin_lasx_xvmini_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmini_h : ClangBuiltin<"__builtin_lasx_xvmini_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmini_w : ClangBuiltin<"__builtin_lasx_xvmini_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmini_d : ClangBuiltin<"__builtin_lasx_xvmini_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvmaxi_bu : ClangBuiltin<"__builtin_lasx_xvmaxi_bu">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; 
++def int_loongarch_lasx_xvmaxi_hu : ClangBuiltin<"__builtin_lasx_xvmaxi_hu">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmaxi_wu : ClangBuiltin<"__builtin_lasx_xvmaxi_wu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmaxi_du : ClangBuiltin<"__builtin_lasx_xvmaxi_du">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvmini_bu : ClangBuiltin<"__builtin_lasx_xvmini_bu">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmini_hu : ClangBuiltin<"__builtin_lasx_xvmini_hu">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmini_wu : ClangBuiltin<"__builtin_lasx_xvmini_wu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmini_du : ClangBuiltin<"__builtin_lasx_xvmini_du">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvclz_b : ClangBuiltin<"__builtin_lasx_xvclz_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvclz_h : ClangBuiltin<"__builtin_lasx_xvclz_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvclz_w : ClangBuiltin<"__builtin_lasx_xvclz_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvclz_d : ClangBuiltin<"__builtin_lasx_xvclz_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvpcnt_b : ClangBuiltin<"__builtin_lasx_xvpcnt_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvpcnt_h : ClangBuiltin<"__builtin_lasx_xvpcnt_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvpcnt_w : ClangBuiltin<"__builtin_lasx_xvpcnt_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvpcnt_d : ClangBuiltin<"__builtin_lasx_xvpcnt_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfsqrt_s : ClangBuiltin<"__builtin_lasx_xvfsqrt_s">, ++ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfsqrt_d : ClangBuiltin<"__builtin_lasx_xvfsqrt_d">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfrint_s : ClangBuiltin<"__builtin_lasx_xvfrint_s">, ++ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfrint_d : ClangBuiltin<"__builtin_lasx_xvfrint_d">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvffint_s_w : ClangBuiltin<"__builtin_lasx_xvffint_s_w">, ++ Intrinsic<[llvm_v8f32_ty], [llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvffint_d_l : ClangBuiltin<"__builtin_lasx_xvffint_d_l">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvffint_s_wu : ClangBuiltin<"__builtin_lasx_xvffint_s_wu">, ++ Intrinsic<[llvm_v8f32_ty], [llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvffint_d_lu : ClangBuiltin<"__builtin_lasx_xvffint_d_lu">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvftintrz_wu_s : ClangBuiltin<"__builtin_lasx_xvftintrz_wu_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvftintrz_lu_d : 
ClangBuiltin<"__builtin_lasx_xvftintrz_lu_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvreplgr2vr_b : ClangBuiltin<"__builtin_lasx_xvreplgr2vr_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvreplgr2vr_h : ClangBuiltin<"__builtin_lasx_xvreplgr2vr_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvreplgr2vr_w : ClangBuiltin<"__builtin_lasx_xvreplgr2vr_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvreplgr2vr_d : ClangBuiltin<"__builtin_lasx_xvreplgr2vr_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvinsgr2vr_w : ClangBuiltin<"__builtin_lasx_xvinsgr2vr_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty, llvm_i32_ty], ++ [IntrNoMem]>; ++def int_loongarch_lasx_xvinsgr2vr_d : ClangBuiltin<"__builtin_lasx_xvinsgr2vr_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i64_ty, llvm_i32_ty], ++ [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfdiv_s : ClangBuiltin<"__builtin_lasx_xvfdiv_s">, ++ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfdiv_d : ClangBuiltin<"__builtin_lasx_xvfdiv_d">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvslli_b : ClangBuiltin<"__builtin_lasx_xvslli_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvslli_h : ClangBuiltin<"__builtin_lasx_xvslli_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvslli_w : ClangBuiltin<"__builtin_lasx_xvslli_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvslli_d : ClangBuiltin<"__builtin_lasx_xvslli_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvsrli_b : ClangBuiltin<"__builtin_lasx_xvsrli_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrli_h : ClangBuiltin<"__builtin_lasx_xvsrli_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrli_w : ClangBuiltin<"__builtin_lasx_xvsrli_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrli_d : ClangBuiltin<"__builtin_lasx_xvsrli_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvsrai_b : ClangBuiltin<"__builtin_lasx_xvsrai_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrai_h : ClangBuiltin<"__builtin_lasx_xvsrai_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrai_w : ClangBuiltin<"__builtin_lasx_xvsrai_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrai_d : ClangBuiltin<"__builtin_lasx_xvsrai_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvshuf4i_b : ClangBuiltin<"__builtin_lasx_xvshuf4i_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvshuf4i_h : ClangBuiltin<"__builtin_lasx_xvshuf4i_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvshuf4i_w : 
ClangBuiltin<"__builtin_lasx_xvshuf4i_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvshuf4i_d : ClangBuiltin<"__builtin_lasx_xvshuf4i_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvrotr_b : ClangBuiltin<"__builtin_lasx_xvrotr_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvrotr_h : ClangBuiltin<"__builtin_lasx_xvrotr_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvrotr_w : ClangBuiltin<"__builtin_lasx_xvrotr_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvrotr_d : ClangBuiltin<"__builtin_lasx_xvrotr_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvrotri_b : ClangBuiltin<"__builtin_lasx_xvrotri_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvrotri_h : ClangBuiltin<"__builtin_lasx_xvrotri_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvrotri_w : ClangBuiltin<"__builtin_lasx_xvrotri_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvrotri_d : ClangBuiltin<"__builtin_lasx_xvrotri_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvld : ClangBuiltin<"__builtin_lasx_xvld">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_ptr_ty, llvm_i32_ty], ++ [IntrReadMem, IntrArgMemOnly]>; ++ ++def int_loongarch_lasx_xvst : ClangBuiltin<"__builtin_lasx_xvst">, ++ Intrinsic<[], [llvm_v32i8_ty, llvm_ptr_ty, llvm_i32_ty], ++ [IntrArgMemOnly]>; ++ ++def int_loongarch_lasx_xvrepl128vei_b : ClangBuiltin<"__builtin_lasx_xvrepl128vei_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvrepl128vei_h : ClangBuiltin<"__builtin_lasx_xvrepl128vei_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvrepl128vei_w : ClangBuiltin<"__builtin_lasx_xvrepl128vei_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvrepl128vei_d : ClangBuiltin<"__builtin_lasx_xvrepl128vei_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvinsve0_w : ClangBuiltin<"__builtin_lasx_xvinsve0_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvinsve0_d : ClangBuiltin<"__builtin_lasx_xvinsve0_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvpickve_w : ClangBuiltin<"__builtin_lasx_xvpickve_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvpickve_d : ClangBuiltin<"__builtin_lasx_xvpickve_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvpickve_w_f : ClangBuiltin<"__builtin_lasx_xvpickve_w_f">, ++ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvpickve_d_f : ClangBuiltin<"__builtin_lasx_xvpickve_d_f">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvreplve0_b : 
ClangBuiltin<"__builtin_lasx_xvreplve0_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvreplve0_h : ClangBuiltin<"__builtin_lasx_xvreplve0_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvreplve0_w : ClangBuiltin<"__builtin_lasx_xvreplve0_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvreplve0_d : ClangBuiltin<"__builtin_lasx_xvreplve0_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvreplve0_q : ClangBuiltin<"__builtin_lasx_xvreplve0_q">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_vext2xv_d_w : ClangBuiltin<"__builtin_lasx_vext2xv_d_w">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_vext2xv_w_h : ClangBuiltin<"__builtin_lasx_vext2xv_w_h">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_vext2xv_h_b : ClangBuiltin<"__builtin_lasx_vext2xv_h_b">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_vext2xv_d_h : ClangBuiltin<"__builtin_lasx_vext2xv_d_h">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_vext2xv_w_b : ClangBuiltin<"__builtin_lasx_vext2xv_w_b">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_vext2xv_d_b : ClangBuiltin<"__builtin_lasx_vext2xv_d_b">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v32i8_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_vext2xv_du_wu : ClangBuiltin<"__builtin_lasx_vext2xv_du_wu">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_vext2xv_wu_hu : ClangBuiltin<"__builtin_lasx_vext2xv_wu_hu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_vext2xv_hu_bu : ClangBuiltin<"__builtin_lasx_vext2xv_hu_bu">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_vext2xv_du_hu : ClangBuiltin<"__builtin_lasx_vext2xv_du_hu">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_vext2xv_wu_bu : ClangBuiltin<"__builtin_lasx_vext2xv_wu_bu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_vext2xv_du_bu : ClangBuiltin<"__builtin_lasx_vext2xv_du_bu">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v32i8_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvpermi_q : ClangBuiltin<"__builtin_lasx_xvpermi_q">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvpermi_d : ClangBuiltin<"__builtin_lasx_xvpermi_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvperm_w : ClangBuiltin<"__builtin_lasx_xvperm_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvsrlrni_b_h : ClangBuiltin<"__builtin_lasx_xvsrlrni_b_h">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrlrni_h_w : ClangBuiltin<"__builtin_lasx_xvsrlrni_h_w">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrlrni_w_d : ClangBuiltin<"__builtin_lasx_xvsrlrni_w_d">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrlrni_d_q : ClangBuiltin<"__builtin_lasx_xvsrlrni_d_q">, ++ 
Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xbz_v : ClangBuiltin<"__builtin_lasx_xbz_v">, ++ Intrinsic<[llvm_i32_ty], [llvm_v32i8_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xbnz_v : ClangBuiltin<"__builtin_lasx_xbnz_v">, ++ Intrinsic<[llvm_i32_ty], [llvm_v32i8_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xbz_b : ClangBuiltin<"__builtin_lasx_xbz_b">, ++ Intrinsic<[llvm_i32_ty], [llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xbz_h : ClangBuiltin<"__builtin_lasx_xbz_h">, ++ Intrinsic<[llvm_i32_ty], [llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xbz_w : ClangBuiltin<"__builtin_lasx_xbz_w">, ++ Intrinsic<[llvm_i32_ty], [llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xbz_d : ClangBuiltin<"__builtin_lasx_xbz_d">, ++ Intrinsic<[llvm_i32_ty], [llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xbnz_b : ClangBuiltin<"__builtin_lasx_xbnz_b">, ++ Intrinsic<[llvm_i32_ty], [llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xbnz_h : ClangBuiltin<"__builtin_lasx_xbnz_h">, ++ Intrinsic<[llvm_i32_ty], [llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xbnz_w : ClangBuiltin<"__builtin_lasx_xbnz_w">, ++ Intrinsic<[llvm_i32_ty], [llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xbnz_d : ClangBuiltin<"__builtin_lasx_xbnz_d">, ++ Intrinsic<[llvm_i32_ty], [llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvextl_q_d : ClangBuiltin<"__builtin_lasx_xvextl_q_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvextl_qu_du : ClangBuiltin<"__builtin_lasx_xvextl_qu_du">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty], [IntrNoMem]>; ++ ++//===----------------------------------------------------------------------===// ++// LoongArch BASE ++ ++def int_loongarch_cpucfg : ClangBuiltin<"__builtin_loongarch_cpucfg">, ++ Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>; ++ ++def int_loongarch_csrrd_w : ClangBuiltin<"__builtin_loongarch_csrrd_w">, ++ Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>; ++ ++def int_loongarch_csrrd_d : ClangBuiltin<"__builtin_loongarch_csrrd_d">, ++ Intrinsic<[llvm_i64_ty], [llvm_i64_ty], []>; ++ ++def int_loongarch_csrwr_w : ClangBuiltin<"__builtin_loongarch_csrwr_w">, ++ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; ++ ++def int_loongarch_csrwr_d : ClangBuiltin<"__builtin_loongarch_csrwr_d">, ++ Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], []>; ++ ++def int_loongarch_csrxchg_w : ClangBuiltin<"__builtin_loongarch_csrxchg_w">, ++ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; ++ ++def int_loongarch_csrxchg_d : ClangBuiltin<"__builtin_loongarch_csrxchg_d">, ++ Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty, llvm_i64_ty], []>; ++ ++def int_loongarch_iocsrrd_b : ClangBuiltin<"__builtin_loongarch_iocsrrd_b">, ++ Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>; ++ ++def int_loongarch_iocsrrd_h : ClangBuiltin<"__builtin_loongarch_iocsrrd_h">, ++ Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>; ++ ++def int_loongarch_iocsrrd_w : ClangBuiltin<"__builtin_loongarch_iocsrrd_w">, ++ Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>; ++ ++def int_loongarch_iocsrrd_d : ClangBuiltin<"__builtin_loongarch_iocsrrd_d">, ++ Intrinsic<[llvm_i64_ty], [llvm_i32_ty], []>; ++ ++def int_loongarch_iocsrwr_b : ClangBuiltin<"__builtin_loongarch_iocsrwr_b">, ++ Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], []>; ++ ++def int_loongarch_iocsrwr_h : ClangBuiltin<"__builtin_loongarch_iocsrwr_h">, ++ Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], 
[]>; ++ ++def int_loongarch_iocsrwr_w : ClangBuiltin<"__builtin_loongarch_iocsrwr_w">, ++ Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], []>; ++ ++def int_loongarch_iocsrwr_d : ClangBuiltin<"__builtin_loongarch_iocsrwr_d">, ++ Intrinsic<[], [llvm_i64_ty, llvm_i32_ty], []>; ++ ++def int_loongarch_cacop_w : ClangBuiltin<"__builtin_loongarch_cacop_w">, ++ Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; ++ ++def int_loongarch_cacop_d : ClangBuiltin<"__builtin_loongarch_cacop_d">, ++ Intrinsic<[], [llvm_i32_ty, llvm_i64_ty, llvm_i64_ty], []>; ++ ++def int_loongarch_crc_w_b_w : ClangBuiltin<"__builtin_loongarch_crc_w_b_w">, ++ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; ++ ++def int_loongarch_crc_w_h_w : ClangBuiltin<"__builtin_loongarch_crc_w_h_w">, ++ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; ++ ++def int_loongarch_crc_w_w_w : ClangBuiltin<"__builtin_loongarch_crc_w_w_w">, ++ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; ++ ++def int_loongarch_crc_w_d_w : ClangBuiltin<"__builtin_loongarch_crc_w_d_w">, ++ Intrinsic<[llvm_i32_ty], [llvm_i64_ty, llvm_i32_ty], []>; ++ ++def int_loongarch_crcc_w_b_w : ClangBuiltin<"__builtin_loongarch_crcc_w_b_w">, ++ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; ++ ++def int_loongarch_crcc_w_h_w : ClangBuiltin<"__builtin_loongarch_crcc_w_h_w">, ++ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; ++ ++def int_loongarch_crcc_w_w_w : ClangBuiltin<"__builtin_loongarch_crcc_w_w_w">, ++ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; ++ ++def int_loongarch_crcc_w_d_w : ClangBuiltin<"__builtin_loongarch_crcc_w_d_w">, ++ Intrinsic<[llvm_i32_ty], [llvm_i64_ty, llvm_i32_ty], []>; ++ ++def int_loongarch_tlbclr : ClangBuiltin<"__builtin_loongarch_tlbclr">, ++ Intrinsic<[], [], []>; ++ ++def int_loongarch_tlbflush : ClangBuiltin<"__builtin_loongarch_tlbflush">, ++ Intrinsic<[], [], []>; ++ ++def int_loongarch_tlbfill : ClangBuiltin<"__builtin_loongarch_tlbfill">, ++ Intrinsic<[], [], []>; ++ ++def int_loongarch_tlbrd : ClangBuiltin<"__builtin_loongarch_tlbrd">, ++ Intrinsic<[], [], []>; ++ ++def int_loongarch_tlbwr : ClangBuiltin<"__builtin_loongarch_tlbwr">, ++ Intrinsic<[], [], []>; ++ ++def int_loongarch_tlbsrch : ClangBuiltin<"__builtin_loongarch_tlbsrch">, ++ Intrinsic<[], [], []>; ++ ++def int_loongarch_syscall : ClangBuiltin<"__builtin_loongarch_syscall">, ++ Intrinsic<[], [llvm_i64_ty], []>; ++ ++def int_loongarch_break : ClangBuiltin<"__builtin_loongarch_break">, ++ Intrinsic<[], [llvm_i64_ty], []>; ++ ++def int_loongarch_asrtle_d : ClangBuiltin<"__builtin_loongarch_asrtle_d">, ++ Intrinsic<[], [llvm_i64_ty, llvm_i64_ty], []>; ++ ++def int_loongarch_asrtgt_d : ClangBuiltin<"__builtin_loongarch_asrtgt_d">, ++ Intrinsic<[], [llvm_i64_ty, llvm_i64_ty], []>; ++ ++def int_loongarch_dbar : ClangBuiltin<"__builtin_loongarch_dbar">, ++ Intrinsic<[], [llvm_i64_ty], []>; ++ ++def int_loongarch_ibar : ClangBuiltin<"__builtin_loongarch_ibar">, ++ Intrinsic<[], [llvm_i64_ty], []>; ++ ++} +diff --git a/llvm/include/llvm/Object/ELFObjectFile.h b/llvm/include/llvm/Object/ELFObjectFile.h +index ed2f70b0d..e2247e8ff 100644 +--- a/llvm/include/llvm/Object/ELFObjectFile.h ++++ b/llvm/include/llvm/Object/ELFObjectFile.h +@@ -753,7 +753,8 @@ Expected ELFObjectFile::getSymbolFlags(DataRefImpl Sym) const { + } + if (ESym->getType() == ELF::STT_FUNC && (ESym->st_value & 1) == 1) + Result |= SymbolRef::SF_Thumb; +- } else if (EF.getHeader().e_machine == ELF::EM_RISCV) { ++ } else if (EF.getHeader().e_machine == 
ELF::EM_LOONGARCH || ++ EF.getHeader().e_machine == ELF::EM_RISCV) { + if (Expected NameOrErr = getSymbolName(Sym)) { + // Mark empty name symbols used for label differences. + if (NameOrErr->empty()) +diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +index 3d3b504c6..96cfe1fc0 100644 +--- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp ++++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +@@ -701,6 +701,9 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD, + MIB.addMetadata(Var); + MIB.addMetadata(Expr); + AddDbgValueLocationOps(MIB, DbgValDesc, LocationOps, VRBaseMap); ++ // FIXME: Fix rustc build error with lto=thin option on loongarch. ++ if (MF->getTarget().getTargetTriple().isLoongArch()) ++ return nullptr; + return &*MIB; + } + +diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +index 2badbe34a..88b8d1cf3 100644 +--- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp ++++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +@@ -207,6 +207,16 @@ void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx, + PersonalityEncoding = dwarf::DW_EH_PE_absptr; + TTypeEncoding = dwarf::DW_EH_PE_absptr; + break; ++ case Triple::loongarch32: ++ case Triple::loongarch64: ++ PersonalityEncoding = dwarf::DW_EH_PE_indirect; ++ ++ // Note: gas does not support pc-relative LSDA references. ++ LSDAEncoding = dwarf::DW_EH_PE_absptr; ++ ++ TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | ++ dwarf::DW_EH_PE_sdata4; ++ break; + case Triple::mips: + case Triple::mipsel: + case Triple::mips64: +diff --git a/llvm/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp b/llvm/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp +index 48aaab96e..65944be4c 100644 +--- a/llvm/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp ++++ b/llvm/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp +@@ -263,6 +263,9 @@ EPCIndirectionUtils::Create(ExecutorProcessControl &EPC) { + case Triple::riscv64: + return CreateWithABI(EPC); + ++ case Triple::loongarch64: ++ return CreateWithABI(EPC); ++ + case Triple::x86_64: + if (TT.getOS() == Triple::OSType::Win32) + return CreateWithABI(EPC); +diff --git a/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp b/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp +index 38cab5267..ee2bd91e1 100644 +--- a/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp ++++ b/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp +@@ -157,6 +157,11 @@ createLocalCompileCallbackManager(const Triple &T, ExecutionSession &ES, + return CCMgrT::Create(ES, ErrorHandlerAddress); + } + ++ case Triple::loongarch64: { ++ typedef orc::LocalJITCompileCallbackManager CCMgrT; ++ return CCMgrT::Create(ES, ErrorHandlerAddress); ++ } ++ + case Triple::x86_64: { + if (T.getOS() == Triple::OSType::Win32) { + typedef orc::LocalJITCompileCallbackManager CCMgrT; +diff --git a/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp b/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp +index 20b655bdf..8b5fea92c 100644 +--- a/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp ++++ b/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp +@@ -135,6 +135,10 @@ createLocalLazyCallThroughManager(const Triple &T, ExecutionSession &ES, + return LocalLazyCallThroughManager::Create(ES, + ErrorHandlerAddr); + ++ case Triple::loongarch64: ++ return LocalLazyCallThroughManager::Create( ++ ES, ErrorHandlerAddr); ++ + case Triple::x86_64: + if (T.getOS() == Triple::OSType::Win32) + return LocalLazyCallThroughManager::Create( +diff --git 
a/llvm/lib/ExecutionEngine/Orc/OrcABISupport.cpp b/llvm/lib/ExecutionEngine/Orc/OrcABISupport.cpp +index da8aaad08..d2b972e17 100644 +--- a/llvm/lib/ExecutionEngine/Orc/OrcABISupport.cpp ++++ b/llvm/lib/ExecutionEngine/Orc/OrcABISupport.cpp +@@ -1077,5 +1077,205 @@ void OrcRiscv64::writeIndirectStubsBlock( + } + } + ++void OrcLoongArch64::writeResolverCode(char *ResolverWorkingMem, ++ JITTargetAddress ResolverTargetAddress, ++ JITTargetAddress ReentryFnAddr, ++ JITTargetAddress ReentryCtxAddr) { ++ ++ const uint32_t ResolverCode[] = { ++ // resolver_entry: ++ 0x02fc8063, // 0x0: addi.d $r3,$r3,-224(0xf20) ++ 0x29c00064, // 0x4: st.d $r4,$r3,0 ++ 0x29c02065, // 0x8: st.d $r5,$r3,8(0x8) ++ 0x29c04066, // 0xc: st.d $r6,$r3,16(0x10) ++ 0x29c06067, // 0x10: st.d $r7,$r3,24(0x18) ++ 0x29c08068, // 0x14: st.d $r8,$r3,32(0x20) ++ 0x29c0a069, // 0x18: st.d $r9,$r3,40(0x28) ++ 0x29c0c06a, // 0x1c: st.d $r10,$r3,48(0x30) ++ 0x29c0e06b, // 0x20: st.d $r11,$r3,56(0x38) ++ 0x29c1006c, // 0x24: st.d $r12,$r3,64(0x40) ++ 0x29c1206d, // 0x28: st.d $r13,$r3,72(0x48) ++ 0x29c1406e, // 0x2c: st.d $r14,$r3,80(0x50) ++ 0x29c1606f, // 0x30: st.d $r15,$r3,88(0x58) ++ 0x29c18070, // 0x34: st.d $r16,$r3,96(0x60) ++ 0x29c1a071, // 0x38: st.d $r17,$r3,104(0x68) ++ 0x29c1c072, // 0x3c: st.d $r18,$r3,112(0x70) ++ 0x29c1e073, // 0x40: st.d $r19,$r3,120(0x78) ++ 0x29c20074, // 0x44: st.d $r20,$r3,128(0x80) ++ 0x29c22076, // 0x48: st.d $r22,$r3,136(0x88) ++ 0x29c24077, // 0x4c: st.d $r23,$r3,144(0x90) ++ 0x29c26078, // 0x50: st.d $r24,$r3,152(0x98) ++ 0x29c28079, // 0x54: st.d $r25,$r3,160(0xa0) ++ 0x29c2a07a, // 0x58: st.d $r26,$r3,168(0xa8) ++ 0x29c2c07b, // 0x5c: st.d $r27,$r3,176(0xb0) ++ 0x29c2e07c, // 0x60: st.d $r28,$r3,184(0xb8) ++ 0x29c3007d, // 0x64: st.d $r29,$r3,192(0xc0) ++ 0x29c3207e, // 0x68: st.d $r30,$r3,200(0xc8) ++ 0x29c3407f, // 0x6c: st.d $r31,$r3,208(0xd0) ++ 0x29c36061, // 0x70: st.d $r1,$r3,216(0xd8) ++ // JIT re-entry ctx addr. 
++ 0x00000000, // 0x74: lu12i.w $a0,hi(ctx) ++ 0x00000000, // 0x78: ori $a0,$a0,lo(ctx) ++ 0x00000000, // 0x7c: lu32i.d $a0,higher(ctx) ++ 0x00000000, // 0x80: lu52i.d $a0,$a0,highest(ctx) ++ ++ 0x00150025, // 0x84: move $r5,$r1 ++ 0x02ffa0a5, // 0x88: addi.d $r5,$r5,-24(0xfe8) ++ ++ // JIT re-entry fn addr: ++ 0x00000000, // 0x8c: lu12i.w $t0,hi(reentry) ++ 0x00000000, // 0x90: ori $t0,$t0,lo(reentry) ++ 0x00000000, // 0x94: lu32i.d $t0,higher(reentry) ++ 0x00000000, // 0x98: lu52i.d $t0,$t0,highest(reentry) ++ 0x4c0002a1, // 0x9c: jirl $r1,$r21,0 ++ 0x00150095, // 0xa0: move $r21,$r4 ++ 0x28c36061, // 0xa4: ld.d $r1,$r3,216(0xd8) ++ 0x28c3407f, // 0xa8: ld.d $r31,$r3,208(0xd0) ++ 0x28c3207e, // 0xac: ld.d $r30,$r3,200(0xc8) ++ 0x28c3007d, // 0xb0: ld.d $r29,$r3,192(0xc0) ++ 0x28c2e07c, // 0xb4: ld.d $r28,$r3,184(0xb8) ++ 0x28c2c07b, // 0xb8: ld.d $r27,$r3,176(0xb0) ++ 0x28c2a07a, // 0xbc: ld.d $r26,$r3,168(0xa8) ++ 0x28c28079, // 0xc0: ld.d $r25,$r3,160(0xa0) ++ 0x28c26078, // 0xc4: ld.d $r24,$r3,152(0x98) ++ 0x28c24077, // 0xc8: ld.d $r23,$r3,144(0x90) ++ 0x28c22076, // 0xcc: ld.d $r22,$r3,136(0x88) ++ 0x28c20074, // 0xd0: ld.d $r20,$r3,128(0x80) ++ 0x28c1e073, // 0xd4: ld.d $r19,$r3,120(0x78) ++ 0x28c1c072, // 0xd8: ld.d $r18,$r3,112(0x70) ++ 0x28c1a071, // 0xdc: ld.d $r17,$r3,104(0x68) ++ 0x28c18070, // 0xe0: ld.d $r16,$r3,96(0x60) ++ 0x28c1606f, // 0xe4: ld.d $r15,$r3,88(0x58) ++ 0x28c1406e, // 0xe8: ld.d $r14,$r3,80(0x50) ++ 0x28c1206d, // 0xec: ld.d $r13,$r3,72(0x48) ++ 0x28c1006c, // 0xf0: ld.d $r12,$r3,64(0x40) ++ 0x28c0e06b, // 0xf4: ld.d $r11,$r3,56(0x38) ++ 0x28c0c06a, // 0xf8: ld.d $r10,$r3,48(0x30) ++ 0x28c0a069, // 0xfc: ld.d $r9,$r3,40(0x28) ++ 0x28c08068, // 0x100: ld.d $r8,$r3,32(0x20) ++ 0x28c06067, // 0x104: ld.d $r7,$r3,24(0x18) ++ 0x28c04066, // 0x108: ld.d $r6,$r3,16(0x10) ++ 0x28c02065, // 0x10c: ld.d $r5,$r3,8(0x8) ++ 0x28c00064, // 0x110: ld.d $r4,$r3,0 ++ 0x02c38063, // 0x114: addi.d $r3,$r3,224(0xe0) ++ 0x00150281, // 0x118: move $r1,$r20 ++ 0x4c0002a0, // 0x11c: jirl $r0,$r21,0 ++ }; ++ ++ const unsigned ReentryFnAddrOffset = 0x8c; // JIT re-entry fn addr lu12i.w ++ const unsigned ReentryCtxAddrOffset = 0x74; // JIT re-entry ctx addr lu12i.w ++ ++ memcpy(ResolverWorkingMem, ResolverCode, sizeof(ResolverCode)); ++ ++ uint32_t ReentryCtxLU12i = 0x14000004 | ((ReentryCtxAddr << 32 >> 44) << 5); ++ uint32_t ReentryCtxORi = 0x03800084 | ((ReentryCtxAddr & 0xFFF) << 10); ++ uint32_t ReentryCtxLU32i = 0x16000004 | ((ReentryCtxAddr << 12 >> 44) << 5); ++ uint32_t ReentryCtxLU52i = 0x03000084 | ((ReentryCtxAddr >> 52) << 10); ++ ++ memcpy(ResolverWorkingMem + ReentryCtxAddrOffset, &ReentryCtxLU12i, ++ sizeof(ReentryCtxLU12i)); ++ memcpy(ResolverWorkingMem + (ReentryCtxAddrOffset + 4), &ReentryCtxORi, ++ sizeof(ReentryCtxORi)); ++ memcpy(ResolverWorkingMem + (ReentryCtxAddrOffset + 8), &ReentryCtxLU32i, ++ sizeof(ReentryCtxLU32i)); ++ memcpy(ResolverWorkingMem + (ReentryCtxAddrOffset + 12), &ReentryCtxLU52i, ++ sizeof(ReentryCtxLU52i)); ++ ++ uint32_t ReentryLU12i = 0x14000015 | ((ReentryFnAddr << 32 >> 44) << 5); ++ uint32_t ReentryORi = 0x038002b5 | ((ReentryFnAddr & 0xFFF) << 10); ++ uint32_t ReentryLU32i = 0x16000015 | ((ReentryFnAddr << 12 >> 44) << 5); ++ uint32_t ReentryLU52i = 0x030002b5 | ((ReentryFnAddr >> 52) << 10); ++ ++ memcpy(ResolverWorkingMem + ReentryFnAddrOffset, &ReentryLU12i, ++ sizeof(ReentryLU12i)); ++ memcpy(ResolverWorkingMem + (ReentryFnAddrOffset + 4), &ReentryORi, ++ sizeof(ReentryORi)); ++ memcpy(ResolverWorkingMem + (ReentryFnAddrOffset + 
8), &ReentryLU32i, ++ sizeof(ReentryLU32i)); ++ memcpy(ResolverWorkingMem + (ReentryFnAddrOffset + 12), &ReentryLU52i, ++ sizeof(ReentryLU52i)); ++} ++ ++void OrcLoongArch64::writeTrampolines( ++ char *TrampolineBlockWorkingMem, ++ JITTargetAddress TrampolineBlockTargetAddress, ++ JITTargetAddress ResolverFnAddr, unsigned NumTrampolines) { ++ ++ uint32_t *Trampolines = ++ reinterpret_cast(TrampolineBlockWorkingMem); ++ ++ uint64_t HiBits = ((ResolverFnAddr << 32 >> 44) << 5); ++ uint64_t LoBits = ((ResolverFnAddr & 0xFFF) << 10); ++ uint64_t HigherBits = ((ResolverFnAddr << 12 >> 44) << 5); ++ uint64_t HighestBits = ((ResolverFnAddr >> 52) << 10); ++ ++ for (unsigned I = 0; I < NumTrampolines; ++I) { ++ Trampolines[10 * I + 0] = 0x00150034; // move $t8,$ra ++ Trampolines[10 * I + 1] = ++ 0x14000015 | HiBits; // lu12i.w $r21,hi(ResolveAddr) ++ Trampolines[10 * I + 2] = ++ 0x038002b5 | LoBits; // ori $r21,$r21,lo(ResolveAddr) ++ Trampolines[10 * I + 3] = ++ 0x16000015 | HigherBits; // lu32i $r21,higher(ResolveAddr) ++ Trampolines[10 * I + 4] = ++ 0x030002b5 | HighestBits; // lu52i $r21,$r21,highest(ResolveAddr) ++ Trampolines[10 * I + 5] = 0x4c0002a1; // jirl $ra, $r21, 0 ++ } ++} ++ ++void OrcLoongArch64::writeIndirectStubsBlock( ++ char *StubsBlockWorkingMem, JITTargetAddress StubsBlockTargetAddress, ++ JITTargetAddress PointersBlockTargetAddress, unsigned NumStubs) { ++ // Stub format is: ++ // ++ // .section __orc_stubs ++ // stub1: ++ // lu12i.w $r21, %abs(ptr1)<<32>>44 ++ // ori $r21, $r21, %abs(ptr1)&0xfff ++ // lu32i.d $r21, %abs(ptr1)<<12>>44 ++ // lu52i.d $r21, $r21, %abs(ptr1)>>52 ++ // ld.d $r21, $r21, 0 ++ // jirl $r0, $r21, 0 ++ // stub2: ++ // lu12i.w $r21, %abs(ptr2)<<32>>44 ++ // ori $r21, $r21, %abs(ptr2)&0xfff ++ // lu32i.d $r21, %abs(ptr2)<<12>>44 ++ // lu52i.d $r21, $r21, %abs(ptr2)>>52 ++ // ld.d $r21, $r21, 0 ++ // jirl $r0, $r21, 0 ++ // ++ // ... ++ // ++ // .section __orc_ptrs ++ // ptr1: ++ // .dword 0x0 ++ // ptr2: ++ // .dword 0x0 ++ // ++ // ... ++ ++ assert(stubAndPointerRangesOk( ++ StubsBlockTargetAddress, PointersBlockTargetAddress, NumStubs) && ++ "PointersBlock is out of range"); ++ ++ // Populate the stubs page stubs and mark it executable. ++ uint32_t *Stub = reinterpret_cast(StubsBlockWorkingMem); ++ uint64_t PtrAddr = PointersBlockTargetAddress; ++ ++ for (unsigned I = 0; I < NumStubs; ++I, PtrAddr += 8) { ++ uint64_t HiBits = ((PtrAddr << 32 >> 44) << 5); ++ uint64_t LoBits = ((PtrAddr & 0xFFF) << 10); ++ uint64_t HigherBits = ((PtrAddr << 12 >> 44) << 5); ++ uint64_t HighestBits = ((PtrAddr >> 52) << 10); ++ Stub[8 * I + 0] = 0x14000015 | HiBits; // lu12i.w $r21, hi(PtrAddr) ++ Stub[8 * I + 1] = 0x038002b5 | LoBits; // ori $r21, $r21, lo(PtrAddr) ++ Stub[8 * I + 2] = 0x16000015 | HigherBits; // lu32i.d $r21, higher(PtrAddr) ++ Stub[8 * I + 3] = ++ 0x030002b5 | HighestBits; // lu52i.d $r21, $r21, highest(PtrAddr) ++ Stub[8 * I + 4] = 0x28c002b5; // ld.d $r21, $r21, 0 ++ Stub[8 * I + 5] = 0x4c0002a0; // jirl $r0, $r21, 0 ++ } ++} ++ + } // End namespace orc. + } // End namespace llvm. 
+diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp +index c702584b7..2f1ec696a 100644 +--- a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp ++++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp +@@ -641,6 +641,191 @@ void RuntimeDyldELF::resolveARMRelocation(const SectionEntry &Section, + } + } + ++void RuntimeDyldELF::resolveLoongArch64Relocation(const SectionEntry &Section, ++ uint64_t Offset, ++ uint64_t Value, uint32_t Type, ++ int64_t Addend) { ++ uint32_t *TargetPtr = ++ reinterpret_cast(Section.getAddressWithOffset(Offset)); ++ uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset); ++ uint64_t tmp1, tmp2, tmp3; ++ ++ LLVM_DEBUG(dbgs() << "[XXX] resolveLoongArch64Relocation, LocalAddress: 0x" ++ << format("%llx", Section.getAddressWithOffset(Offset)) ++ << " FinalAddress: 0x" << format("%llx", FinalAddress) ++ << " Value: 0x" << format("%llx", Value) << " Type: 0x" ++ << format("%x", Type) << " Addend: 0x" ++ << format("%llx", Addend) << "\n"); ++ ++ switch (Type) { ++ case ELF::R_LARCH_SOP_PUSH_GPREL: ++ case ELF::R_LARCH_SOP_PUSH_TLS_TPREL: ++ case ELF::R_LARCH_SOP_PUSH_TLS_GOT: ++ case ELF::R_LARCH_SOP_PUSH_TLS_GD: ++ default: ++ llvm_unreachable("Relocation type not implemented yet!"); ++ break; ++ case ELF::R_LARCH_MARK_LA: ++ // mark la ++ MarkLA = true; ++ break; ++ case ELF::R_LARCH_SOP_PUSH_ABSOLUTE: ++ if (MarkLA && !Addend) ++ // push(value) ++ ValuesStack.push_back(Value); ++ else ++ // push(addend) ++ ValuesStack.push_back(Addend); ++ break; ++ case ELF::R_LARCH_SOP_PUSH_PLT_PCREL: ++ case ELF::R_LARCH_SOP_PUSH_PCREL: ++ MarkLA = false; ++ // push(value -pc + addend) ++ ValuesStack.push_back(Value - FinalAddress + Addend); ++ break; ++ case ELF::R_LARCH_SOP_NOT: ++ // pop(tmp1) ++ // push(!tmp1) ++ tmp1 = ValuesStack.pop_back_val(); ++ ValuesStack.push_back(!tmp1); ++ break; ++ case ELF::R_LARCH_SOP_AND: ++ // pop(tmp2) ++ // pop(tmp1) ++ // push(tmp1 & tmp2) ++ tmp2 = ValuesStack.pop_back_val(); ++ tmp1 = ValuesStack.pop_back_val(); ++ ValuesStack.push_back(tmp1 & tmp2); ++ break; ++ case ELF::R_LARCH_SOP_IF_ELSE: ++ // pop(tmp3) ++ // pop(tmp2) ++ // pop(tmp1) ++ // push(tmp1 ? tmp2 : tmp3) ++ tmp3 = ValuesStack.pop_back_val(); ++ tmp2 = ValuesStack.pop_back_val(); ++ tmp1 = ValuesStack.pop_back_val(); ++ ValuesStack.push_back(tmp1 ? 
tmp2 : tmp3); ++ break; ++ case ELF::R_LARCH_SOP_ADD: ++ // pop(tmp2) ++ // pop(tmp1) ++ // push(tmp1 + tmp2) ++ tmp2 = ValuesStack.pop_back_val(); ++ tmp1 = ValuesStack.pop_back_val(); ++ ValuesStack.push_back(tmp1 + tmp2); ++ break; ++ case ELF::R_LARCH_SOP_SUB: ++ // pop(tmp2) ++ // pop(tmp1) ++ // push(tmp1 - tmp2) ++ tmp2 = ValuesStack.pop_back_val(); ++ tmp1 = ValuesStack.pop_back_val(); ++ ValuesStack.push_back(tmp1 - tmp2); ++ break; ++ case ELF::R_LARCH_SOP_SR: ++ // pop(tmp2) ++ // pop(tmp1) ++ // push(tmp1 >> tmp2) ++ tmp2 = ValuesStack.pop_back_val(); ++ tmp1 = ValuesStack.pop_back_val(); ++ ValuesStack.push_back(tmp1 >> tmp2); ++ break; ++ case ELF::R_LARCH_SOP_SL: ++ // pop(tmp2) ++ // pop(tmp1) ++ // push(tmp1 << tmp2) ++ tmp2 = ValuesStack.pop_back_val(); ++ tmp1 = ValuesStack.pop_back_val(); ++ ValuesStack.push_back(tmp1 << tmp2); ++ break; ++ case ELF::R_LARCH_32: ++ support::ulittle32_t::ref{TargetPtr} = ++ static_cast(Value + Addend); ++ break; ++ case ELF::R_LARCH_64: ++ support::ulittle64_t::ref{TargetPtr} = Value + Addend; ++ break; ++ case ELF::R_LARCH_SOP_POP_32_U_10_12: ++ case ELF::R_LARCH_SOP_POP_32_S_10_12: ++ // pop(tmp1) ++ // get(inst) ++ // inst=(inst & 0xffc003ff)|((tmp1 & 0xfff) << 10) ++ // write(inst) ++ tmp1 = ValuesStack.pop_back_val(); ++ support::ulittle32_t::ref{TargetPtr} = ++ (support::ulittle32_t::ref{TargetPtr} & 0xffc003ff) | ++ static_cast((tmp1 & 0xfff) << 10); ++ break; ++ case ELF::R_LARCH_SOP_POP_32_S_5_20: ++ // pop(tmp1) ++ // get(inst) ++ // inst=(inst & 0xfe00001f)|((tmp1 & 0xfffff) << 5) ++ // write(inst) ++ tmp1 = ValuesStack.pop_back_val(); ++ support::ulittle32_t::ref{TargetPtr} = ++ (support::ulittle32_t::ref{TargetPtr} & 0xfe00001f) | ++ static_cast((tmp1 & 0xfffff) << 5); ++ break; ++ case ELF::R_LARCH_SOP_POP_32_S_10_16_S2: ++ // pop(tmp1) ++ // tmp1 >>=2 ++ // get(inst) ++ // inst=(inst & 0xfc0003ff)|((tmp1 & 0xffff) << 10) ++ // write(inst) ++ tmp1 = ValuesStack.pop_back_val(); ++ tmp1 >>= 2; ++ support::ulittle32_t::ref{TargetPtr} = ++ (support::ulittle32_t::ref{TargetPtr} & 0xfc0003ff) | ++ static_cast((tmp1 & 0xffff) << 10); ++ break; ++ case ELF::R_LARCH_SOP_POP_32_S_0_5_10_16_S2: ++ // pop(tmp1) ++ // tmp1 >>= 2 ++ // get(inst) ++ // inst=(inst & 0xfc0003e0)|((tmp1 & 0xffff) << 10)|((tmp1 & 0x1f0000) >> ++ // 16) write(inst) ++ tmp1 = ValuesStack.pop_back_val(); ++ tmp1 >>= 2; ++ support::ulittle32_t::ref{TargetPtr} = ++ (support::ulittle32_t::ref{TargetPtr} & 0xfc0003e0) | ++ static_cast((tmp1 & 0xffff) << 10) | ++ static_cast((tmp1 & 0x1f0000) >> 16); ++ break; ++ case ELF::R_LARCH_SOP_POP_32_S_0_10_10_16_S2: ++ // pop(tmp1) ++ // tmp1 >>= 2 ++ // get(inst) ++ // inst=(inst & 0xfc000000)|((tmp1 & 0xffff) << 10)|((tmp1 & 0x3ff0000) >> ++ // 16) write(inst) ++ tmp1 = ValuesStack.pop_back_val(); ++ tmp1 >>= 2; ++ support::ulittle32_t::ref{TargetPtr} = ++ (support::ulittle32_t::ref{TargetPtr} & 0xfc000000) | ++ static_cast((tmp1 & 0xffff) << 10) | ++ static_cast((tmp1 & 0x3ff0000) >> 16); ++ break; ++ case ELF::R_LARCH_ADD32: ++ support::ulittle32_t::ref{TargetPtr} = ++ (support::ulittle32_t::ref{TargetPtr} + ++ static_cast(Value + Addend)); ++ break; ++ case ELF::R_LARCH_SUB32: ++ support::ulittle32_t::ref{TargetPtr} = ++ (support::ulittle32_t::ref{TargetPtr} - ++ static_cast(Value + Addend)); ++ break; ++ case ELF::R_LARCH_ADD64: ++ support::ulittle64_t::ref{TargetPtr} = ++ (support::ulittle64_t::ref{TargetPtr} + Value + Addend); ++ break; ++ case ELF::R_LARCH_SUB64: ++ support::ulittle64_t::ref{TargetPtr} = ++ 
(support::ulittle64_t::ref{TargetPtr} - Value - Addend); ++ break; ++ } ++} ++ + void RuntimeDyldELF::setMipsABI(const ObjectFile &Obj) { + if (Arch == Triple::UnknownArch || + !StringRef(Triple::getArchTypePrefix(Arch)).equals("mips")) { +@@ -1057,6 +1242,9 @@ void RuntimeDyldELF::resolveRelocation(const SectionEntry &Section, + resolveARMRelocation(Section, Offset, (uint32_t)(Value & 0xffffffffL), Type, + (uint32_t)(Addend & 0xffffffffL)); + break; ++ case Triple::loongarch64: ++ resolveLoongArch64Relocation(Section, Offset, Value, Type, Addend); ++ break; + case Triple::ppc: // Fall through. + case Triple::ppcle: + resolvePPC32Relocation(Section, Offset, Value, Type, Addend); +@@ -1369,6 +1557,58 @@ RuntimeDyldELF::processRelocationRef( + } + processSimpleRelocation(SectionID, Offset, RelType, Value); + } ++ } else if (Arch == Triple::loongarch64) { ++ if (RelType == ELF::R_LARCH_32 || RelType == ELF::R_LARCH_64 || ++ (RelType >= ELF::R_LARCH_ADD8 && RelType <= ELF::R_LARCH_SUB64)) { ++ if (TargetName.size() == 0 && ++ Sections[SectionID].getAddress() != nullptr) { ++ uint64_t SymOffset = 0; ++ unsigned SID = 0; ++ auto SectionOrErr = Symbol->getSection(); ++ if (!SectionOrErr) { ++ std::string Buf; ++ raw_string_ostream OS(Buf); ++ logAllUnhandledErrors(SectionOrErr.takeError(), OS); ++ report_fatal_error(Twine(OS.str())); ++ } ++ section_iterator si = *SectionOrErr; ++ if (si == Obj.section_end()) ++ llvm_unreachable("Symbol section not found!"); ++ bool isCode = si->isText(); ++ if (auto SectionIDOrErr = ++ findOrEmitSection(Obj, (*si), isCode, ObjSectionToID)) { ++ SID = *SectionIDOrErr; ++ } else ++ return SectionIDOrErr.takeError(); ++ auto OffsetOrErr = Symbol->getAddress(); ++ if (OffsetOrErr) ++ SymOffset = *OffsetOrErr; ++ uint64_t Target = Sections[SID].getLoadAddress() + SymOffset; ++ resolveRelocation(Sections[SectionID], Offset, Target, RelType, Addend); ++ } else { ++ processSimpleRelocation(SectionID, Offset, RelType, Value); ++ } ++ } else { ++ RTDyldSymbolTable::const_iterator Loc = ++ GlobalSymbolTable.find(TargetName); ++ if (!TargetName.empty()) { ++ if (Loc == GlobalSymbolTable.end()) { ++ IsSaved = true; ++ SavedSymbol = TargetName; ++ } else { ++ IsSaved = false; ++ } ++ } ++ if (IsSaved == true) { ++ Value.SymbolName = SavedSymbol.data(); ++ processSimpleRelocation(SectionID, Offset, RelType, Value); ++ } else { ++ uint8_t *TargetAddr = getSymbolLocalAddress(TargetName); ++ resolveRelocation(Sections[SectionID], Offset, ++ reinterpret_cast(TargetAddr), RelType, ++ Addend); ++ } ++ } + } else if (IsMipsO32ABI) { + uint8_t *Placeholder = reinterpret_cast( + computePlaceholderAddress(SectionID, Offset)); +@@ -2218,6 +2458,7 @@ size_t RuntimeDyldELF::getGOTEntrySize() { + case Triple::x86_64: + case Triple::aarch64: + case Triple::aarch64_be: ++ case Triple::loongarch64: + case Triple::ppc64: + case Triple::ppc64le: + case Triple::systemz: +diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h +index 1251036f4..ba898f654 100644 +--- a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h ++++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h +@@ -48,6 +48,10 @@ class RuntimeDyldELF : public RuntimeDyldImpl { + void resolveARMRelocation(const SectionEntry &Section, uint64_t Offset, + uint32_t Value, uint32_t Type, int32_t Addend); + ++ void resolveLoongArch64Relocation(const SectionEntry &Section, ++ uint64_t Offset, uint64_t Value, ++ uint32_t Type, int64_t Addend); ++ + void 
resolvePPC32Relocation(const SectionEntry &Section, uint64_t Offset, + uint64_t Value, uint32_t Type, int64_t Addend); + +@@ -155,6 +159,12 @@ private: + // EH frame sections with the memory manager. + SmallVector UnregisteredEHFrameSections; + ++ // For loongarch evaluteRelocation ++ SmallVector ValuesStack; ++ bool IsSaved; ++ bool MarkLA; ++ StringRef SavedSymbol; ++ + // Map between GOT relocation value and corresponding GOT offset + std::map GOTOffsetMap; + +diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp +index d41381337..3ef27601f 100644 +--- a/llvm/lib/IR/Function.cpp ++++ b/llvm/lib/IR/Function.cpp +@@ -38,6 +38,7 @@ + #include "llvm/IR/IntrinsicsBPF.h" + #include "llvm/IR/IntrinsicsDirectX.h" + #include "llvm/IR/IntrinsicsHexagon.h" ++#include "llvm/IR/IntrinsicsLoongArch.h" + #include "llvm/IR/IntrinsicsMips.h" + #include "llvm/IR/IntrinsicsNVPTX.h" + #include "llvm/IR/IntrinsicsPowerPC.h" +diff --git a/llvm/lib/MC/MCObjectFileInfo.cpp b/llvm/lib/MC/MCObjectFileInfo.cpp +index d6fe952c0..254c2fa60 100644 +--- a/llvm/lib/MC/MCObjectFileInfo.cpp ++++ b/llvm/lib/MC/MCObjectFileInfo.cpp +@@ -332,6 +332,12 @@ void MCObjectFileInfo::initMachOMCObjectFileInfo(const Triple &T) { + + void MCObjectFileInfo::initELFMCObjectFileInfo(const Triple &T, bool Large) { + switch (T.getArch()) { ++ case Triple::loongarch32: ++ case Triple::loongarch64: ++ FDECFIEncoding = Ctx->getAsmInfo()->getCodePointerSize() == 4 ++ ? dwarf::DW_EH_PE_sdata4 ++ : dwarf::DW_EH_PE_sdata8; ++ break; + case Triple::mips: + case Triple::mipsel: + case Triple::mips64: +diff --git a/llvm/lib/Object/RelocationResolver.cpp b/llvm/lib/Object/RelocationResolver.cpp +index e14301663..ec54d7b59 100644 +--- a/llvm/lib/Object/RelocationResolver.cpp ++++ b/llvm/lib/Object/RelocationResolver.cpp +@@ -511,6 +511,28 @@ static uint64_t resolveCSKY(uint64_t Type, uint64_t Offset, uint64_t S, + } + } + ++static bool supportsLoongArch(uint64_t Type) { ++ switch (Type) { ++ case ELF::R_LARCH_32: ++ case ELF::R_LARCH_64: ++ return true; ++ default: ++ return false; ++ } ++} ++ ++static uint64_t resolveLoongArch(uint64_t Type, uint64_t Offset, uint64_t S, ++ uint64_t LocData, int64_t Addend) { ++ switch (Type) { ++ case ELF::R_LARCH_32: ++ return (S + Addend) & 0xFFFFFFFF; ++ case ELF::R_LARCH_64: ++ return S + Addend; ++ default: ++ llvm_unreachable("Invalid relocation type"); ++ } ++} ++ + static bool supportsCOFFX86(uint64_t Type) { + switch (Type) { + case COFF::IMAGE_REL_I386_SECREL: +@@ -725,6 +747,8 @@ getRelocationResolver(const ObjectFile &Obj) { + return {supportsAmdgpu, resolveAmdgpu}; + case Triple::riscv64: + return {supportsRISCV, resolveRISCV}; ++ case Triple::loongarch64: ++ return {supportsLoongArch, resolveLoongArch}; + default: + return {nullptr, nullptr}; + } +@@ -760,6 +784,8 @@ getRelocationResolver(const ObjectFile &Obj) { + return {supportsRISCV, resolveRISCV}; + case Triple::csky: + return {supportsCSKY, resolveCSKY}; ++ case Triple::loongarch32: ++ return {supportsLoongArch, resolveLoongArch}; + default: + return {nullptr, nullptr}; + } +diff --git a/llvm/lib/ObjectYAML/ELFYAML.cpp b/llvm/lib/ObjectYAML/ELFYAML.cpp +index 9ad2c4135..b778006cf 100644 +--- a/llvm/lib/ObjectYAML/ELFYAML.cpp ++++ b/llvm/lib/ObjectYAML/ELFYAML.cpp +@@ -518,14 +518,6 @@ void ScalarBitSetTraits::bitset(IO &IO, + BCaseMask(EF_AVR_ARCH_XMEGA7, EF_AVR_ARCH_MASK); + BCase(EF_AVR_LINKRELAX_PREPARED); + break; +- case ELF::EM_LOONGARCH: +- BCaseMask(EF_LOONGARCH_BASE_ABI_ILP32S, EF_LOONGARCH_BASE_ABI_MASK); +- 
BCaseMask(EF_LOONGARCH_BASE_ABI_ILP32F, EF_LOONGARCH_BASE_ABI_MASK); +- BCaseMask(EF_LOONGARCH_BASE_ABI_ILP32D, EF_LOONGARCH_BASE_ABI_MASK); +- BCaseMask(EF_LOONGARCH_BASE_ABI_LP64S, EF_LOONGARCH_BASE_ABI_MASK); +- BCaseMask(EF_LOONGARCH_BASE_ABI_LP64F, EF_LOONGARCH_BASE_ABI_MASK); +- BCaseMask(EF_LOONGARCH_BASE_ABI_LP64D, EF_LOONGARCH_BASE_ABI_MASK); +- break; + case ELF::EM_RISCV: + BCase(EF_RISCV_RVC); + BCaseMask(EF_RISCV_FLOAT_ABI_SOFT, EF_RISCV_FLOAT_ABI); +diff --git a/llvm/lib/Support/Triple.cpp b/llvm/lib/Support/Triple.cpp +index 6696d158b..2c07c1a29 100644 +--- a/llvm/lib/Support/Triple.cpp ++++ b/llvm/lib/Support/Triple.cpp +@@ -257,6 +257,7 @@ StringRef Triple::getEnvironmentTypeName(EnvironmentType Kind) { + case GNU: return "gnu"; + case GNUABI64: return "gnuabi64"; + case GNUABIN32: return "gnuabin32"; ++ case GNUABILPX32: return "gnuabilpx32"; + case GNUEABI: return "gnueabi"; + case GNUEABIHF: return "gnueabihf"; + case GNUX32: return "gnux32"; +diff --git a/llvm/lib/Target/LoongArch/AsmParser/CMakeLists.txt b/llvm/lib/Target/LoongArch/AsmParser/CMakeLists.txt +index 296160531..cb8b768d5 100644 +--- a/llvm/lib/Target/LoongArch/AsmParser/CMakeLists.txt ++++ b/llvm/lib/Target/LoongArch/AsmParser/CMakeLists.txt +@@ -2,10 +2,10 @@ add_llvm_component_library(LLVMLoongArchAsmParser + LoongArchAsmParser.cpp + + LINK_COMPONENTS +- LoongArchDesc +- LoongArchInfo + MC + MCParser ++ LoongArchDesc ++ LoongArchInfo + Support + + ADD_TO_COMPONENT +diff --git a/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp b/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp +index 9793c7bc3..2d35dfd0c 100644 +--- a/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp ++++ b/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp +@@ -1,4 +1,4 @@ +-// LoongArchAsmParser.cpp - Parse LoongArch assembly to MCInst instructions -=// ++//===-- LoongArchAsmParser.cpp - Parse LoongArch assembly to MCInst instructions ----===// + // + // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + // See https://llvm.org/LICENSE.txt for license information. 
+@@ -6,551 +6,2278 @@ + // + //===----------------------------------------------------------------------===// + +-#include "MCTargetDesc/LoongArchInstPrinter.h" ++#include "LoongArchTargetStreamer.h" ++#include "MCTargetDesc/LoongArchABIInfo.h" ++#include "MCTargetDesc/LoongArchAnalyzeImmediate.h" ++#include "MCTargetDesc/LoongArchBaseInfo.h" ++#include "MCTargetDesc/LoongArchMCExpr.h" + #include "MCTargetDesc/LoongArchMCTargetDesc.h" + #include "TargetInfo/LoongArchTargetInfo.h" ++#include "llvm/ADT/APFloat.h" ++#include "llvm/ADT/STLExtras.h" ++#include "llvm/ADT/SmallVector.h" ++#include "llvm/ADT/StringRef.h" ++#include "llvm/ADT/StringSwitch.h" ++#include "llvm/ADT/Triple.h" ++#include "llvm/ADT/Twine.h" ++#include "llvm/BinaryFormat/ELF.h" + #include "llvm/MC/MCContext.h" ++#include "llvm/MC/MCExpr.h" ++#include "llvm/MC/MCInst.h" ++#include "llvm/MC/MCInstrDesc.h" + #include "llvm/MC/MCInstrInfo.h" ++#include "llvm/MC/MCObjectFileInfo.h" + #include "llvm/MC/MCParser/MCAsmLexer.h" ++#include "llvm/MC/MCParser/MCAsmParser.h" ++#include "llvm/MC/MCParser/MCAsmParserExtension.h" + #include "llvm/MC/MCParser/MCParsedAsmOperand.h" + #include "llvm/MC/MCParser/MCTargetAsmParser.h" +-#include "llvm/MC/MCRegisterInfo.h" ++#include "llvm/MC/MCSectionELF.h" + #include "llvm/MC/MCStreamer.h" + #include "llvm/MC/MCSubtargetInfo.h" ++#include "llvm/MC/MCSymbol.h" ++#include "llvm/MC/MCSymbolELF.h" ++#include "llvm/MC/MCValue.h" ++#include "llvm/MC/SubtargetFeature.h" + #include "llvm/MC/TargetRegistry.h" + #include "llvm/Support/Casting.h" ++#include "llvm/Support/CommandLine.h" ++#include "llvm/Support/Compiler.h" ++#include "llvm/Support/Debug.h" ++#include "llvm/Support/ErrorHandling.h" ++#include "llvm/Support/MathExtras.h" ++#include "llvm/Support/SMLoc.h" ++#include "llvm/Support/SourceMgr.h" ++#include "llvm/Support/raw_ostream.h" ++#include ++#include ++#include ++#include ++#include ++#include + + using namespace llvm; + + #define DEBUG_TYPE "loongarch-asm-parser" + ++namespace llvm { ++ ++class MCInstrInfo; ++ ++} // end namespace llvm ++ ++namespace { ++ ++class LoongArchAssemblerOptions { ++public: ++ LoongArchAssemblerOptions(const FeatureBitset &Features_) : Features(Features_) {} ++ ++ LoongArchAssemblerOptions(const LoongArchAssemblerOptions *Opts) { ++ Features = Opts->getFeatures(); ++ } ++ ++ const FeatureBitset &getFeatures() const { return Features; } ++ void setFeatures(const FeatureBitset &Features_) { Features = Features_; } ++ ++private: ++ FeatureBitset Features; ++}; ++ ++} // end anonymous namespace ++ + namespace { ++ + class LoongArchAsmParser : public MCTargetAsmParser { +- SMLoc getLoc() const { return getParser().getTok().getLoc(); } ++ LoongArchTargetStreamer &getTargetStreamer() { ++ MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); ++ return static_cast(TS); ++ } ++ ++ LoongArchABIInfo ABI; ++ SmallVector, 2> AssemblerOptions; ++ MCSymbol *CurrentFn; // Pointer to the function being parsed. It may be a ++ // nullptr, which indicates that no function is currently ++ // selected. This usually happens after an '.end' ++ // directive. ++ bool IsPicEnabled; + +- /// Parse a register as used in CFI directives. ++ // Map of register aliases created via the .set directive. 
++ StringMap RegisterSets; ++ ++#define GET_ASSEMBLER_HEADER ++#include "LoongArchGenAsmMatcher.inc" ++ ++ unsigned checkTargetMatchPredicate(MCInst &Inst) override; ++ ++ bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, ++ OperandVector &Operands, MCStreamer &Out, ++ uint64_t &ErrorInfo, ++ bool MatchingInlineAsm) override; ++ ++ /// Parse a register as used in CFI directives + bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; + OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc, + SMLoc &EndLoc) override; + ++ bool mnemonicIsValid(StringRef Mnemonic); ++ + bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, + SMLoc NameLoc, OperandVector &Operands) override; + +- bool ParseDirective(AsmToken DirectiveID) override { return true; } ++ bool ParseDirective(AsmToken DirectiveID) override; ++ ++ OperandMatchResultTy parseMemOperand(OperandVector &Operands); ++ OperandMatchResultTy parseAMemOperand(OperandVector &Operands); ++ OperandMatchResultTy ++ matchAnyRegisterNameWithoutDollar(OperandVector &Operands, ++ StringRef Identifier, SMLoc S); ++ OperandMatchResultTy matchAnyRegisterWithoutDollar(OperandVector &Operands, ++ const AsmToken &Token, ++ SMLoc S); ++ OperandMatchResultTy matchAnyRegisterWithoutDollar(OperandVector &Operands, ++ SMLoc S); ++ OperandMatchResultTy parseAnyRegister(OperandVector &Operands); ++ OperandMatchResultTy parseJumpTarget(OperandVector &Operands); ++ ++ bool searchSymbolAlias(OperandVector &Operands); ++ ++ bool parseOperand(OperandVector &, StringRef Mnemonic); ++ ++ enum MacroExpanderResultTy { ++ MER_NotAMacro, ++ MER_Success, ++ MER_Fail, ++ }; + +- bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, +- OperandVector &Operands, MCStreamer &Out, +- uint64_t &ErrorInfo, +- bool MatchingInlineAsm) override; ++ // Expands assembly pseudo instructions. ++ MacroExpanderResultTy tryExpandInstruction(MCInst &Inst, SMLoc IDLoc, ++ MCStreamer &Out, ++ const MCSubtargetInfo *STI); + +- unsigned checkTargetMatchPredicate(MCInst &Inst) override; ++ bool expandLoadImm(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, ++ const MCSubtargetInfo *STI); + +- unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, +- unsigned Kind) override; ++ bool expandLoadAddress(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, ++ const MCSubtargetInfo *STI); + +- bool generateImmOutOfRangeError(OperandVector &Operands, uint64_t ErrorInfo, +- int64_t Lower, int64_t Upper, Twine Msg); ++ bool reportParseError(Twine ErrorMsg); + +- /// Helper for processing MC instructions that have been successfully matched +- /// by MatchAndEmitInstruction. +- bool processInstruction(MCInst &Inst, SMLoc IDLoc, OperandVector &Operands, +- MCStreamer &Out); ++ bool parseMemOffset(const MCExpr *&Res); + +-// Auto-generated instruction matching functions. 
+-#define GET_ASSEMBLER_HEADER +-#include "LoongArchGenAsmMatcher.inc" ++ bool isEvaluated(const MCExpr *Expr); ++ bool parseDirectiveSet(); ++ ++ bool parseSetAssignment(); ++ ++ bool parseInternalDirectiveReallowModule(); ++ ++ int matchCPURegisterName(StringRef Symbol); ++ ++ int matchFPURegisterName(StringRef Name); ++ ++ int matchFCFRRegisterName(StringRef Name); ++ int matchFCSRRegisterName(StringRef Name); ++ ++ int matchLSX128RegisterName(StringRef Name); ++ ++ int matchLASX256RegisterName(StringRef Name); ++ ++ bool processInstruction(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, ++ const MCSubtargetInfo *STI); ++ ++ // Helper function that checks if the value of a vector index is within the ++ // boundaries of accepted values for each RegisterKind ++ // Example: VINSGR2VR.B $v0[n], $1 => 16 > n >= 0 ++ bool validateLSXIndex(int Val, int RegKind); ++ ++ void setFeatureBits(uint64_t Feature, StringRef FeatureString) { ++ if (!(getSTI().getFeatureBits()[Feature])) { ++ MCSubtargetInfo &STI = copySTI(); ++ setAvailableFeatures( ++ ComputeAvailableFeatures(STI.ToggleFeature(FeatureString))); ++ AssemblerOptions.back()->setFeatures(STI.getFeatureBits()); ++ } ++ } + +- OperandMatchResultTy parseRegister(OperandVector &Operands); +- OperandMatchResultTy parseImmediate(OperandVector &Operands); ++ void clearFeatureBits(uint64_t Feature, StringRef FeatureString) { ++ if (getSTI().getFeatureBits()[Feature]) { ++ MCSubtargetInfo &STI = copySTI(); ++ setAvailableFeatures( ++ ComputeAvailableFeatures(STI.ToggleFeature(FeatureString))); ++ AssemblerOptions.back()->setFeatures(STI.getFeatureBits()); ++ } ++ } ++ ++ void setModuleFeatureBits(uint64_t Feature, StringRef FeatureString) { ++ setFeatureBits(Feature, FeatureString); ++ AssemblerOptions.front()->setFeatures(getSTI().getFeatureBits()); ++ } + +- bool parseOperand(OperandVector &Operands, StringRef Mnemonic); ++ void clearModuleFeatureBits(uint64_t Feature, StringRef FeatureString) { ++ clearFeatureBits(Feature, FeatureString); ++ AssemblerOptions.front()->setFeatures(getSTI().getFeatureBits()); ++ } + + public: + enum LoongArchMatchResultTy { +- Match_Dummy = FIRST_TARGET_MATCH_RESULT_TY, +- Match_RequiresMsbNotLessThanLsb, +- Match_RequiresOpnd2NotR0R1, ++ Match_RequiresNoZeroRegister = FIRST_TARGET_MATCH_RESULT_TY, ++ Match_RequiresNoRaRegister, ++ Match_RequiresRange0_31, ++ Match_RequiresRange0_63, ++ Match_MsbHigherThanLsb, ++ Match_RequiresPosSizeUImm6, + #define GET_OPERAND_DIAGNOSTIC_TYPES + #include "LoongArchGenAsmMatcher.inc" + #undef GET_OPERAND_DIAGNOSTIC_TYPES + }; + +- LoongArchAsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser, ++ LoongArchAsmParser(const MCSubtargetInfo &sti, MCAsmParser &parser, + const MCInstrInfo &MII, const MCTargetOptions &Options) +- : MCTargetAsmParser(Options, STI, MII) { +- Parser.addAliasForDirective(".half", ".2byte"); +- Parser.addAliasForDirective(".hword", ".2byte"); +- Parser.addAliasForDirective(".word", ".4byte"); +- Parser.addAliasForDirective(".dword", ".8byte"); ++ : MCTargetAsmParser(Options, sti, MII), ++ ABI(LoongArchABIInfo::computeTargetABI(Triple(sti.getTargetTriple()), ++ sti.getCPU(), Options)) { ++ MCAsmParserExtension::Initialize(parser); ++ ++ parser.addAliasForDirective(".asciiz", ".asciz"); ++ parser.addAliasForDirective(".hword", ".2byte"); ++ parser.addAliasForDirective(".word", ".4byte"); ++ parser.addAliasForDirective(".dword", ".8byte"); + + // Initialize the set of available features. 
+- setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); ++ setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits())); ++ ++ // Remember the initial assembler options. The user can not modify these. ++ AssemblerOptions.push_back( ++ std::make_unique(getSTI().getFeatureBits())); ++ ++ // Create an assembler options environment for the user to modify. ++ AssemblerOptions.push_back( ++ std::make_unique(getSTI().getFeatureBits())); ++ ++ getTargetStreamer().updateABIInfo(*this); ++ ++ CurrentFn = nullptr; ++ ++ IsPicEnabled = getContext().getObjectFileInfo()->isPositionIndependent(); ++ } ++ ++ bool is64Bit() const { ++ return getSTI().getFeatureBits()[LoongArch::Feature64Bit]; ++ } ++ ++ bool isFP64bit() const { ++ return getSTI().getFeatureBits()[LoongArch::FeatureFP64Bit]; ++ } ++ ++ const LoongArchABIInfo &getABI() const { return ABI; } ++ bool isABI_LPX32() const { return ABI.IsLPX32(); } ++ bool isABI_LP64() const { return ABI.IsLP64(); } ++ bool isABI_LP32() const { return ABI.IsLP32(); } ++ ++ bool hasLSX() const { ++ return getSTI().getFeatureBits()[LoongArch::FeatureLSX]; ++ } ++ ++ bool hasLASX() const { ++ return getSTI().getFeatureBits()[LoongArch::FeatureLASX]; ++ } ++ ++ bool inPicMode() { ++ return IsPicEnabled; ++ } ++ ++ bool useSoftFloat() const { ++ return getSTI().getFeatureBits()[LoongArch::FeatureSoftFloat]; ++ } ++ ++ const MCExpr *createTargetUnaryExpr(const MCExpr *E, ++ AsmToken::TokenKind OperatorToken, ++ MCContext &Ctx) override { ++ switch(OperatorToken) { ++ default: ++ llvm_unreachable("Unknown token"); ++ return nullptr; ++#if 0 ++ case AsmToken::PercentPlt: ++ return LoongArchMCExpr::create(LoongArchMCExpr::MEK_PLT, E, Ctx); ++#endif ++ } + } + }; + +-// Instances of this class represent a parsed LoongArch machine instruction. ++/// LoongArchOperand - Instances of this class represent a parsed LoongArch machine ++/// instruction. + class LoongArchOperand : public MCParsedAsmOperand { +- enum class KindTy { +- Token, +- Register, +- Immediate, ++public: ++ /// Broad categories of register classes ++ /// The exact class is finalized by the render method. ++ enum RegKind { ++ RegKind_GPR = 1, /// GPR32 and GPR64 (depending on is64Bit()) ++ RegKind_FGR = 2, /// FGR32, FGR64 (depending on isFP64bit()) ++ RegKind_FCFR = 4, /// FCFR ++ RegKind_FCSR = 8, /// FCSR ++ RegKind_LSX128 = 16, /// LSX128[BHWD] (makes no difference which) ++ RegKind_LASX256 = 32, /// LASX256[BHWD] (makes no difference which) ++ RegKind_Numeric = RegKind_GPR | RegKind_FGR | RegKind_FCFR | RegKind_FCSR | ++ RegKind_LSX128 | RegKind_LASX256 ++ }; ++ ++private: ++ enum KindTy { ++ k_Immediate, /// An immediate (possibly involving symbol references) ++ k_Memory, /// Base + Offset Memory Address ++ k_RegisterIndex, /// A register index in one or more RegKind. 
++ k_Token, /// A simple token ++ k_RegList, /// A physical register list + } Kind; + +- struct RegOp { +- MCRegister RegNum; ++public: ++ LoongArchOperand(KindTy K, LoongArchAsmParser &Parser) ++ : MCParsedAsmOperand(), Kind(K), AsmParser(Parser) {} ++ ++ ~LoongArchOperand() override { ++ switch (Kind) { ++ case k_Memory: ++ delete Mem.Base; ++ break; ++ case k_RegList: ++ delete RegList.List; ++ break; ++ case k_Immediate: ++ case k_RegisterIndex: ++ case k_Token: ++ break; ++ } ++ } ++ ++private: ++ /// For diagnostics, and checking the assembler temporary ++ LoongArchAsmParser &AsmParser; ++ ++ struct Token { ++ const char *Data; ++ unsigned Length; ++ }; ++ ++ struct RegIdxOp { ++ unsigned Index; /// Index into the register class ++ RegKind Kind; /// Bitfield of the kinds it could possibly be ++ struct Token Tok; /// The input token this operand originated from. ++ const MCRegisterInfo *RegInfo; + }; + + struct ImmOp { + const MCExpr *Val; + }; + +- SMLoc StartLoc, EndLoc; +- union { +- StringRef Tok; +- struct RegOp Reg; +- struct ImmOp Imm; ++ struct MemOp { ++ LoongArchOperand *Base; ++ const MCExpr *Off; + }; + +-public: +- LoongArchOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {} ++ struct RegListOp { ++ SmallVector *List; ++ }; + +- bool isToken() const override { return Kind == KindTy::Token; } +- bool isReg() const override { return Kind == KindTy::Register; } +- bool isImm() const override { return Kind == KindTy::Immediate; } +- bool isMem() const override { return false; } +- void setReg(MCRegister PhysReg) { Reg.RegNum = PhysReg; } ++ union { ++ struct Token Tok; ++ struct RegIdxOp RegIdx; ++ struct ImmOp Imm; ++ struct MemOp Mem; ++ struct RegListOp RegList; ++ }; + +- static bool evaluateConstantImm(const MCExpr *Expr, int64_t &Imm) { +- if (auto CE = dyn_cast(Expr)) { +- Imm = CE->getValue(); +- return true; +- } ++ SMLoc StartLoc, EndLoc; + +- return false; ++ /// Internal constructor for register kinds ++ static std::unique_ptr CreateReg(unsigned Index, StringRef Str, ++ RegKind RegKind, ++ const MCRegisterInfo *RegInfo, ++ SMLoc S, SMLoc E, ++ LoongArchAsmParser &Parser) { ++ auto Op = std::make_unique(k_RegisterIndex, Parser); ++ Op->RegIdx.Index = Index; ++ Op->RegIdx.RegInfo = RegInfo; ++ Op->RegIdx.Kind = RegKind; ++ Op->RegIdx.Tok.Data = Str.data(); ++ Op->RegIdx.Tok.Length = Str.size(); ++ Op->StartLoc = S; ++ Op->EndLoc = E; ++ return Op; + } + +- template bool isUImm() const { +- if (!isImm()) +- return false; +- +- int64_t Imm; +- bool IsConstantImm = evaluateConstantImm(getImm(), Imm); +- return IsConstantImm && isUInt(Imm - P); ++public: ++ /// Coerce the register to GPR32 and return the real register for the current ++ /// target. 
++ unsigned getGPR32Reg() const { ++ assert(isRegIdx() && (RegIdx.Kind & RegKind_GPR) && "Invalid access!"); ++ unsigned ClassID = LoongArch::GPR32RegClassID; ++ return RegIdx.RegInfo->getRegClass(ClassID).getRegister(RegIdx.Index); + } + +- template bool isSImm() const { +- if (!isImm()) +- return false; +- +- int64_t Imm; +- bool IsConstantImm = evaluateConstantImm(getImm(), Imm); +- return IsConstantImm && isShiftedInt(Imm); +- } +- +- bool isUImm2() const { return isUImm<2>(); } +- bool isUImm2plus1() const { return isUImm<2, 1>(); } +- bool isUImm3() const { return isUImm<3>(); } +- bool isUImm5() const { return isUImm<5>(); } +- bool isUImm6() const { return isUImm<6>(); } +- bool isUImm8() const { return isUImm<8>(); } +- bool isUImm12() const { return isUImm<12>(); } +- bool isUImm14() const { return isUImm<14>(); } +- bool isUImm15() const { return isUImm<15>(); } +- bool isSImm12() const { return isSImm<12>(); } +- bool isSImm14lsl2() const { return isSImm<14, 2>(); } +- bool isSImm16() const { return isSImm<16>(); } +- bool isSImm16lsl2() const { return isSImm<16, 2>(); } +- bool isSImm20() const { return isSImm<20>(); } +- bool isSImm21lsl2() const { return isSImm<21, 2>(); } +- bool isSImm26lsl2() const { return isSImm<26, 2>(); } +- +- /// Gets location of the first token of this operand. +- SMLoc getStartLoc() const override { return StartLoc; } +- /// Gets location of the last token of this operand. +- SMLoc getEndLoc() const override { return EndLoc; } +- +- unsigned getReg() const override { +- assert(Kind == KindTy::Register && "Invalid type access!"); +- return Reg.RegNum.id(); ++ /// Coerce the register to GPR32 and return the real register for the current ++ /// target. ++ unsigned getGPRMM16Reg() const { ++ assert(isRegIdx() && (RegIdx.Kind & RegKind_GPR) && "Invalid access!"); ++ unsigned ClassID = LoongArch::GPR32RegClassID; ++ return RegIdx.RegInfo->getRegClass(ClassID).getRegister(RegIdx.Index); + } + +- const MCExpr *getImm() const { +- assert(Kind == KindTy::Immediate && "Invalid type access!"); +- return Imm.Val; ++ /// Coerce the register to GPR64 and return the real register for the current ++ /// target. ++ unsigned getGPR64Reg() const { ++ assert(isRegIdx() && (RegIdx.Kind & RegKind_GPR) && "Invalid access!"); ++ unsigned ClassID = LoongArch::GPR64RegClassID; ++ return RegIdx.RegInfo->getRegClass(ClassID).getRegister(RegIdx.Index); + } + +- StringRef getToken() const { +- assert(Kind == KindTy::Token && "Invalid type access!"); +- return Tok; ++private: ++ /// Coerce the register to FGR64 and return the real register for the current ++ /// target. ++ unsigned getFGR64Reg() const { ++ assert(isRegIdx() && (RegIdx.Kind & RegKind_FGR) && "Invalid access!"); ++ return RegIdx.RegInfo->getRegClass(LoongArch::FGR64RegClassID) ++ .getRegister(RegIdx.Index); + } + +- void print(raw_ostream &OS) const override { +- auto RegName = [](unsigned Reg) { +- if (Reg) +- return LoongArchInstPrinter::getRegisterName(Reg); +- else +- return "noreg"; +- }; ++ /// Coerce the register to FGR32 and return the real register for the current ++ /// target. 
++ unsigned getFGR32Reg() const { ++ assert(isRegIdx() && (RegIdx.Kind & RegKind_FGR) && "Invalid access!"); ++ return RegIdx.RegInfo->getRegClass(LoongArch::FGR32RegClassID) ++ .getRegister(RegIdx.Index); ++ } + +- switch (Kind) { +- case KindTy::Immediate: +- OS << *getImm(); +- break; +- case KindTy::Register: +- OS << ""; +- break; +- case KindTy::Token: +- OS << "'" << getToken() << "'"; +- break; +- } ++ /// Coerce the register to FCFR and return the real register for the current ++ /// target. ++ unsigned getFCFRReg() const { ++ assert(isRegIdx() && (RegIdx.Kind & RegKind_FCFR) && "Invalid access!"); ++ return RegIdx.RegInfo->getRegClass(LoongArch::FCFRRegClassID) ++ .getRegister(RegIdx.Index); + } + +- static std::unique_ptr createToken(StringRef Str, SMLoc S) { +- auto Op = std::make_unique(KindTy::Token); +- Op->Tok = Str; +- Op->StartLoc = S; +- Op->EndLoc = S; +- return Op; ++ /// Coerce the register to LSX128 and return the real register for the current ++ /// target. ++ unsigned getLSX128Reg() const { ++ assert(isRegIdx() && (RegIdx.Kind & RegKind_LSX128) && "Invalid access!"); ++ // It doesn't matter which of the LSX128[BHWD] classes we use. They are all ++ // identical ++ unsigned ClassID = LoongArch::LSX128BRegClassID; ++ return RegIdx.RegInfo->getRegClass(ClassID).getRegister(RegIdx.Index); + } + +- static std::unique_ptr createReg(unsigned RegNo, SMLoc S, +- SMLoc E) { +- auto Op = std::make_unique(KindTy::Register); +- Op->Reg.RegNum = RegNo; +- Op->StartLoc = S; +- Op->EndLoc = E; +- return Op; ++ unsigned getLASX256Reg() const { ++ assert(isRegIdx() && (RegIdx.Kind & RegKind_LASX256) && "Invalid access!"); ++ unsigned ClassID = LoongArch::LASX256BRegClassID; ++ return RegIdx.RegInfo->getRegClass(ClassID).getRegister(RegIdx.Index); + } + +- static std::unique_ptr createImm(const MCExpr *Val, SMLoc S, +- SMLoc E) { +- auto Op = std::make_unique(KindTy::Immediate); +- Op->Imm.Val = Val; +- Op->StartLoc = S; +- Op->EndLoc = E; +- return Op; ++ /// Coerce the register to CCR and return the real register for the ++ /// current target. ++ unsigned getFCSRReg() const { ++ assert(isRegIdx() && (RegIdx.Kind & RegKind_FCSR) && "Invalid access!"); ++ unsigned ClassID = LoongArch::FCSRRegClassID; ++ return RegIdx.RegInfo->getRegClass(ClassID).getRegister(RegIdx.Index); + } + ++public: + void addExpr(MCInst &Inst, const MCExpr *Expr) const { +- if (auto CE = dyn_cast(Expr)) ++ // Add as immediate when possible. Null MCExpr = 0. ++ if (!Expr) ++ Inst.addOperand(MCOperand::createImm(0)); ++ else if (const MCConstantExpr *CE = dyn_cast(Expr)) + Inst.addOperand(MCOperand::createImm(CE->getValue())); + else + Inst.addOperand(MCOperand::createExpr(Expr)); + } + +- // Used by the TableGen Code. 
+ void addRegOperands(MCInst &Inst, unsigned N) const { +- assert(N == 1 && "Invalid number of operands!"); +- Inst.addOperand(MCOperand::createReg(getReg())); ++ llvm_unreachable("Use a custom parser instead"); + } +- void addImmOperands(MCInst &Inst, unsigned N) const { ++ ++ /// Render the operand to an MCInst as a GPR32 ++ /// Asserts if the wrong number of operands are requested, or the operand ++ /// is not a k_RegisterIndex compatible with RegKind_GPR ++ void addGPR32ZeroAsmRegOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); +- addExpr(Inst, getImm()); ++ Inst.addOperand(MCOperand::createReg(getGPR32Reg())); + } +-}; +-} // end namespace + +-#define GET_REGISTER_MATCHER +-#define GET_SUBTARGET_FEATURE_NAME +-#define GET_MATCHER_IMPLEMENTATION +-#define GET_MNEMONIC_SPELL_CHECKER +-#include "LoongArchGenAsmMatcher.inc" ++ void addGPR32NonZeroAsmRegOperands(MCInst &Inst, unsigned N) const { ++ assert(N == 1 && "Invalid number of operands!"); ++ Inst.addOperand(MCOperand::createReg(getGPR32Reg())); ++ } + +-static MCRegister convertFPR32ToFPR64(MCRegister Reg) { +- assert(Reg >= LoongArch::F0 && Reg <= LoongArch::F31 && "Invalid register"); +- return Reg - LoongArch::F0 + LoongArch::F0_64; +-} ++ void addGPR32AsmRegOperands(MCInst &Inst, unsigned N) const { ++ assert(N == 1 && "Invalid number of operands!"); ++ Inst.addOperand(MCOperand::createReg(getGPR32Reg())); ++ } + +-// Attempts to match Name as a register (either using the default name or +-// alternative ABI names), setting RegNo to the matching register. Upon +-// failure, returns true and sets RegNo to 0. +-static bool matchRegisterNameHelper(MCRegister &RegNo, StringRef Name) { +- RegNo = MatchRegisterName(Name); +- // The 32-bit and 64-bit FPRs have the same asm name. Check that the initial +- // match always matches the 32-bit variant, and not the 64-bit one. +- assert(!(RegNo >= LoongArch::F0_64 && RegNo <= LoongArch::F31_64)); +- // The default FPR register class is based on the tablegen enum ordering. 
+- static_assert(LoongArch::F0 < LoongArch::F0_64, +- "FPR matching must be updated"); +- if (RegNo == LoongArch::NoRegister) +- RegNo = MatchRegisterAltName(Name); ++ void addGPRMM16AsmRegOperands(MCInst &Inst, unsigned N) const { ++ assert(N == 1 && "Invalid number of operands!"); ++ Inst.addOperand(MCOperand::createReg(getGPRMM16Reg())); ++ } + +- return RegNo == LoongArch::NoRegister; +-} ++ void addGPRMM16AsmRegZeroOperands(MCInst &Inst, unsigned N) const { ++ assert(N == 1 && "Invalid number of operands!"); ++ Inst.addOperand(MCOperand::createReg(getGPRMM16Reg())); ++ } + +-bool LoongArchAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, +- SMLoc &EndLoc) { +- return Error(getLoc(), "invalid register number"); +-} ++ void addGPRMM16AsmRegMovePOperands(MCInst &Inst, unsigned N) const { ++ assert(N == 1 && "Invalid number of operands!"); ++ Inst.addOperand(MCOperand::createReg(getGPRMM16Reg())); ++ } + +-OperandMatchResultTy LoongArchAsmParser::tryParseRegister(unsigned &RegNo, +- SMLoc &StartLoc, +- SMLoc &EndLoc) { +- llvm_unreachable("Unimplemented function."); +-} ++ void addGPRMM16AsmRegMovePPairFirstOperands(MCInst &Inst, unsigned N) const { ++ assert(N == 1 && "Invalid number of operands!"); ++ Inst.addOperand(MCOperand::createReg(getGPRMM16Reg())); ++ } + +-OperandMatchResultTy +-LoongArchAsmParser::parseRegister(OperandVector &Operands) { +- if (getLexer().getTok().isNot(AsmToken::Dollar)) +- return MatchOperand_NoMatch; ++ void addGPRMM16AsmRegMovePPairSecondOperands(MCInst &Inst, ++ unsigned N) const { ++ assert(N == 1 && "Invalid number of operands!"); ++ Inst.addOperand(MCOperand::createReg(getGPRMM16Reg())); ++ } + +- // Eat the $ prefix. +- getLexer().Lex(); +- if (getLexer().getKind() != AsmToken::Identifier) +- return MatchOperand_NoMatch; ++ /// Render the operand to an MCInst as a GPR64 ++ /// Asserts if the wrong number of operands are requested, or the operand ++ /// is not a k_RegisterIndex compatible with RegKind_GPR ++ void addGPR64AsmRegOperands(MCInst &Inst, unsigned N) const { ++ assert(N == 1 && "Invalid number of operands!"); ++ Inst.addOperand(MCOperand::createReg(getGPR64Reg())); ++ } + +- StringRef Name = getLexer().getTok().getIdentifier(); +- MCRegister RegNo; +- matchRegisterNameHelper(RegNo, Name); +- if (RegNo == LoongArch::NoRegister) +- return MatchOperand_NoMatch; ++ void addStrictlyFGR64AsmRegOperands(MCInst &Inst, unsigned N) const { ++ assert(N == 1 && "Invalid number of operands!"); ++ Inst.addOperand(MCOperand::createReg(getFGR64Reg())); ++ } + +- SMLoc S = getLoc(); +- SMLoc E = SMLoc::getFromPointer(S.getPointer() + Name.size()); +- getLexer().Lex(); +- Operands.push_back(LoongArchOperand::createReg(RegNo, S, E)); ++ void addFGR64AsmRegOperands(MCInst &Inst, unsigned N) const { ++ assert(N == 1 && "Invalid number of operands!"); ++ Inst.addOperand(MCOperand::createReg(getFGR64Reg())); ++ } + +- return MatchOperand_Success; +-} ++ void addFGR32AsmRegOperands(MCInst &Inst, unsigned N) const { ++ assert(N == 1 && "Invalid number of operands!"); ++ Inst.addOperand(MCOperand::createReg(getFGR32Reg())); ++ } + +-OperandMatchResultTy +-LoongArchAsmParser::parseImmediate(OperandVector &Operands) { +- SMLoc S = getLoc(); +- SMLoc E; +- const MCExpr *Res; ++ void addStrictlyFGR32AsmRegOperands(MCInst &Inst, unsigned N) const { ++ assert(N == 1 && "Invalid number of operands!"); ++ Inst.addOperand(MCOperand::createReg(getFGR32Reg())); ++ } + +- if (getParser().parseExpression(Res, E)) +- return MatchOperand_ParseFail; ++ void 
addFCFRAsmRegOperands(MCInst &Inst, unsigned N) const { ++ assert(N == 1 && "Invalid number of operands!"); ++ Inst.addOperand(MCOperand::createReg(getFCFRReg())); ++ } + +- Operands.push_back(LoongArchOperand::createImm(Res, S, E)); +- return MatchOperand_Success; +-} ++ void addLSX128AsmRegOperands(MCInst &Inst, unsigned N) const { ++ assert(N == 1 && "Invalid number of operands!"); ++ Inst.addOperand(MCOperand::createReg(getLSX128Reg())); ++ } + +-/// Looks at a token type and creates the relevant operand from this +-/// information, adding to Operands. Return true upon an error. +-bool LoongArchAsmParser::parseOperand(OperandVector &Operands, +- StringRef Mnemonic) { +- if (parseRegister(Operands) == MatchOperand_Success || +- parseImmediate(Operands) == MatchOperand_Success) +- return false; ++ void addLASX256AsmRegOperands(MCInst &Inst, unsigned N) const { ++ assert(N == 1 && "Invalid number of operands!"); ++ Inst.addOperand(MCOperand::createReg(getLASX256Reg())); ++ } + +- // Finally we have exhausted all options and must declare defeat. +- Error(getLoc(), "unknown operand"); +- return true; +-} ++ void addFCSRAsmRegOperands(MCInst &Inst, unsigned N) const { ++ assert(N == 1 && "Invalid number of operands!"); ++ Inst.addOperand(MCOperand::createReg(getFCSRReg())); ++ } + +-bool LoongArchAsmParser::ParseInstruction(ParseInstructionInfo &Info, +- StringRef Name, SMLoc NameLoc, +- OperandVector &Operands) { +- // First operand in MCInst is instruction mnemonic. +- Operands.push_back(LoongArchOperand::createToken(Name, NameLoc)); ++ template ++ void addConstantUImmOperands(MCInst &Inst, unsigned N) const { ++ assert(N == 1 && "Invalid number of operands!"); ++ uint64_t Imm = getConstantImm() - Offset; ++ Imm &= (1ULL << Bits) - 1; ++ Imm += Offset; ++ Imm += AdjustOffset; ++ Inst.addOperand(MCOperand::createImm(Imm)); ++ } + +- // If there are no more operands, then finish. +- if (parseOptionalToken(AsmToken::EndOfStatement)) +- return false; ++ template ++ void addSImmOperands(MCInst &Inst, unsigned N) const { ++ if (isImm() && !isConstantImm()) { ++ addExpr(Inst, getImm()); ++ return; ++ } ++ addConstantSImmOperands(Inst, N); ++ } + +- // Parse first operand. +- if (parseOperand(Operands, Name)) +- return true; ++ template ++ void addUImmOperands(MCInst &Inst, unsigned N) const { ++ if (isImm() && !isConstantImm()) { ++ addExpr(Inst, getImm()); ++ return; ++ } ++ addConstantUImmOperands(Inst, N); ++ } + +- // Parse until end of statement, consuming commas between operands. +- while (parseOptionalToken(AsmToken::Comma)) +- if (parseOperand(Operands, Name)) +- return true; ++ template ++ void addConstantSImmOperands(MCInst &Inst, unsigned N) const { ++ assert(N == 1 && "Invalid number of operands!"); ++ int64_t Imm = getConstantImm() - Offset; ++ Imm = SignExtend64(Imm); ++ Imm += Offset; ++ Imm += AdjustOffset; ++ Inst.addOperand(MCOperand::createImm(Imm)); ++ } + +- // Parse end of statement and return successfully. 
+- if (parseOptionalToken(AsmToken::EndOfStatement)) +- return false; ++ void addImmOperands(MCInst &Inst, unsigned N) const { ++ assert(N == 1 && "Invalid number of operands!"); ++ const MCExpr *Expr = getImm(); ++ addExpr(Inst, Expr); ++ } + +- SMLoc Loc = getLexer().getLoc(); +- getParser().eatToEndOfStatement(); +- return Error(Loc, "unexpected token"); +-} ++ void addMemOperands(MCInst &Inst, unsigned N) const { ++ assert(N == 2 && "Invalid number of operands!"); + +-bool LoongArchAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, +- OperandVector &Operands, +- MCStreamer &Out) { +- Inst.setLoc(IDLoc); +- Out.emitInstruction(Inst, getSTI()); +- return false; +-} ++ Inst.addOperand(MCOperand::createReg(AsmParser.getABI().ArePtrs64bit() ++ ? getMemBase()->getGPR64Reg() ++ : getMemBase()->getGPR32Reg())); + +-unsigned LoongArchAsmParser::checkTargetMatchPredicate(MCInst &Inst) { +- switch (Inst.getOpcode()) { +- default: +- break; +- case LoongArch::CSRXCHG: { +- unsigned Rj = Inst.getOperand(2).getReg(); +- if (Rj == LoongArch::R0 || Rj == LoongArch::R1) +- return Match_RequiresOpnd2NotR0R1; +- return Match_Success; ++ const MCExpr *Expr = getMemOff(); ++ addExpr(Inst, Expr); + } +- case LoongArch::BSTRINS_W: +- case LoongArch::BSTRINS_D: +- case LoongArch::BSTRPICK_W: +- case LoongArch::BSTRPICK_D: { +- unsigned Opc = Inst.getOpcode(); +- const signed Msb = +- (Opc == LoongArch::BSTRINS_W || Opc == LoongArch::BSTRINS_D) +- ? Inst.getOperand(3).getImm() +- : Inst.getOperand(2).getImm(); +- const signed Lsb = +- (Opc == LoongArch::BSTRINS_W || Opc == LoongArch::BSTRINS_D) +- ? Inst.getOperand(4).getImm() +- : Inst.getOperand(3).getImm(); +- if (Msb < Lsb) +- return Match_RequiresMsbNotLessThanLsb; +- return Match_Success; ++ ++ void addRegListOperands(MCInst &Inst, unsigned N) const { ++ assert(N == 1 && "Invalid number of operands!"); ++ ++ for (auto RegNo : getRegList()) ++ Inst.addOperand(MCOperand::createReg(RegNo)); + } ++ ++ bool isReg() const override { ++ // As a special case until we sort out the definition of div/divu, accept ++ // $0/$zero here so that MCK_ZERO works correctly. ++ return isGPRAsmReg() && RegIdx.Index == 0; + } + +- return Match_Success; +-} ++ bool isRegIdx() const { return Kind == k_RegisterIndex; } ++ bool isImm() const override { return Kind == k_Immediate; } + +-unsigned +-LoongArchAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp, +- unsigned Kind) { +- LoongArchOperand &Op = static_cast(AsmOp); +- if (!Op.isReg()) +- return Match_InvalidOperand; ++ bool isConstantImm() const { ++ int64_t Res; ++ return isImm() && getImm()->evaluateAsAbsolute(Res); ++ } + +- MCRegister Reg = Op.getReg(); +- // As the parser couldn't differentiate an FPR32 from an FPR64, coerce the +- // register from FPR32 to FPR64 if necessary. 
+- if (LoongArchMCRegisterClasses[LoongArch::FPR32RegClassID].contains(Reg) && +- Kind == MCK_FPR64) { +- Op.setReg(convertFPR32ToFPR64(Reg)); +- return Match_Success; ++ bool isConstantImmz() const { ++ return isConstantImm() && getConstantImm() == 0; + } + +- return Match_InvalidOperand; +-} ++ template bool isConstantUImm() const { ++ return isConstantImm() && isUInt(getConstantImm() - Offset); ++ } + +-bool LoongArchAsmParser::generateImmOutOfRangeError( +- OperandVector &Operands, uint64_t ErrorInfo, int64_t Lower, int64_t Upper, +- Twine Msg = "immediate must be an integer in the range") { +- SMLoc ErrorLoc = ((LoongArchOperand &)*Operands[ErrorInfo]).getStartLoc(); +- return Error(ErrorLoc, Msg + " [" + Twine(Lower) + ", " + Twine(Upper) + "]"); +-} ++ template bool isSImm() const { ++ return isConstantImm() ? isInt(getConstantImm()) : isImm(); ++ } + +-bool LoongArchAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, +- OperandVector &Operands, +- MCStreamer &Out, +- uint64_t &ErrorInfo, +- bool MatchingInlineAsm) { +- MCInst Inst; +- FeatureBitset MissingFeatures; ++ template bool isUImm() const { ++ return isConstantImm() ? isUInt(getConstantImm()) : isImm(); ++ } + +- auto Result = MatchInstructionImpl(Operands, Inst, ErrorInfo, MissingFeatures, +- MatchingInlineAsm); +- switch (Result) { +- default: +- break; +- case Match_Success: +- return processInstruction(Inst, IDLoc, Operands, Out); +- case Match_MissingFeature: { +- assert(MissingFeatures.any() && "Unknown missing features!"); +- bool FirstFeature = true; +- std::string Msg = "instruction requires the following:"; +- for (unsigned i = 0, e = MissingFeatures.size(); i != e; ++i) { +- if (MissingFeatures[i]) { +- Msg += FirstFeature ? " " : ", "; +- Msg += getSubtargetFeatureName(i); +- FirstFeature = false; +- } +- } +- return Error(IDLoc, Msg); ++ template bool isAnyImm() const { ++ return isConstantImm() ? (isInt(getConstantImm()) || ++ isUInt(getConstantImm())) ++ : isImm(); + } +- case Match_MnemonicFail: { +- FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); +- std::string Suggestion = LoongArchMnemonicSpellCheck( +- ((LoongArchOperand &)*Operands[0]).getToken(), FBS, 0); +- return Error(IDLoc, "unrecognized instruction mnemonic" + Suggestion); ++ ++ template bool isConstantSImm() const { ++ return isConstantImm() && isInt(getConstantImm() - Offset); ++ } ++ ++ template bool isConstantUImmRange() const { ++ return isConstantImm() && getConstantImm() >= Bottom && ++ getConstantImm() <= Top; ++ } ++ ++ bool isToken() const override { ++ // Note: It's not possible to pretend that other operand kinds are tokens. ++ // The matcher emitter checks tokens first. ++ return Kind == k_Token; ++ } ++ ++ bool isMem() const override { return Kind == k_Memory; } ++ ++ bool isConstantMemOff() const { ++ return isMem() && isa(getMemOff()); ++ } ++ ++ bool isZeroMemOff() const { ++ return isMem() && isa(getMemOff()) && ++ getConstantMemOff() == 0; ++ } ++ ++ // Allow relocation operators. ++ // FIXME: This predicate and others need to look through binary expressions ++ // and determine whether a Value is a constant or not. 
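  // Illustration for the offset predicates below (the 14-bit width and the
  // scale of 4 are assumptions chosen for the example, matching the
  // "expected memory with 16-bit signed offset and multiple of 4" diagnostic
  // later in this file): a constant offset of 16380 is accepted, being a
  // multiple of 4 inside [-32768, 32764]; 16382 is rejected as misaligned;
  // and a purely symbolic offset is deferred to evaluateAsRelocatable() so
  // the fixup machinery can resolve it.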
++ template ++ bool isMemWithSimmOffset() const { ++ if (!isMem()) ++ return false; ++ if (!getMemBase()->isGPRAsmReg()) ++ return false; ++ if (isa(getMemOff()) || ++ (isConstantMemOff() && ++ isShiftedInt(getConstantMemOff()))) ++ return true; ++ MCValue Res; ++ bool IsReloc = getMemOff()->evaluateAsRelocatable(Res, nullptr, nullptr); ++ return IsReloc && isShiftedInt(Res.getConstant()); ++ } ++ ++ bool isMemWithPtrSizeOffset() const { ++ if (!isMem()) ++ return false; ++ if (!getMemBase()->isGPRAsmReg()) ++ return false; ++ const unsigned PtrBits = AsmParser.getABI().ArePtrs64bit() ? 64 : 32; ++ if (isa(getMemOff()) || ++ (isConstantMemOff() && isIntN(PtrBits, getConstantMemOff()))) ++ return true; ++ MCValue Res; ++ bool IsReloc = getMemOff()->evaluateAsRelocatable(Res, nullptr, nullptr); ++ return IsReloc && isIntN(PtrBits, Res.getConstant()); ++ } ++ ++ bool isMemWithGRPMM16Base() const { ++ return isMem() && getMemBase()->isMM16AsmReg(); ++ } ++ ++ template bool isMemWithUimmOffsetSP() const { ++ return isMem() && isConstantMemOff() && isUInt(getConstantMemOff()) ++ && getMemBase()->isRegIdx() && (getMemBase()->getGPR32Reg() == LoongArch::SP); ++ } ++ ++ template bool isMemWithUimmWordAlignedOffsetSP() const { ++ return isMem() && isConstantMemOff() && isUInt(getConstantMemOff()) ++ && (getConstantMemOff() % 4 == 0) && getMemBase()->isRegIdx() ++ && (getMemBase()->getGPR32Reg() == LoongArch::SP); ++ } ++ ++ template ++ bool isScaledUImm() const { ++ return isConstantImm() && ++ isShiftedUInt(getConstantImm()); ++ } ++ ++ template ++ bool isScaledSImm() const { ++ if (isConstantImm() && ++ isShiftedInt(getConstantImm())) ++ return true; ++ // Operand can also be a symbol or symbol plus ++ // offset in case of relocations. ++ if (Kind != k_Immediate) ++ return false; ++ MCValue Res; ++ bool Success = getImm()->evaluateAsRelocatable(Res, nullptr, nullptr); ++ return Success && isShiftedInt(Res.getConstant()); ++ } ++ ++ bool isRegList16() const { ++ if (!isRegList()) ++ return false; ++ ++ int Size = RegList.List->size(); ++ if (Size < 2 || Size > 5) ++ return false; ++ ++ unsigned R0 = RegList.List->front(); ++ unsigned R1 = RegList.List->back(); ++ if (!((R0 == LoongArch::S0 && R1 == LoongArch::RA) || ++ (R0 == LoongArch::S0_64 && R1 == LoongArch::RA_64))) ++ return false; ++ ++ int PrevReg = *RegList.List->begin(); ++ for (int i = 1; i < Size - 1; i++) { ++ int Reg = (*(RegList.List))[i]; ++ if ( Reg != PrevReg + 1) ++ return false; ++ PrevReg = Reg; ++ } ++ ++ return true; ++ } ++ ++ bool isInvNum() const { return Kind == k_Immediate; } ++ ++ bool isLSAImm() const { ++ if (!isConstantImm()) ++ return false; ++ int64_t Val = getConstantImm(); ++ return 1 <= Val && Val <= 4; ++ } ++ ++ bool isRegList() const { return Kind == k_RegList; } ++ ++ StringRef getToken() const { ++ assert(Kind == k_Token && "Invalid access!"); ++ return StringRef(Tok.Data, Tok.Length); ++ } ++ ++ unsigned getReg() const override { ++ // As a special case until we sort out the definition of div/divu, accept ++ // $0/$zero here so that MCK_ZERO works correctly. 
++ if (Kind == k_RegisterIndex && RegIdx.Index == 0 && ++ RegIdx.Kind & RegKind_GPR) ++ return getGPR32Reg(); // FIXME: GPR64 too ++ ++ llvm_unreachable("Invalid access!"); ++ return 0; ++ } ++ ++ const MCExpr *getImm() const { ++ assert((Kind == k_Immediate) && "Invalid access!"); ++ return Imm.Val; ++ } ++ ++ int64_t getConstantImm() const { ++ const MCExpr *Val = getImm(); ++ int64_t Value = 0; ++ (void)Val->evaluateAsAbsolute(Value); ++ return Value; ++ } ++ ++ LoongArchOperand *getMemBase() const { ++ assert((Kind == k_Memory) && "Invalid access!"); ++ return Mem.Base; ++ } ++ ++ const MCExpr *getMemOff() const { ++ assert((Kind == k_Memory) && "Invalid access!"); ++ return Mem.Off; ++ } ++ ++ int64_t getConstantMemOff() const { ++ return static_cast(getMemOff())->getValue(); ++ } ++ ++ const SmallVectorImpl &getRegList() const { ++ assert((Kind == k_RegList) && "Invalid access!"); ++ return *(RegList.List); ++ } ++ ++ static std::unique_ptr CreateToken(StringRef Str, SMLoc S, ++ LoongArchAsmParser &Parser) { ++ auto Op = std::make_unique(k_Token, Parser); ++ Op->Tok.Data = Str.data(); ++ Op->Tok.Length = Str.size(); ++ Op->StartLoc = S; ++ Op->EndLoc = S; ++ return Op; ++ } ++ ++ /// Create a numeric register (e.g. $1). The exact register remains ++ /// unresolved until an instruction successfully matches ++ static std::unique_ptr ++ createNumericReg(unsigned Index, StringRef Str, const MCRegisterInfo *RegInfo, ++ SMLoc S, SMLoc E, LoongArchAsmParser &Parser) { ++ LLVM_DEBUG(dbgs() << "createNumericReg(" << Index << ", ...)\n"); ++ return CreateReg(Index, Str, RegKind_Numeric, RegInfo, S, E, Parser); ++ } ++ ++ /// Create a register that is definitely a GPR. ++ /// This is typically only used for named registers such as $gp. ++ static std::unique_ptr ++ createGPRReg(unsigned Index, StringRef Str, const MCRegisterInfo *RegInfo, ++ SMLoc S, SMLoc E, LoongArchAsmParser &Parser) { ++ return CreateReg(Index, Str, RegKind_GPR, RegInfo, S, E, Parser); ++ } ++ ++ /// Create a register that is definitely a FGR. ++ /// This is typically only used for named registers such as $f0. ++ static std::unique_ptr ++ createFGRReg(unsigned Index, StringRef Str, const MCRegisterInfo *RegInfo, ++ SMLoc S, SMLoc E, LoongArchAsmParser &Parser) { ++ return CreateReg(Index, Str, RegKind_FGR, RegInfo, S, E, Parser); ++ } ++ ++ /// Create a register that is definitely an FCFR. ++ /// This is typically only used for named registers such as $fcc0. ++ static std::unique_ptr ++ createFCFRReg(unsigned Index, StringRef Str, const MCRegisterInfo *RegInfo, ++ SMLoc S, SMLoc E, LoongArchAsmParser &Parser) { ++ return CreateReg(Index, Str, RegKind_FCFR, RegInfo, S, E, Parser); ++ } ++ ++ /// Create a register that is definitely an FCSR. ++ /// This is typically only used for named registers such as $fcsr0. ++ static std::unique_ptr ++ createFCSRReg(unsigned Index, StringRef Str, const MCRegisterInfo *RegInfo, ++ SMLoc S, SMLoc E, LoongArchAsmParser &Parser) { ++ return CreateReg(Index, Str, RegKind_FCSR, RegInfo, S, E, Parser); ++ } ++ ++ /// Create a register that is definitely an LSX128. ++ /// This is typically only used for named registers such as $v0. 
++ static std::unique_ptr ++ createLSX128Reg(unsigned Index, StringRef Str, const MCRegisterInfo *RegInfo, ++ SMLoc S, SMLoc E, LoongArchAsmParser &Parser) { ++ return CreateReg(Index, Str, RegKind_LSX128, RegInfo, S, E, Parser); ++ } ++ ++ static std::unique_ptr ++ createLASX256Reg(unsigned Index, StringRef Str, const MCRegisterInfo *RegInfo, ++ SMLoc S, SMLoc E, LoongArchAsmParser &Parser) { ++ return CreateReg(Index, Str, RegKind_LASX256, RegInfo, S, E, Parser); ++ } ++ ++ static std::unique_ptr ++ CreateImm(const MCExpr *Val, SMLoc S, SMLoc E, LoongArchAsmParser &Parser) { ++ auto Op = std::make_unique(k_Immediate, Parser); ++ Op->Imm.Val = Val; ++ Op->StartLoc = S; ++ Op->EndLoc = E; ++ return Op; ++ } ++ ++ static std::unique_ptr ++ CreateMem(std::unique_ptr Base, const MCExpr *Off, SMLoc S, ++ SMLoc E, LoongArchAsmParser &Parser) { ++ auto Op = std::make_unique(k_Memory, Parser); ++ Op->Mem.Base = Base.release(); ++ Op->Mem.Off = Off; ++ Op->StartLoc = S; ++ Op->EndLoc = E; ++ return Op; ++ } ++ ++ static std::unique_ptr ++ CreateRegList(SmallVectorImpl &Regs, SMLoc StartLoc, SMLoc EndLoc, ++ LoongArchAsmParser &Parser) { ++ assert(Regs.size() > 0 && "Empty list not allowed"); ++ ++ auto Op = std::make_unique(k_RegList, Parser); ++ Op->RegList.List = new SmallVector(Regs.begin(), Regs.end()); ++ Op->StartLoc = StartLoc; ++ Op->EndLoc = EndLoc; ++ return Op; ++ } ++ ++ bool isGPRZeroAsmReg() const { ++ return isRegIdx() && RegIdx.Kind & RegKind_GPR && RegIdx.Index == 0; ++ } ++ ++ bool isGPRNonZeroAsmReg() const { ++ return isRegIdx() && RegIdx.Kind & RegKind_GPR && RegIdx.Index > 0 && ++ RegIdx.Index <= 31; ++ } ++ ++ bool isGPRAsmReg() const { ++ return isRegIdx() && RegIdx.Kind & RegKind_GPR && RegIdx.Index <= 31; ++ } ++ ++ bool isMM16AsmReg() const { ++ if (!(isRegIdx() && RegIdx.Kind)) ++ return false; ++ return ((RegIdx.Index >= 2 && RegIdx.Index <= 7) ++ || RegIdx.Index == 16 || RegIdx.Index == 17); ++ ++ } ++ bool isMM16AsmRegZero() const { ++ if (!(isRegIdx() && RegIdx.Kind)) ++ return false; ++ return (RegIdx.Index == 0 || ++ (RegIdx.Index >= 2 && RegIdx.Index <= 7) || ++ RegIdx.Index == 17); ++ } ++ ++ bool isMM16AsmRegMoveP() const { ++ if (!(isRegIdx() && RegIdx.Kind)) ++ return false; ++ return (RegIdx.Index == 0 || (RegIdx.Index >= 2 && RegIdx.Index <= 3) || ++ (RegIdx.Index >= 16 && RegIdx.Index <= 20)); ++ } ++ ++ bool isMM16AsmRegMovePPairFirst() const { ++ if (!(isRegIdx() && RegIdx.Kind)) ++ return false; ++ return RegIdx.Index >= 4 && RegIdx.Index <= 6; ++ } ++ ++ bool isMM16AsmRegMovePPairSecond() const { ++ if (!(isRegIdx() && RegIdx.Kind)) ++ return false; ++ return (RegIdx.Index == 21 || RegIdx.Index == 22 || ++ (RegIdx.Index >= 5 && RegIdx.Index <= 7)); ++ } ++ ++ bool isFGRAsmReg() const { ++ return isRegIdx() && RegIdx.Kind & RegKind_FGR && RegIdx.Index <= 31; ++ } ++ ++ bool isStrictlyFGRAsmReg() const { ++ return isRegIdx() && RegIdx.Kind == RegKind_FGR && RegIdx.Index <= 31; ++ } ++ ++ bool isFCSRAsmReg() const { ++ return isRegIdx() && RegIdx.Kind & RegKind_FCSR && RegIdx.Index <= 3; ++ } ++ ++ bool isFCFRAsmReg() const { ++ if (!(isRegIdx() && RegIdx.Kind & RegKind_FCFR)) ++ return false; ++ return RegIdx.Index <= 7; ++ } ++ ++ bool isLSX128AsmReg() const { ++ return isRegIdx() && RegIdx.Kind & RegKind_LSX128 && RegIdx.Index <= 31; ++ } ++ ++ bool isLASX256AsmReg() const { ++ return isRegIdx() && RegIdx.Kind & RegKind_LASX256 && RegIdx.Index <= 31; ++ } ++ ++ /// getStartLoc - Get the location of the first token of this operand. 
++ SMLoc getStartLoc() const override { return StartLoc; } ++ /// getEndLoc - Get the location of the last token of this operand. ++ SMLoc getEndLoc() const override { return EndLoc; } ++ ++ void print(raw_ostream &OS) const override { ++ switch (Kind) { ++ case k_Immediate: ++ OS << "Imm<"; ++ OS << *Imm.Val; ++ OS << ">"; ++ break; ++ case k_Memory: ++ OS << "Mem<"; ++ Mem.Base->print(OS); ++ OS << ", "; ++ OS << *Mem.Off; ++ OS << ">"; ++ break; ++ case k_RegisterIndex: ++ OS << "RegIdx<" << RegIdx.Index << ":" << RegIdx.Kind << ", " ++ << StringRef(RegIdx.Tok.Data, RegIdx.Tok.Length) << ">"; ++ break; ++ case k_Token: ++ OS << getToken(); ++ break; ++ case k_RegList: ++ OS << "RegList< "; ++ for (auto Reg : (*RegList.List)) ++ OS << Reg << " "; ++ OS << ">"; ++ break; ++ } ++ } ++ ++ bool isValidForTie(const LoongArchOperand &Other) const { ++ if (Kind != Other.Kind) ++ return false; ++ ++ switch (Kind) { ++ default: ++ llvm_unreachable("Unexpected kind"); ++ return false; ++ case k_RegisterIndex: { ++ StringRef Token(RegIdx.Tok.Data, RegIdx.Tok.Length); ++ StringRef OtherToken(Other.RegIdx.Tok.Data, Other.RegIdx.Tok.Length); ++ return Token == OtherToken; ++ } ++ } ++ } ++}; // class LoongArchOperand ++ ++} // end anonymous namespace ++ ++namespace llvm { ++ ++extern const MCInstrDesc LoongArchInsts[]; ++ ++} // end namespace llvm ++ ++static const MCInstrDesc &getInstDesc(unsigned Opcode) { ++ return LoongArchInsts[Opcode]; ++} ++ ++static const MCSymbol *getSingleMCSymbol(const MCExpr *Expr) { ++ if (const MCSymbolRefExpr *SRExpr = dyn_cast(Expr)) { ++ return &SRExpr->getSymbol(); ++ } ++ ++ if (const MCBinaryExpr *BExpr = dyn_cast(Expr)) { ++ const MCSymbol *LHSSym = getSingleMCSymbol(BExpr->getLHS()); ++ const MCSymbol *RHSSym = getSingleMCSymbol(BExpr->getRHS()); ++ ++ if (LHSSym) ++ return LHSSym; ++ ++ if (RHSSym) ++ return RHSSym; ++ ++ return nullptr; ++ } ++ ++ if (const MCUnaryExpr *UExpr = dyn_cast(Expr)) ++ return getSingleMCSymbol(UExpr->getSubExpr()); ++ ++ return nullptr; ++} ++ ++static unsigned countMCSymbolRefExpr(const MCExpr *Expr) { ++ if (isa(Expr)) ++ return 1; ++ ++ if (const MCBinaryExpr *BExpr = dyn_cast(Expr)) ++ return countMCSymbolRefExpr(BExpr->getLHS()) + ++ countMCSymbolRefExpr(BExpr->getRHS()); ++ ++ if (const MCUnaryExpr *UExpr = dyn_cast(Expr)) ++ return countMCSymbolRefExpr(UExpr->getSubExpr()); ++ ++ return 0; ++} ++ ++bool LoongArchAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, ++ MCStreamer &Out, ++ const MCSubtargetInfo *STI) { ++ const MCInstrDesc &MCID = getInstDesc(Inst.getOpcode()); ++ ++ Inst.setLoc(IDLoc); ++ ++ // Check branch instructions. 
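  // Worked example of the range rule enforced below (the concrete offsets
  // are illustrative): the operand carries a byte offset checked against
  // width + 2 bits, consistent with an encoding that drops the two low bits,
  // so a 16-bit branch such as beq/bne accepts byte offsets that are
  // multiples of 4 in [-2^17, 2^17 - 4].
  //   offset 131068 -> isIntN(18, 131068) holds and it is 4-aligned: accepted
  //   offset 131072 -> isIntN(18, 131072) fails: "branch target out of range"
  //   offset 6      -> in range but not a multiple of 4:
  //                    "branch to misaligned address"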
++ if (MCID.isBranch() || MCID.isCall()) { ++ const unsigned Opcode = Inst.getOpcode(); ++ MCOperand Offset; ++ bool check = true; ++ unsigned OffsetOpndIdx, OffsetOpndWidth; ++ switch (Opcode) { ++ default: ++ check = false; ++ break; ++ case LoongArch::BEQ: ++ case LoongArch::BNE: ++ case LoongArch::BLT: ++ case LoongArch::BGE: ++ case LoongArch::BLTU: ++ case LoongArch::BGEU: ++ OffsetOpndIdx = 2; ++ OffsetOpndWidth = 16; ++ break; ++ case LoongArch::BEQZ: ++ case LoongArch::BNEZ: ++ case LoongArch::BCEQZ: ++ case LoongArch::BCNEZ: ++ OffsetOpndIdx = 1; ++ OffsetOpndWidth = 21; ++ break; ++ case LoongArch::B: ++ case LoongArch::BL: ++ OffsetOpndIdx = 0; ++ OffsetOpndWidth = 26; ++ break; ++ } ++ if (check) { ++ assert(MCID.getNumOperands() == OffsetOpndIdx + 1 && ++ "unexpected number of operands"); ++ Offset = Inst.getOperand(OffsetOpndIdx); ++ // Non-Imm situation will be dealed with later on when applying fixups. ++ if (Offset.isImm()) { ++ if (!isIntN(OffsetOpndWidth + 2, Offset.getImm())) ++ return Error(IDLoc, "branch target out of range"); ++ if (offsetToAlignment(Offset.getImm(), Align(1LL << 2))) ++ return Error(IDLoc, "branch to misaligned address"); ++ } ++ } ++ } ++ ++ bool IsPCRelativeLoad = (MCID.TSFlags & LoongArchII::IsPCRelativeLoad) != 0; ++ if ((MCID.mayLoad() || MCID.mayStore()) && !IsPCRelativeLoad) { ++ // Check the offset of memory operand, if it is a symbol ++ // reference or immediate we may have to expand instructions. ++ for (unsigned i = 0; i < MCID.getNumOperands(); i++) { ++ const MCOperandInfo &OpInfo = MCID.OpInfo[i]; ++ if ((OpInfo.OperandType == MCOI::OPERAND_MEMORY) || ++ (OpInfo.OperandType == MCOI::OPERAND_UNKNOWN)) { ++ MCOperand &Op = Inst.getOperand(i); ++ if (Op.isImm()) { ++ int64_t MemOffset = Op.getImm(); ++ if (MemOffset < -32768 || MemOffset > 32767) { ++ return getParser().hasPendingError(); ++ } ++ } else if (Op.isExpr()) { ++ const MCExpr *Expr = Op.getExpr(); ++ if (Expr->getKind() == MCExpr::SymbolRef) { ++ const MCSymbolRefExpr *SR = ++ static_cast(Expr); ++ if (SR->getKind() == MCSymbolRefExpr::VK_None) { ++ return getParser().hasPendingError(); ++ } ++ } else if (!isEvaluated(Expr)) { ++ return getParser().hasPendingError(); ++ } ++ } ++ } ++ } // for ++ } // if load/store ++ ++ MacroExpanderResultTy ExpandResult = ++ tryExpandInstruction(Inst, IDLoc, Out, STI); ++ switch (ExpandResult) { ++ case MER_NotAMacro: ++ Out.emitInstruction(Inst, *STI); ++ break; ++ case MER_Success: ++ break; ++ case MER_Fail: ++ return true; + } ++ ++ return false; ++} ++ ++LoongArchAsmParser::MacroExpanderResultTy ++LoongArchAsmParser::tryExpandInstruction(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, ++ const MCSubtargetInfo *STI) { ++ switch (Inst.getOpcode()) { ++ default: ++ return MER_NotAMacro; ++ case LoongArch::LoadImm32: // li.w $rd, $imm32 ++ case LoongArch::LoadImm64: // li.d $rd, $imm64 ++ return expandLoadImm(Inst, IDLoc, Out, STI) ? MER_Fail : MER_Success; ++ case LoongArch::LoadAddrLocal: // la.local $rd, symbol ++ case LoongArch::LoadAddrGlobal: // la.global $rd, symbol ++ case LoongArch::LoadAddrGlobal_Alias: // la $rd, symbol ++ case LoongArch::LoadAddrTLS_LE: // la.tls.le $rd, symbol ++ case LoongArch::LoadAddrTLS_IE: // la.tls.ie $rd, symbol ++ case LoongArch::LoadAddrTLS_LD: // la.tls.ld $rd, symbol ++ case LoongArch::LoadAddrTLS_GD: // la.tls.gd $rd, symbol ++ return expandLoadAddress(Inst, IDLoc, Out, STI) ? MER_Fail : MER_Success; ++ } ++} ++ ++/// Can the value be represented by a unsigned N-bit value and a shift left? 
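/// A couple of worked values for the predicate defined below (chosen purely
/// for illustration): with a 12-bit field, 0xFFF000 qualifies because its
/// lowest set bit is bit 12 and 0xFFF000 >> 12 == 0xFFF fits in 12 bits,
/// whereas 0x1001 does not, since 0x1001 >> 0 == 0x1001 is wider than
/// 12 bits.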
++template static bool isShiftedUIntAtAnyPosition(uint64_t x) { ++ unsigned BitNum = findFirstSet(x); ++ ++ return (x == x >> BitNum << BitNum) && isUInt(x >> BitNum); ++} ++ ++bool LoongArchAsmParser::expandLoadImm(MCInst &Inst, SMLoc IDLoc, ++ MCStreamer &Out, ++ const MCSubtargetInfo *STI) { ++ const int64_t Imm = Inst.getOperand(1).getImm(); ++ const unsigned DstReg = Inst.getOperand(0).getReg(); ++ LoongArchTargetStreamer &TOut = getTargetStreamer(); ++ bool Is64Bit = Inst.getOpcode() == LoongArch::LoadImm64; ++ unsigned SrcReg = Is64Bit ? LoongArch::ZERO_64 : LoongArch::ZERO; ++ LoongArchAnalyzeImmediate::InstSeq Seq = ++ LoongArchAnalyzeImmediate::generateInstSeq( ++ Is64Bit ? Imm : SignExtend64<32>(Imm), Is64Bit); ++ ++ for (auto &Inst : Seq) { ++ if (Inst.Opc == LoongArch::LU12I_W || Inst.Opc == LoongArch::LU12I_W32) ++ TOut.emitRI(Inst.Opc, DstReg, Inst.Imm, IDLoc, STI); ++ else ++ TOut.emitRRI(Inst.Opc, DstReg, SrcReg, Inst.Imm, IDLoc, STI); ++ SrcReg = DstReg; ++ } ++ ++ return false; ++} ++ ++bool LoongArchAsmParser::expandLoadAddress(MCInst &Inst, SMLoc IDLoc, ++ MCStreamer &Out, ++ const MCSubtargetInfo *STI) { ++ LoongArchTargetStreamer &TOut = getTargetStreamer(); ++ const MCExpr *SymExpr = Inst.getOperand(1).getExpr(); ++ const LoongArchMCExpr *HiExpr = nullptr; ++ const LoongArchMCExpr *LoExpr = nullptr; ++ const LoongArchMCExpr *HigherExpr = nullptr; ++ const LoongArchMCExpr *HighestExpr = nullptr; ++ const MCExpr *GotExpr = MCSymbolRefExpr::create( ++ "_GLOBAL_OFFSET_TABLE_", MCSymbolRefExpr::VK_None, getContext()); ++ unsigned DstReg = Inst.getOperand(0).getReg(); ++ ++ MCValue Res; ++ if (!SymExpr->evaluateAsRelocatable(Res, nullptr, nullptr)) { ++ Error(IDLoc, "expected relocatable expression"); ++ return true; ++ } ++ if (Res.getSymB() != nullptr) { ++ Error(IDLoc, "expected relocatable expression with only one symbol"); ++ return true; ++ } ++ ++ switch (Inst.getOpcode()) { ++ case LoongArch::LoadAddrLocal: ++ HiExpr = LoongArchMCExpr::create(LoongArchMCExpr::MEK_PCREL_HI, SymExpr, ++ getContext()); ++ LoExpr = LoongArchMCExpr::create(LoongArchMCExpr::MEK_PCREL_LO, SymExpr, ++ getContext()); ++ ++ TOut.emitRX(LoongArch::PCADDU12I_ri, DstReg, MCOperand::createExpr(HiExpr), ++ IDLoc, STI); ++ TOut.emitRRX(LoongArch::ADDI_D_rri, DstReg, DstReg, ++ MCOperand::createExpr(LoExpr), IDLoc, STI); ++ return false; ++ case LoongArch::LoadAddrGlobal: ++ case LoongArch::LoadAddrGlobal_Alias: ++ HiExpr = LoongArchMCExpr::create(LoongArchMCExpr::MEK_GOT_HI, SymExpr, ++ getContext()); ++ LoExpr = LoongArchMCExpr::create(LoongArchMCExpr::MEK_GOT_LO, SymExpr, ++ getContext()); ++ TOut.emitRXX(LoongArch::PCADDU12I_rii, DstReg, ++ MCOperand::createExpr(HiExpr), MCOperand::createExpr(GotExpr), ++ IDLoc, STI); ++ TOut.emitRRXX(LoongArch::LD_D_rrii, DstReg, DstReg, ++ MCOperand::createExpr(LoExpr), MCOperand::createExpr(GotExpr), ++ IDLoc, STI); ++ return false; ++ case LoongArch::LoadAddrTLS_LE: ++ HiExpr = LoongArchMCExpr::create(LoongArchMCExpr::MEK_TLSLE_HI, SymExpr, ++ getContext()); ++ LoExpr = LoongArchMCExpr::create(LoongArchMCExpr::MEK_TLSLE_LO, SymExpr, ++ getContext()); ++ HigherExpr = LoongArchMCExpr::create(LoongArchMCExpr::MEK_TLSLE_HIGHER, ++ SymExpr, getContext()); ++ HighestExpr = LoongArchMCExpr::create(LoongArchMCExpr::MEK_TLSLE_HIGHEST, ++ SymExpr, getContext()); ++ TOut.emitRX(LoongArch::LU12I_W_ri, DstReg, MCOperand::createExpr(HiExpr), ++ IDLoc, STI); ++ TOut.emitRRX(LoongArch::ORI_rri, DstReg, DstReg, ++ MCOperand::createExpr(LoExpr), IDLoc, STI); ++ 
TOut.emitRX(LoongArch::LU32I_D_ri, DstReg, ++ MCOperand::createExpr(HigherExpr), IDLoc, STI); ++ TOut.emitRRX(LoongArch::LU52I_D_rri, DstReg, DstReg, ++ MCOperand::createExpr(HighestExpr), IDLoc, STI); ++ return false; ++ case LoongArch::LoadAddrTLS_IE: ++ HiExpr = LoongArchMCExpr::create(LoongArchMCExpr::MEK_TLSIE_HI, SymExpr, ++ getContext()); ++ LoExpr = LoongArchMCExpr::create(LoongArchMCExpr::MEK_TLSIE_LO, SymExpr, ++ getContext()); ++ TOut.emitRXX(LoongArch::PCADDU12I_rii, DstReg, ++ MCOperand::createExpr(HiExpr), MCOperand::createExpr(GotExpr), ++ IDLoc, STI); ++ TOut.emitRRXX(LoongArch::LD_D_rrii, DstReg, DstReg, ++ MCOperand::createExpr(LoExpr), MCOperand::createExpr(GotExpr), ++ IDLoc, STI); ++ return false; ++ case LoongArch::LoadAddrTLS_LD: ++ case LoongArch::LoadAddrTLS_GD: ++ HiExpr = LoongArchMCExpr::create(LoongArchMCExpr::MEK_TLSGD_HI, SymExpr, ++ getContext()); ++ LoExpr = LoongArchMCExpr::create(LoongArchMCExpr::MEK_TLSGD_LO, SymExpr, ++ getContext()); ++ TOut.emitRXX(LoongArch::PCADDU12I_rii, DstReg, ++ MCOperand::createExpr(HiExpr), MCOperand::createExpr(GotExpr), ++ IDLoc, STI); ++ TOut.emitRRXX(LoongArch::ADDI_D_rrii, DstReg, DstReg, ++ MCOperand::createExpr(LoExpr), MCOperand::createExpr(GotExpr), ++ IDLoc, STI); ++ return false; ++ default: ++ llvm_unreachable(""); ++ } ++} ++ ++unsigned LoongArchAsmParser::checkTargetMatchPredicate(MCInst &Inst) { ++ switch (Inst.getOpcode()) { ++ case LoongArch::BSTRINS_W: ++ case LoongArch::BSTRPICK_W: { ++ assert(Inst.getOperand(2).isImm() && Inst.getOperand(3).isImm() && ++ "Operands must be immediates for bstrins.w/bstrpick.w!"); ++ const signed Msbw = Inst.getOperand(2).getImm(); ++ const signed Lsbw = Inst.getOperand(3).getImm(); ++ if (Msbw < Lsbw) ++ return Match_MsbHigherThanLsb; ++ if ((Lsbw < 0) || (Msbw > 31)) ++ return Match_RequiresRange0_31; ++ return Match_Success; ++ } ++ case LoongArch::BSTRINS_D: ++ case LoongArch::BSTRPICK_D: { ++ assert(Inst.getOperand(2).isImm() && Inst.getOperand(3).isImm() && ++ "Operands must be immediates for bstrins.d/bstrpick.d!"); ++ const signed Msbd = Inst.getOperand(2).getImm(); ++ const signed Lsbd = Inst.getOperand(3).getImm(); ++ if (Msbd < Lsbd) ++ return Match_MsbHigherThanLsb; ++ if ((Lsbd < 0) || (Msbd > 63)) ++ return Match_RequiresRange0_63; ++ return Match_Success; ++ } ++ case LoongArch::CSRXCHG32: ++ case LoongArch::CSRXCHG: ++ if (Inst.getOperand(2).getReg() == LoongArch::ZERO || ++ Inst.getOperand(2).getReg() == LoongArch::ZERO_64) ++ return Match_RequiresNoZeroRegister; ++ if (Inst.getOperand(2).getReg() == LoongArch::RA || ++ Inst.getOperand(2).getReg() == LoongArch::RA_64) ++ return Match_RequiresNoRaRegister; ++ return Match_Success; ++ } ++ ++ return Match_Success; ++} ++ ++static SMLoc RefineErrorLoc(const SMLoc Loc, const OperandVector &Operands, ++ uint64_t ErrorInfo) { ++ if (ErrorInfo != ~0ULL && ErrorInfo < Operands.size()) { ++ SMLoc ErrorLoc = Operands[ErrorInfo]->getStartLoc(); ++ if (ErrorLoc == SMLoc()) ++ return Loc; ++ return ErrorLoc; ++ } ++ return Loc; ++} ++ ++bool LoongArchAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, ++ OperandVector &Operands, ++ MCStreamer &Out, ++ uint64_t &ErrorInfo, ++ bool MatchingInlineAsm) { ++ MCInst Inst; ++ unsigned MatchResult = ++ MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm); ++ switch (MatchResult) { ++ case Match_Success: ++ if (processInstruction(Inst, IDLoc, Out, STI)) ++ return true; ++ return false; ++ case Match_MissingFeature: ++ Error(IDLoc, "instruction requires a 
CPU feature not currently enabled"); ++ return true; + case Match_InvalidOperand: { + SMLoc ErrorLoc = IDLoc; + if (ErrorInfo != ~0ULL) { + if (ErrorInfo >= Operands.size()) +- return Error(ErrorLoc, "too few operands for instruction"); ++ return Error(IDLoc, "too few operands for instruction"); + +- ErrorLoc = ((LoongArchOperand &)*Operands[ErrorInfo]).getStartLoc(); ++ ErrorLoc = Operands[ErrorInfo]->getStartLoc(); + if (ErrorLoc == SMLoc()) + ErrorLoc = IDLoc; + } ++ + return Error(ErrorLoc, "invalid operand for instruction"); + } ++ case Match_MnemonicFail: ++ return Error(IDLoc, "invalid instruction"); ++ case Match_RequiresNoZeroRegister: ++ return Error(IDLoc, "invalid operand ($zero) for instruction"); ++ case Match_RequiresNoRaRegister: ++ return Error(IDLoc, "invalid operand ($r1) for instruction"); ++ case Match_InvalidImm0_3: ++ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), ++ "immediate must be an integer in range [0, 3]."); ++ case Match_InvalidImm0_7: ++ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), ++ "immediate must be an integer in range [0, 7]."); ++ case Match_InvalidImm0_31: ++ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), ++ "immediate must be an integer in range [0, 31]."); ++ case Match_InvalidImm0_63: ++ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), ++ "immediate must be an integer in range [0, 63]."); ++ case Match_InvalidImm0_4095: ++ case Match_UImm12_Relaxed: ++ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), ++ "immediate must be an integer in range [0, 4095]."); ++ case Match_InvalidImm0_32767: ++ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), ++ "immediate must be an integer in range [0, 32767]."); ++ case Match_UImm16_Relaxed: ++ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), ++ "expected 16-bit unsigned immediate"); ++ case Match_UImm20_0: ++ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), ++ "expected 20-bit unsigned immediate"); ++ case Match_UImm26_0: ++ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), ++ "expected 26-bit unsigned immediate"); ++ case Match_UImm32_Coerced: ++ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), ++ "expected 32-bit immediate"); ++ case Match_InvalidSImm2: ++ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), ++ "expected 2-bit signed immediate"); ++ case Match_InvalidSImm3: ++ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), ++ "expected 3-bit signed immediate"); ++ case Match_InvalidSImm5: ++ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), ++ "expected 5-bit signed immediate"); ++ case Match_InvalidSImm8: ++ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), ++ "expected 8-bit signed immediate"); ++ case Match_InvalidSImm12: ++ case Match_SImm12_Relaxed: ++ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), ++ "expected 12-bit signed immediate"); ++ case Match_InvalidSImm14: ++ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), ++ "expected 14-bit signed immediate"); ++ case Match_InvalidSImm15: ++ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), ++ "expected 15-bit signed immediate"); ++ case Match_InvalidSImm16: ++ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), ++ "expected 16-bit signed immediate"); ++ case Match_InvalidSImm20: ++ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), ++ "expected 20-bit signed immediate"); ++ case Match_InvalidSImm21: ++ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), ++ "expected 21-bit signed immediate"); ++ case 
Match_InvalidSImm26: ++ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), ++ "expected 26-bit signed immediate"); ++ case Match_SImm32: ++ case Match_SImm32_Relaxed: ++ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), ++ "expected 32-bit signed immediate"); ++ case Match_MemSImm14: ++ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), ++ "expected memory with 14-bit signed offset"); ++ case Match_MemSImmPtr: ++ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), ++ "expected memory with 32-bit signed offset"); ++ case Match_UImm2_1: ++ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), ++ "expected immediate in range 1 .. 4"); ++ case Match_MemSImm14Lsl2: ++ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), ++ "expected memory with 16-bit signed offset and multiple of 4"); ++ case Match_RequiresRange0_31: { ++ SMLoc ErrorStart = Operands[3]->getStartLoc(); ++ SMLoc ErrorEnd = Operands[4]->getEndLoc(); ++ return Error(ErrorStart, "from lsbw to msbw are not in the range 0 .. 31", ++ SMRange(ErrorStart, ErrorEnd)); ++ } ++ case Match_RequiresPosSizeUImm6: { ++ SMLoc ErrorStart = Operands[3]->getStartLoc(); ++ SMLoc ErrorEnd = Operands[4]->getEndLoc(); ++ return Error(ErrorStart, "size plus position are not in the range 1 .. 63", ++ SMRange(ErrorStart, ErrorEnd)); ++ } ++ case Match_RequiresRange0_63: { ++ SMLoc ErrorStart = Operands[3]->getStartLoc(); ++ SMLoc ErrorEnd = Operands[4]->getEndLoc(); ++ return Error(ErrorStart, "from lsbd to msbd are not in the range 0 .. 63", ++ SMRange(ErrorStart, ErrorEnd)); ++ } ++ case Match_MsbHigherThanLsb: { ++ SMLoc ErrorStart = Operands[3]->getStartLoc(); ++ SMLoc ErrorEnd = Operands[4]->getEndLoc(); ++ return Error(ErrorStart, "msb are not higher than lsb", SMRange(ErrorStart, ErrorEnd)); ++ } ++ case Match_MemZeroOff: ++ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), ++ "expected memory with constant 0 offset"); + } + +- // Handle the case when the error message is of specific type +- // other than the generic Match_InvalidOperand, and the +- // corresponding operand is missing. 
+- if (Result > FIRST_TARGET_MATCH_RESULT_TY) { +- SMLoc ErrorLoc = IDLoc; +- if (ErrorInfo != ~0ULL && ErrorInfo >= Operands.size()) +- return Error(ErrorLoc, "too few operands for instruction"); ++ llvm_unreachable("Implement any new match types added!"); ++} ++ ++/* ++ * Note: The implementation of this function must be sync with the definition ++ * of GPR32/GPR64 RegisterClass in LoongArchRegisterInfo.td ++ */ ++int LoongArchAsmParser::matchCPURegisterName(StringRef Name) { ++ int CC; ++ ++ CC = StringSwitch(Name) ++ .Cases("zero", "r0", 0) ++ .Cases("a0", "v0", "r4", 1) ++ .Cases("a1", "v1", "r5", 2) ++ .Cases("a2", "r6", 3) ++ .Cases("a3", "r7", 4) ++ .Cases("a4", "r8", 5) ++ .Cases("a5", "r9", 6) ++ .Cases("a6", "r10", 7) ++ .Cases("a7", "r11", 8) ++ .Cases("t0", "r12", 9) ++ .Cases("t1", "r13", 10) ++ .Cases("t2", "r14", 11) ++ .Cases("t3", "r15", 12) ++ .Cases("t4", "r16", 13) ++ .Cases("t5", "r17", 14) ++ .Cases("t6", "r18", 15) ++ .Cases("t7", "r19", 16) ++ .Cases("t8", "r20", 17) ++ .Cases("s0", "r23", 18) ++ .Cases("s1", "r24", 19) ++ .Cases("s2", "r25", 20) ++ .Cases("s3", "r26", 21) ++ .Cases("s4", "r27", 22) ++ .Cases("s5", "r28", 23) ++ .Cases("s6", "r29", 24) ++ .Cases("s7", "r30", 25) ++ .Cases("s8", "r31", 26) ++ .Cases("ra", "r1", 27) ++ .Cases("tp", "r2", 28) ++ .Cases("sp", "r3", 29) ++ .Case("r21", 30) ++ .Cases("fp", "r22", 31) ++ .Default(-1); ++ ++ return CC; ++} ++ ++int LoongArchAsmParser::matchFPURegisterName(StringRef Name) { ++ if (Name[0] == 'f') { ++ int CC; ++ ++ CC = StringSwitch(Name) ++ .Cases("f0", "fa0", "fv0", 0) ++ .Cases("f1", "fa1", "fv1", 1) ++ .Cases("f2", "fa2", 2) ++ .Cases("f3", "fa3", 3) ++ .Cases("f4", "fa4", 4) ++ .Cases("f5", "fa5", 5) ++ .Cases("f6", "fa6", 6) ++ .Cases("f7", "fa7", 7) ++ .Cases("f8", "ft0", 8) ++ .Cases("f9", "ft1", 9) ++ .Cases("f10", "ft2", 10) ++ .Cases("f11", "ft3", 11) ++ .Cases("f12", "ft4", 12) ++ .Cases("f13", "ft5", 13) ++ .Cases("f14", "ft6", 14) ++ .Cases("f15", "ft7", 15) ++ .Cases("f16", "ft8", 16) ++ .Cases("f17", "ft9", 17) ++ .Cases("f18", "ft10", 18) ++ .Cases("f19", "ft11", 19) ++ .Cases("f20", "ft12", 20) ++ .Cases("f21", "ft13", 21) ++ .Cases("f22", "ft14", 22) ++ .Cases("f23", "ft15", 23) ++ .Cases("f24", "fs0", 24) ++ .Cases("f25", "fs1", 25) ++ .Cases("f26", "fs2", 26) ++ .Cases("f27", "fs3", 27) ++ .Cases("f28", "fs4", 28) ++ .Cases("f29", "fs5", 29) ++ .Cases("f30", "fs6", 30) ++ .Cases("f31", "fs7", 31) ++ .Default(-1); ++ ++ return CC; + } ++ return -1; ++} + +- switch (Result) { +- default: +- break; +- case Match_RequiresMsbNotLessThanLsb: { +- SMLoc ErrorStart = Operands[3]->getStartLoc(); +- return Error(ErrorStart, "msb is less than lsb", +- SMRange(ErrorStart, Operands[4]->getEndLoc())); +- } +- case Match_RequiresOpnd2NotR0R1: +- return Error(Operands[2]->getStartLoc(), "must not be $r0 or $r1"); +- case Match_InvalidUImm2: +- return generateImmOutOfRangeError(Operands, ErrorInfo, /*Lower=*/0, +- /*Upper=*/(1 << 2) - 1); +- case Match_InvalidUImm2plus1: +- return generateImmOutOfRangeError(Operands, ErrorInfo, /*Lower=*/1, +- /*Upper=*/(1 << 2)); +- case Match_InvalidUImm3: +- return generateImmOutOfRangeError(Operands, ErrorInfo, /*Lower=*/0, +- /*Upper=*/(1 << 3) - 1); +- case Match_InvalidUImm5: +- return generateImmOutOfRangeError(Operands, ErrorInfo, /*Lower=*/0, +- /*Upper=*/(1 << 5) - 1); +- case Match_InvalidUImm6: +- return generateImmOutOfRangeError(Operands, ErrorInfo, /*Lower=*/0, +- /*Upper=*/(1 << 6) - 1); +- case Match_InvalidUImm12: +- return 
generateImmOutOfRangeError(Operands, ErrorInfo, /*Lower=*/0, +- /*Upper=*/(1 << 12) - 1); +- case Match_InvalidUImm15: +- return generateImmOutOfRangeError(Operands, ErrorInfo, /*Lower=*/0, +- /*Upper=*/(1 << 15) - 1); +- case Match_InvalidSImm12: +- return generateImmOutOfRangeError(Operands, ErrorInfo, /*Lower=*/-(1 << 11), +- /*Upper=*/(1 << 11) - 1); +- case Match_InvalidSImm14lsl2: +- return generateImmOutOfRangeError( +- Operands, ErrorInfo, /*Lower=*/-(1 << 15), /*Upper=*/(1 << 15) - 4, +- "immediate must be a multiple of 4 in the range"); +- case Match_InvalidSImm16: +- return generateImmOutOfRangeError(Operands, ErrorInfo, /*Lower=*/-(1 << 15), +- /*Upper=*/(1 << 15) - 1); +- case Match_InvalidSImm16lsl2: +- return generateImmOutOfRangeError( +- Operands, ErrorInfo, /*Lower=*/-(1 << 17), /*Upper=*/(1 << 17) - 4, +- "immediate must be a multiple of 4 in the range"); +- case Match_InvalidSImm20: +- return generateImmOutOfRangeError(Operands, ErrorInfo, /*Lower=*/-(1 << 19), +- /*Upper=*/(1 << 19) - 1); +- case Match_InvalidSImm21lsl2: +- return generateImmOutOfRangeError( +- Operands, ErrorInfo, /*Lower=*/-(1 << 22), /*Upper=*/(1 << 22) - 4, +- "immediate must be a multiple of 4 in the range"); +- case Match_InvalidSImm26lsl2: +- return generateImmOutOfRangeError( +- Operands, ErrorInfo, /*Lower=*/-(1 << 27), /*Upper=*/(1 << 27) - 4, +- "immediate must be a multiple of 4 in the range"); ++int LoongArchAsmParser::matchFCFRRegisterName(StringRef Name) { ++ if (Name.startswith("fcc")) { ++ StringRef NumString = Name.substr(3); ++ unsigned IntVal; ++ if (NumString.getAsInteger(10, IntVal)) ++ return -1; // This is not an integer. ++ if (IntVal > 7) // There are only 8 fcc registers. ++ return -1; ++ return IntVal; ++ } ++ return -1; ++} ++ ++int LoongArchAsmParser::matchFCSRRegisterName(StringRef Name) { ++ if (Name.startswith("fcsr")) { ++ StringRef NumString = Name.substr(4); ++ unsigned IntVal; ++ if (NumString.getAsInteger(10, IntVal)) ++ return -1; // This is not an integer. ++ if (IntVal > 3) // There are only 4 fcsr registers. ++ return -1; ++ return IntVal; ++ } ++ return -1; ++} ++ ++int LoongArchAsmParser::matchLSX128RegisterName(StringRef Name) { ++ unsigned IntVal; ++ ++ if (Name.front() != 'v' || Name.drop_front(2).getAsInteger(10, IntVal)) ++ return -1; ++ ++ if (IntVal > 31) ++ return -1; ++ ++ return IntVal; ++} ++ ++int LoongArchAsmParser::matchLASX256RegisterName(StringRef Name) { ++ unsigned IntVal; ++ ++ if (Name.front() != 'x' || Name.drop_front(2).getAsInteger(10, IntVal)) ++ return -1; ++ ++ if (IntVal > 31) ++ return -1; ++ ++ return IntVal; ++} ++ ++bool LoongArchAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) { ++ MCAsmParser &Parser = getParser(); ++ LLVM_DEBUG(dbgs() << "parseOperand\n"); ++ ++ // Check if the current operand has a custom associated parser, if so, try to ++ // custom parse the operand, or fallback to the general approach. ++ OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); ++ if (ResTy == MatchOperand_Success) ++ return false; ++ // If there wasn't a custom match, try the generic matcher below. Otherwise, ++ // there was a match, but an error occurred, in which case, just return that ++ // the operand parsing failed. ++ if (ResTy == MatchOperand_ParseFail) ++ return true; ++ ++ LLVM_DEBUG(dbgs() << ".. Generic Parser\n"); ++ ++ switch (getLexer().getKind()) { ++ case AsmToken::Dollar: { ++ // Parse the register. 
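    // '$'-prefixed operands use either an ABI name or a bare index, e.g.
    // $a0, $r4 or $4 (example spellings; matchCPURegisterName and
    // matchAnyRegisterWithoutDollar provide the actual mappings), and a
    // '$'-prefixed identifier that is not a register falls through to the
    // symbol-reference case handled just below.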
++ SMLoc S = Parser.getTok().getLoc(); ++ ++ // Almost all registers have been parsed by custom parsers. There is only ++ // one exception to this. $zero (and it's alias $0) will reach this point ++ // for div, divu, and similar instructions because it is not an operand ++ // to the instruction definition but an explicit register. Special case ++ // this situation for now. ++ if (parseAnyRegister(Operands) != MatchOperand_NoMatch) ++ return false; ++ ++ // Maybe it is a symbol reference. ++ StringRef Identifier; ++ if (Parser.parseIdentifier(Identifier)) ++ return true; ++ ++ SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); ++ MCSymbol *Sym = getContext().getOrCreateSymbol("$" + Identifier); ++ // Otherwise create a symbol reference. ++ const MCExpr *Res = ++ MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext()); ++ ++ Operands.push_back(LoongArchOperand::CreateImm(Res, S, E, *this)); ++ return false; ++ } ++ default: { ++ LLVM_DEBUG(dbgs() << ".. generic integer expression\n"); ++ ++ const MCExpr *Expr; ++ SMLoc S = Parser.getTok().getLoc(); // Start location of the operand. ++ if (getParser().parseExpression(Expr)) ++ return true; ++ ++ SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); ++ ++ Operands.push_back(LoongArchOperand::CreateImm(Expr, S, E, *this)); ++ return false; + } +- llvm_unreachable("Unknown match type detected!"); ++ } // switch(getLexer().getKind()) ++ return true; ++} ++ ++bool LoongArchAsmParser::isEvaluated(const MCExpr *Expr) { ++ switch (Expr->getKind()) { ++ case MCExpr::Constant: ++ return true; ++ case MCExpr::SymbolRef: ++ return (cast(Expr)->getKind() != MCSymbolRefExpr::VK_None); ++ case MCExpr::Binary: { ++ const MCBinaryExpr *BE = cast(Expr); ++ if (!isEvaluated(BE->getLHS())) ++ return false; ++ return isEvaluated(BE->getRHS()); ++ } ++ case MCExpr::Unary: ++ return isEvaluated(cast(Expr)->getSubExpr()); ++ case MCExpr::Target: ++ return true; ++ } ++ return false; ++} ++ ++bool LoongArchAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, ++ SMLoc &EndLoc) { ++ return tryParseRegister(RegNo, StartLoc, EndLoc) != MatchOperand_Success; ++} ++ ++OperandMatchResultTy LoongArchAsmParser::tryParseRegister(unsigned &RegNo, ++ SMLoc &StartLoc, ++ SMLoc &EndLoc) { ++ SmallVector, 1> Operands; ++ OperandMatchResultTy ResTy = parseAnyRegister(Operands); ++ if (ResTy == MatchOperand_Success) { ++ assert(Operands.size() == 1); ++ LoongArchOperand &Operand = static_cast(*Operands.front()); ++ StartLoc = Operand.getStartLoc(); ++ EndLoc = Operand.getEndLoc(); ++ ++ // AFAIK, we only support numeric registers and named GPR's in CFI ++ // directives. ++ // Don't worry about eating tokens before failing. Using an unrecognised ++ // register is a parse error. ++ if (Operand.isGPRAsmReg()) { ++ // Resolve to GPR32 or GPR64 appropriately. ++ RegNo = is64Bit() ? Operand.getGPR64Reg() : Operand.getGPR32Reg(); ++ } ++ ++ return (RegNo == (unsigned)-1) ? MatchOperand_NoMatch ++ : MatchOperand_Success; ++ } ++ ++ assert(Operands.size() == 0); ++ return (RegNo == (unsigned)-1) ? 
MatchOperand_NoMatch : MatchOperand_Success; ++} ++ ++bool LoongArchAsmParser::parseMemOffset(const MCExpr *&Res) { ++ return getParser().parseExpression(Res); ++} ++ ++OperandMatchResultTy ++LoongArchAsmParser::parseMemOperand(OperandVector &Operands) { ++ MCAsmParser &Parser = getParser(); ++ LLVM_DEBUG(dbgs() << "parseMemOperand\n"); ++ const MCExpr *IdVal = nullptr; ++ SMLoc S; ++ OperandMatchResultTy Res = MatchOperand_NoMatch; ++ // First operand is the base. ++ S = Parser.getTok().getLoc(); ++ ++ Res = parseAnyRegister(Operands); ++ if (Res != MatchOperand_Success) ++ return Res; ++ ++ if (Parser.getTok().isNot(AsmToken::Comma)) { ++ Error(Parser.getTok().getLoc(), "',' expected"); ++ return MatchOperand_ParseFail; ++ } ++ ++ Parser.Lex(); // Eat the ',' token. ++ ++ if (parseMemOffset(IdVal)) ++ return MatchOperand_ParseFail; ++ ++ SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); ++ ++ // Replace the register operand with the memory operand. ++ std::unique_ptr op( ++ static_cast(Operands.back().release())); ++ // Remove the register from the operands. ++ // "op" will be managed by k_Memory. ++ Operands.pop_back(); ++ ++ // when symbol not defined, error report. ++ if (dyn_cast(IdVal)) { ++ return MatchOperand_ParseFail; ++ } ++ ++ // Add the memory operand. ++ if (dyn_cast(IdVal)) { ++ int64_t Imm; ++ if (IdVal->evaluateAsAbsolute(Imm)) ++ IdVal = MCConstantExpr::create(Imm, getContext()); ++ else ++ return MatchOperand_ParseFail; ++ } ++ ++ Operands.push_back(LoongArchOperand::CreateMem(std::move(op), IdVal, S, E, *this)); ++ return MatchOperand_Success; ++} ++ ++OperandMatchResultTy ++LoongArchAsmParser::parseAMemOperand(OperandVector &Operands) { ++ MCAsmParser &Parser = getParser(); ++ LLVM_DEBUG(dbgs() << "parseAMemOperand\n"); ++ const MCExpr *IdVal = nullptr; ++ SMLoc S; ++ OperandMatchResultTy Res = MatchOperand_NoMatch; ++ // First operand is the base. ++ S = Parser.getTok().getLoc(); ++ ++ Res = parseAnyRegister(Operands); ++ if (Res != MatchOperand_Success) ++ return Res; ++ ++ SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); ++ ++ // AM* instructions allow an optional '0' memory offset. ++ if (Parser.getTok().is(AsmToken::Comma)) { ++ Parser.Lex(); // Eat the ',' token. ++ ++ if (parseMemOffset(IdVal)) ++ return MatchOperand_ParseFail; ++ ++ // when symbol not defined, error report. ++ if (dyn_cast(IdVal)) ++ return MatchOperand_ParseFail; ++ ++ if (dyn_cast(IdVal)) { ++ int64_t Imm; ++ if (IdVal->evaluateAsAbsolute(Imm)) { ++ assert(Imm == 0 && "imm must be 0"); ++ IdVal = MCConstantExpr::create(Imm, getContext()); ++ } else { ++ return MatchOperand_ParseFail; ++ } ++ } ++ } else { ++ // Offset defaults to 0. ++ IdVal = MCConstantExpr::create(0, getContext()); ++ } ++ ++ // Replace the register operand with the memory operand. ++ std::unique_ptr op( ++ static_cast(Operands.back().release())); ++ // Remove the register from the operands. ++ // "op" will be managed by k_Memory. ++ Operands.pop_back(); ++ // Add the memory operand. 
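  // For the AM* atomics handled here both spellings are accepted (the
  // mnemonic is only an example): "amadd.w $a0, $a1, $a2, 0" and
  // "amadd.w $a0, $a1, $a2", with the omitted offset defaulting to the
  // constant 0 built above.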
++ Operands.push_back( ++ LoongArchOperand::CreateMem(std::move(op), IdVal, S, E, *this)); ++ return MatchOperand_Success; ++} ++ ++bool LoongArchAsmParser::searchSymbolAlias(OperandVector &Operands) { ++ MCAsmParser &Parser = getParser(); ++ MCSymbol *Sym = getContext().lookupSymbol(Parser.getTok().getIdentifier()); ++ if (!Sym) ++ return false; ++ ++ SMLoc S = Parser.getTok().getLoc(); ++ if (Sym->isVariable()) { ++ const MCExpr *Expr = Sym->getVariableValue(); ++ if (Expr->getKind() == MCExpr::SymbolRef) { ++ const MCSymbolRefExpr *Ref = static_cast(Expr); ++ StringRef DefSymbol = Ref->getSymbol().getName(); ++ if (DefSymbol.startswith("$")) { ++ OperandMatchResultTy ResTy = ++ matchAnyRegisterNameWithoutDollar(Operands, DefSymbol.substr(1), S); ++ if (ResTy == MatchOperand_Success) { ++ Parser.Lex(); ++ return true; ++ } ++ if (ResTy == MatchOperand_ParseFail) ++ llvm_unreachable("Should never ParseFail"); ++ } ++ } ++ } else if (Sym->isUnset()) { ++ // If symbol is unset, it might be created in the `parseSetAssignment` ++ // routine as an alias for a numeric register name. ++ // Lookup in the aliases list. ++ auto Entry = RegisterSets.find(Sym->getName()); ++ if (Entry != RegisterSets.end()) { ++ OperandMatchResultTy ResTy = ++ matchAnyRegisterWithoutDollar(Operands, Entry->getValue(), S); ++ if (ResTy == MatchOperand_Success) { ++ Parser.Lex(); ++ return true; ++ } ++ } ++ } ++ ++ return false; ++} ++ ++OperandMatchResultTy ++LoongArchAsmParser::matchAnyRegisterNameWithoutDollar(OperandVector &Operands, ++ StringRef Identifier, ++ SMLoc S) { ++ int Index = matchCPURegisterName(Identifier); ++ if (Index != -1) { ++ Operands.push_back(LoongArchOperand::createGPRReg( ++ Index, Identifier, getContext().getRegisterInfo(), S, ++ getLexer().getLoc(), *this)); ++ return MatchOperand_Success; ++ } ++ ++ Index = matchFPURegisterName(Identifier); ++ if (Index != -1) { ++ Operands.push_back(LoongArchOperand::createFGRReg( ++ Index, Identifier, getContext().getRegisterInfo(), S, ++ getLexer().getLoc(), *this)); ++ return MatchOperand_Success; ++ } ++ ++ Index = matchFCFRRegisterName(Identifier); ++ if (Index != -1) { ++ Operands.push_back(LoongArchOperand::createFCFRReg( ++ Index, Identifier, getContext().getRegisterInfo(), S, ++ getLexer().getLoc(), *this)); ++ return MatchOperand_Success; ++ } ++ ++ Index = matchFCSRRegisterName(Identifier); ++ if (Index != -1) { ++ Operands.push_back(LoongArchOperand::createFCSRReg( ++ Index, Identifier, getContext().getRegisterInfo(), S, ++ getLexer().getLoc(), *this)); ++ return MatchOperand_Success; ++ } ++ ++ Index = matchLSX128RegisterName(Identifier); ++ if (Index != -1) { ++ Operands.push_back(LoongArchOperand::createLSX128Reg( ++ Index, Identifier, getContext().getRegisterInfo(), S, ++ getLexer().getLoc(), *this)); ++ return MatchOperand_Success; ++ } ++ ++ Index = matchLASX256RegisterName(Identifier); ++ if (Index != -1) { ++ Operands.push_back(LoongArchOperand::createLASX256Reg( ++ Index, Identifier, getContext().getRegisterInfo(), S, ++ getLexer().getLoc(), *this)); ++ return MatchOperand_Success; ++ } ++ ++ return MatchOperand_NoMatch; ++} ++ ++OperandMatchResultTy ++LoongArchAsmParser::matchAnyRegisterWithoutDollar(OperandVector &Operands, ++ const AsmToken &Token, SMLoc S) { ++ if (Token.is(AsmToken::Identifier)) { ++ LLVM_DEBUG(dbgs() << ".. 
identifier\n"); ++ StringRef Identifier = Token.getIdentifier(); ++ OperandMatchResultTy ResTy = ++ matchAnyRegisterNameWithoutDollar(Operands, Identifier, S); ++ return ResTy; ++ } else if (Token.is(AsmToken::Integer)) { ++ LLVM_DEBUG(dbgs() << ".. integer\n"); ++ int64_t RegNum = Token.getIntVal(); ++ if (RegNum < 0 || RegNum > 31) { ++ // Show the error, but treat invalid register ++ // number as a normal one to continue parsing ++ // and catch other possible errors. ++ Error(getLexer().getLoc(), "invalid register number"); ++ } ++ Operands.push_back(LoongArchOperand::createNumericReg( ++ RegNum, Token.getString(), getContext().getRegisterInfo(), S, ++ Token.getLoc(), *this)); ++ return MatchOperand_Success; ++ } ++ ++ LLVM_DEBUG(dbgs() << Token.getKind() << "\n"); ++ ++ return MatchOperand_NoMatch; ++} ++ ++OperandMatchResultTy ++LoongArchAsmParser::matchAnyRegisterWithoutDollar(OperandVector &Operands, SMLoc S) { ++ auto Token = getLexer().peekTok(false); ++ return matchAnyRegisterWithoutDollar(Operands, Token, S); ++} ++ ++OperandMatchResultTy ++LoongArchAsmParser::parseAnyRegister(OperandVector &Operands) { ++ MCAsmParser &Parser = getParser(); ++ LLVM_DEBUG(dbgs() << "parseAnyRegister\n"); ++ ++ auto Token = Parser.getTok(); ++ ++ SMLoc S = Token.getLoc(); ++ ++ if (Token.isNot(AsmToken::Dollar)) { ++ LLVM_DEBUG(dbgs() << ".. !$ -> try sym aliasing\n"); ++ if (Token.is(AsmToken::Identifier)) { ++ if (searchSymbolAlias(Operands)) ++ return MatchOperand_Success; ++ } ++ LLVM_DEBUG(dbgs() << ".. !symalias -> NoMatch\n"); ++ return MatchOperand_NoMatch; ++ } ++ LLVM_DEBUG(dbgs() << ".. $\n"); ++ ++ OperandMatchResultTy ResTy = matchAnyRegisterWithoutDollar(Operands, S); ++ if (ResTy == MatchOperand_Success) { ++ Parser.Lex(); // $ ++ Parser.Lex(); // identifier ++ } ++ return ResTy; ++} ++ ++OperandMatchResultTy ++LoongArchAsmParser::parseJumpTarget(OperandVector &Operands) { ++ MCAsmParser &Parser = getParser(); ++ LLVM_DEBUG(dbgs() << "parseJumpTarget\n"); ++ ++ SMLoc S = getLexer().getLoc(); ++ ++ // Registers are a valid target and have priority over symbols. ++ OperandMatchResultTy ResTy = parseAnyRegister(Operands); ++ if (ResTy != MatchOperand_NoMatch) ++ return ResTy; ++ ++ // Integers and expressions are acceptable ++ const MCExpr *Expr = nullptr; ++ if (Parser.parseExpression(Expr)) { ++ // We have no way of knowing if a symbol was consumed so we must ParseFail ++ return MatchOperand_ParseFail; ++ } ++ Operands.push_back( ++ LoongArchOperand::CreateImm(Expr, S, getLexer().getLoc(), *this)); ++ return MatchOperand_Success; ++} ++ ++static std::string LoongArchMnemonicSpellCheck(StringRef S, ++ const FeatureBitset &FBS, ++ unsigned VariantID = 0); ++ ++bool LoongArchAsmParser::ParseInstruction(ParseInstructionInfo &Info, ++ StringRef Name, SMLoc NameLoc, ++ OperandVector &Operands) { ++ MCAsmParser &Parser = getParser(); ++ LLVM_DEBUG(dbgs() << "ParseInstruction\n"); ++ ++ // We have reached first instruction, module directive are now forbidden. ++ getTargetStreamer().forbidModuleDirective(); ++ ++ // Check if we have valid mnemonic ++ if (!mnemonicIsValid(Name)) { ++ FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); ++ std::string Suggestion = LoongArchMnemonicSpellCheck(Name, FBS); ++ return Error(NameLoc, "unknown instruction" + Suggestion); ++ } ++ ++ // First operand in MCInst is instruction mnemonic. ++ Operands.push_back(LoongArchOperand::CreateToken(Name, NameLoc, *this)); ++ ++ // Read the remaining operands. 
++ if (getLexer().isNot(AsmToken::EndOfStatement)) { ++ // Read the first operand. ++ if (parseOperand(Operands, Name)) { ++ SMLoc Loc = getLexer().getLoc(); ++ return Error(Loc, "unexpected token in argument list"); ++ } ++ ++ while (getLexer().is(AsmToken::Comma)) { ++ Parser.Lex(); // Eat the comma. ++ // Parse and remember the operand. ++ if (parseOperand(Operands, Name)) { ++ SMLoc Loc = getLexer().getLoc(); ++ return Error(Loc, "unexpected token in argument list"); ++ } ++ } ++ } ++ if (getLexer().isNot(AsmToken::EndOfStatement)) { ++ SMLoc Loc = getLexer().getLoc(); ++ return Error(Loc, "unexpected token in argument list"); ++ } ++ Parser.Lex(); // Consume the EndOfStatement. ++ return false; ++} ++ ++// FIXME: Given that these have the same name, these should both be ++// consistent on affecting the Parser. ++bool LoongArchAsmParser::reportParseError(Twine ErrorMsg) { ++ SMLoc Loc = getLexer().getLoc(); ++ return Error(Loc, ErrorMsg); ++} ++ ++bool LoongArchAsmParser::parseSetAssignment() { ++ StringRef Name; ++ const MCExpr *Value; ++ MCAsmParser &Parser = getParser(); ++ ++ if (Parser.parseIdentifier(Name)) ++ return reportParseError("expected identifier after .set"); ++ ++ if (getLexer().isNot(AsmToken::Comma)) ++ return reportParseError("unexpected token, expected comma"); ++ Lex(); // Eat comma ++ ++ if (!Parser.parseExpression(Value)) { ++ // Parse assignment of an expression including ++ // symbolic registers: ++ // .set $tmp, $BB0-$BB1 ++ // .set r2, $f2 ++ MCSymbol *Sym = getContext().getOrCreateSymbol(Name); ++ Sym->setVariableValue(Value); ++ } else { ++ return reportParseError("expected valid expression after comma"); ++ } ++ ++ return false; ++} ++ ++bool LoongArchAsmParser::parseDirectiveSet() { ++ const AsmToken &Tok = getParser().getTok(); ++ StringRef IdVal = Tok.getString(); ++ SMLoc Loc = Tok.getLoc(); ++ ++ if (IdVal == "bopt") { ++ Warning(Loc, "'bopt' feature is unsupported"); ++ getParser().Lex(); ++ return false; ++ } ++ if (IdVal == "nobopt") { ++ // We're already running in nobopt mode, so nothing to do. ++ getParser().Lex(); ++ return false; ++ } ++ ++ // It is just an identifier, look for an assignment. ++ return parseSetAssignment(); ++} ++ ++bool LoongArchAsmParser::ParseDirective(AsmToken DirectiveID) { ++ // This returns false if this function recognizes the directive ++ // regardless of whether it is successfully handles or reports an ++ // error. Otherwise it returns true to give the generic parser a ++ // chance at recognizing it. ++ ++ MCAsmParser &Parser = getParser(); ++ StringRef IDVal = DirectiveID.getString(); ++ ++ if (IDVal == ".end") { ++ while (getLexer().isNot(AsmToken::Eof)) ++ Parser.Lex(); ++ return false; ++ } ++ ++ if (IDVal == ".set") { ++ parseDirectiveSet(); ++ return false; ++ } ++ ++ if (IDVal == ".llvm_internal_loongarch_reallow_module_directive") { ++ parseInternalDirectiveReallowModule(); ++ return false; ++ } ++ ++ return true; ++} ++ ++bool LoongArchAsmParser::parseInternalDirectiveReallowModule() { ++ // If this is not the end of the statement, report an error. ++ if (getLexer().isNot(AsmToken::EndOfStatement)) { ++ reportParseError("unexpected token, expected end of statement"); ++ return false; ++ } ++ ++ getTargetStreamer().reallowModuleDirective(); ++ ++ getParser().Lex(); // Eat EndOfStatement token. 
++ return false; + } + + extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeLoongArchAsmParser() { + RegisterMCAsmParser X(getTheLoongArch32Target()); +- RegisterMCAsmParser Y(getTheLoongArch64Target()); ++ RegisterMCAsmParser A(getTheLoongArch64Target()); ++} ++ ++#define GET_REGISTER_MATCHER ++#define GET_MATCHER_IMPLEMENTATION ++#define GET_MNEMONIC_SPELL_CHECKER ++#include "LoongArchGenAsmMatcher.inc" ++ ++bool LoongArchAsmParser::mnemonicIsValid(StringRef Mnemonic) { ++ // Find the appropriate table for this asm variant. ++ const MatchEntry *Start, *End; ++ Start = std::begin(MatchTable0); ++ End = std::end(MatchTable0); ++ ++ // Search the table. ++ auto MnemonicRange = std::equal_range(Start, End, Mnemonic, LessOpcode()); ++ return MnemonicRange.first != MnemonicRange.second; + } +diff --git a/llvm/lib/Target/LoongArch/CMakeLists.txt b/llvm/lib/Target/LoongArch/CMakeLists.txt +index 4d8e81aea..8540b97ff 100644 +--- a/llvm/lib/Target/LoongArch/CMakeLists.txt ++++ b/llvm/lib/Target/LoongArch/CMakeLists.txt +@@ -1,14 +1,15 @@ +-add_llvm_component_group(LoongArch) ++add_llvm_component_group(LoongArch HAS_JIT) + + set(LLVM_TARGET_DEFINITIONS LoongArch.td) + + tablegen(LLVM LoongArchGenAsmMatcher.inc -gen-asm-matcher) + tablegen(LLVM LoongArchGenAsmWriter.inc -gen-asm-writer) ++tablegen(LLVM LoongArchGenCallingConv.inc -gen-callingconv) + tablegen(LLVM LoongArchGenDAGISel.inc -gen-dag-isel) + tablegen(LLVM LoongArchGenDisassemblerTables.inc -gen-disassembler) + tablegen(LLVM LoongArchGenInstrInfo.inc -gen-instr-info) +-tablegen(LLVM LoongArchGenMCPseudoLowering.inc -gen-pseudo-lowering) + tablegen(LLVM LoongArchGenMCCodeEmitter.inc -gen-emitter) ++tablegen(LLVM LoongArchGenMCPseudoLowering.inc -gen-pseudo-lowering) + tablegen(LLVM LoongArchGenRegisterInfo.inc -gen-register-info) + tablegen(LLVM LoongArchGenSubtargetInfo.inc -gen-subtarget) + +@@ -16,14 +17,20 @@ add_public_tablegen_target(LoongArchCommonTableGen) + + add_llvm_target(LoongArchCodeGen + LoongArchAsmPrinter.cpp +- LoongArchFrameLowering.cpp ++ LoongArchCCState.cpp ++ LoongArchExpandPseudo.cpp + LoongArchInstrInfo.cpp + LoongArchISelDAGToDAG.cpp + LoongArchISelLowering.cpp ++ LoongArchFrameLowering.cpp + LoongArchMCInstLower.cpp ++ LoongArchMachineFunction.cpp ++ LoongArchModuleISelDAGToDAG.cpp + LoongArchRegisterInfo.cpp + LoongArchSubtarget.cpp + LoongArchTargetMachine.cpp ++ LoongArchTargetObjectFile.cpp ++ LoongArchTargetTransformInfo.cpp + + LINK_COMPONENTS + Analysis +diff --git a/llvm/lib/Target/LoongArch/Disassembler/CMakeLists.txt b/llvm/lib/Target/LoongArch/Disassembler/CMakeLists.txt +index 1cce676cf..864be6313 100644 +--- a/llvm/lib/Target/LoongArch/Disassembler/CMakeLists.txt ++++ b/llvm/lib/Target/LoongArch/Disassembler/CMakeLists.txt +@@ -2,10 +2,8 @@ add_llvm_component_library(LLVMLoongArchDisassembler + LoongArchDisassembler.cpp + + LINK_COMPONENTS +- LoongArchDesc +- LoongArchInfo +- MC + MCDisassembler ++ LoongArchInfo + Support + + ADD_TO_COMPONENT +diff --git a/llvm/lib/Target/LoongArch/Disassembler/LoongArchDisassembler.cpp b/llvm/lib/Target/LoongArch/Disassembler/LoongArchDisassembler.cpp +index beb757c78..6468a0fc8 100644 +--- a/llvm/lib/Target/LoongArch/Disassembler/LoongArchDisassembler.cpp ++++ b/llvm/lib/Target/LoongArch/Disassembler/LoongArchDisassembler.cpp +@@ -1,4 +1,4 @@ +-//===-- LoongArchDisassembler.cpp - Disassembler for LoongArch ------------===// ++//===- LoongArchDisassembler.cpp - Disassembler for LoongArch -----------------------===// + // + // Part of the LLVM Project, under the 
Apache License v2.0 with LLVM Exceptions. + // See https://llvm.org/LICENSE.txt for license information. +@@ -6,140 +6,935 @@ + // + //===----------------------------------------------------------------------===// + // +-// This file implements the LoongArchDisassembler class. ++// This file is part of the LoongArch Disassembler. + // + //===----------------------------------------------------------------------===// + +-#include "MCTargetDesc/LoongArchBaseInfo.h" + #include "MCTargetDesc/LoongArchMCTargetDesc.h" +-#include "TargetInfo/LoongArchTargetInfo.h" ++#include "LoongArch.h" ++#include "llvm/ADT/ArrayRef.h" + #include "llvm/MC/MCContext.h" + #include "llvm/MC/MCDecoderOps.h" + #include "llvm/MC/MCDisassembler/MCDisassembler.h" + #include "llvm/MC/MCInst.h" +-#include "llvm/MC/MCInstrInfo.h" + #include "llvm/MC/MCRegisterInfo.h" + #include "llvm/MC/MCSubtargetInfo.h" ++#include "llvm/Support/Compiler.h" ++#include "llvm/Support/Debug.h" ++#include "llvm/Support/ErrorHandling.h" ++#include "llvm/Support/MathExtras.h" + #include "llvm/MC/TargetRegistry.h" +-#include "llvm/Support/Endian.h" ++#include "llvm/Support/raw_ostream.h" ++#include ++#include + + using namespace llvm; + + #define DEBUG_TYPE "loongarch-disassembler" + +-typedef MCDisassembler::DecodeStatus DecodeStatus; ++using DecodeStatus = MCDisassembler::DecodeStatus; + + namespace { ++ + class LoongArchDisassembler : public MCDisassembler { ++ + public: + LoongArchDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx) + : MCDisassembler(STI, Ctx) {} + ++ bool isFP64() const { return STI.getFeatureBits()[LoongArch::FeatureFP64Bit]; } ++ ++ bool is64Bit() const { return STI.getFeatureBits()[LoongArch::Feature64Bit]; } ++ + DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size, + ArrayRef Bytes, uint64_t Address, + raw_ostream &CStream) const override; + }; +-} // end namespace + +-static MCDisassembler *createLoongArchDisassembler(const Target &T, +- const MCSubtargetInfo &STI, +- MCContext &Ctx) { ++} // end anonymous namespace ++ ++// Forward declare these because the autogenerated code will reference them. ++// Definitions are further down. 
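++// The decodeInstruction() routine emitted into
++// LoongArchGenDisassemblerTables.inc (included below) calls these helpers by
++// the DecoderMethod names given in the .td files; they all share the shape
++//   DecodeStatus Decode<Foo>(MCInst &Inst, unsigned Value,
++//                            uint64_t Address, const void *Decoder);
++// and return MCDisassembler::Success or MCDisassembler::Fail.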
++static DecodeStatus DecodeGPR64RegisterClass(MCInst &Inst, ++ unsigned RegNo, ++ uint64_t Address, ++ const void *Decoder); ++ ++static DecodeStatus DecodeGPR32RegisterClass(MCInst &Inst, ++ unsigned RegNo, ++ uint64_t Address, ++ const void *Decoder); ++ ++static DecodeStatus DecodePtrRegisterClass(MCInst &Inst, ++ unsigned Insn, ++ uint64_t Address, ++ const void *Decoder); ++ ++static DecodeStatus DecodeFGR64RegisterClass(MCInst &Inst, ++ unsigned RegNo, ++ uint64_t Address, ++ const void *Decoder); ++ ++static DecodeStatus DecodeFGR32RegisterClass(MCInst &Inst, ++ unsigned RegNo, ++ uint64_t Address, ++ const void *Decoder); ++ ++static DecodeStatus DecodeFCSRRegisterClass(MCInst &Inst, ++ unsigned RegNo, ++ uint64_t Address, ++ const void *Decoder); ++ ++static DecodeStatus DecodeFCFRRegisterClass(MCInst &Inst, ++ unsigned RegNo, ++ uint64_t Address, ++ const void *Decoder); ++ ++static DecodeStatus DecodeLSX128BRegisterClass(MCInst &Inst, unsigned RegNo, ++ uint64_t Address, ++ const void *Decoder); ++ ++static DecodeStatus DecodeLSX128HRegisterClass(MCInst &Inst, unsigned RegNo, ++ uint64_t Address, ++ const void *Decoder); ++ ++static DecodeStatus DecodeLSX128WRegisterClass(MCInst &Inst, unsigned RegNo, ++ uint64_t Address, ++ const void *Decoder); ++ ++static DecodeStatus DecodeLSX128DRegisterClass(MCInst &Inst, unsigned RegNo, ++ uint64_t Address, ++ const void *Decoder); ++ ++static DecodeStatus DecodeLASX256BRegisterClass(MCInst &Inst, unsigned RegNo, ++ uint64_t Address, ++ const void *Decoder); ++ ++static DecodeStatus DecodeLASX256HRegisterClass(MCInst &Inst, unsigned RegNo, ++ uint64_t Address, ++ const void *Decoder); ++ ++static DecodeStatus DecodeLASX256WRegisterClass(MCInst &Inst, unsigned RegNo, ++ uint64_t Address, ++ const void *Decoder); ++ ++static DecodeStatus DecodeLASX256DRegisterClass(MCInst &Inst, unsigned RegNo, ++ uint64_t Address, ++ const void *Decoder); ++ ++static DecodeStatus DecodeBranchTarget(MCInst &Inst, ++ unsigned Offset, ++ uint64_t Address, ++ const void *Decoder); ++ ++static DecodeStatus DecodeJumpTarget(MCInst &Inst, ++ unsigned Insn, ++ uint64_t Address, ++ const void *Decoder); ++ ++static DecodeStatus DecodeMem(MCInst &Inst, ++ unsigned Insn, ++ uint64_t Address, ++ const void *Decoder); ++ ++static DecodeStatus DecodeAMem(MCInst &Inst, unsigned Insn, uint64_t Address, ++ const void *Decoder); ++ ++static DecodeStatus DecodeMemSimm14(MCInst &Inst, ++ unsigned Insn, ++ uint64_t Address, ++ const void *Decoder); ++ ++static DecodeStatus DecodeLSX128Mem(MCInst &Inst, unsigned Insn, ++ uint64_t Address, const void *Decoder); ++ ++static DecodeStatus DecodeLSX128Mem13(MCInst &Inst, unsigned Insn, ++ uint64_t Address, const void *Decoder); ++ ++static DecodeStatus DecodeLSX128Mem10(MCInst &Inst, unsigned Insn, ++ uint64_t Address, const void *Decoder); ++ ++static DecodeStatus DecodeLASX256Mem13(MCInst &Inst, unsigned Insn, ++ uint64_t Address, const void *Decoder); ++ ++static DecodeStatus DecodeLASX256Mem10(MCInst &Inst, unsigned Insn, ++ uint64_t Address, const void *Decoder); ++ ++static DecodeStatus DecodeLSX128memlsl(MCInst &Inst, unsigned Insn, ++ uint64_t Address, const void *Decoder); ++ ++static DecodeStatus DecodeLSX128memstl(MCInst &Inst, unsigned Insn, ++ uint64_t Address, const void *Decoder); ++ ++static DecodeStatus DecodeLASX256memlsl(MCInst &Inst, unsigned Insn, ++ uint64_t Address, const void *Decoder); ++ ++static DecodeStatus DecodeLASX256memstl(MCInst &Inst, unsigned Insn, ++ uint64_t Address, const void *Decoder); ++ 
++static DecodeStatus DecodeLASX256Mem(MCInst &Inst, unsigned Insn, ++ uint64_t Address, const void *Decoder); ++ ++static DecodeStatus DecodeFMem(MCInst &Inst, unsigned Insn, ++ uint64_t Address, ++ const void *Decoder); ++ ++template ++static DecodeStatus DecodeUImmWithOffsetAndScale(MCInst &Inst, unsigned Value, ++ uint64_t Address, ++ const void *Decoder); ++ ++template ++static DecodeStatus DecodeUImmWithOffset(MCInst &Inst, unsigned Value, ++ uint64_t Address, ++ const void *Decoder) { ++ return DecodeUImmWithOffsetAndScale(Inst, Value, Address, ++ Decoder); ++} ++ ++template ++static DecodeStatus DecodeSImmWithOffsetAndScale(MCInst &Inst, unsigned Value, ++ uint64_t Address, ++ const void *Decoder); ++ ++/// INSVE_[BHWD] have an implicit operand that the generated decoder doesn't ++/// handle. ++template ++static DecodeStatus DecodeINSVE_DF(MCInst &MI, InsnType insn, uint64_t Address, ++ const void *Decoder); ++ ++namespace llvm { ++ ++Target &getTheLoongArch32Target(); ++Target &getTheLoongArch64Target(); ++ ++} // end namespace llvm ++ ++static MCDisassembler *createLoongArchDisassembler( ++ const Target &T, ++ const MCSubtargetInfo &STI, ++ MCContext &Ctx) { + return new LoongArchDisassembler(STI, Ctx); + } + + extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeLoongArchDisassembler() { +- // Register the disassembler for each target. ++ // Register the disassembler. + TargetRegistry::RegisterMCDisassembler(getTheLoongArch32Target(), + createLoongArchDisassembler); + TargetRegistry::RegisterMCDisassembler(getTheLoongArch64Target(), + createLoongArchDisassembler); + } + +-static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, uint64_t RegNo, +- uint64_t Address, +- const MCDisassembler *Decoder) { +- if (RegNo >= 32) ++#include "LoongArchGenDisassemblerTables.inc" ++ ++static unsigned getReg(const void *D, unsigned RC, unsigned RegNo) { ++ const LoongArchDisassembler *Dis = static_cast(D); ++ const MCRegisterInfo *RegInfo = Dis->getContext().getRegisterInfo(); ++ if (RC == LoongArch::GPR64RegClassID || RC == LoongArch::GPR32RegClassID) { ++ // sync with the GPR32/GPR64 RegisterClass in LoongArchRegisterInfo.td ++ // that just like LoongArchAsmParser.cpp and LoongArchISelLowering.cpp ++ unsigned char indexes[] = { 0, 27, 28, 29, 1, 2, 3, 4, ++ 5, 6, 7, 8, 9, 10, 11, 12, ++ 13, 14, 15, 16, 17, 30, 31, 18, ++ 19, 20, 21, 22, 23, 24, 25, 26 ++ }; ++ assert(RegNo < sizeof(indexes)); ++ return *(RegInfo->getRegClass(RC).begin() + indexes[RegNo]); ++ } ++ return *(RegInfo->getRegClass(RC).begin() + RegNo); ++} ++ ++template ++static DecodeStatus DecodeINSVE_DF(MCInst &MI, InsnType insn, uint64_t Address, ++ const void *Decoder) { ++ using DecodeFN = DecodeStatus (*)(MCInst &, unsigned, uint64_t, const void *); ++ ++ // The size of the n field depends on the element size ++ // The register class also depends on this. 
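++  // Taken together, bits [21:16] form a prefix code:
++  //   00nnnn -> byte element,       4-bit index n
++  //   100nnn -> halfword element,   3-bit index n
++  //   1100nn -> word element,       2-bit index n
++  //   11100n -> doubleword element, 1-bit index n
++  // The mask chain below matches the prefix; n itself is re-read from bit 16
++  // with the matching width.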
++ InsnType tmp = fieldFromInstruction(insn, 17, 5); ++ unsigned NSize = 0; ++ DecodeFN RegDecoder = nullptr; ++ if ((tmp & 0x18) == 0x00) { ++ NSize = 4; ++ RegDecoder = DecodeLSX128BRegisterClass; ++ } else if ((tmp & 0x1c) == 0x10) { ++ NSize = 3; ++ RegDecoder = DecodeLSX128HRegisterClass; ++ } else if ((tmp & 0x1e) == 0x18) { ++ NSize = 2; ++ RegDecoder = DecodeLSX128WRegisterClass; ++ } else if ((tmp & 0x1f) == 0x1c) { ++ NSize = 1; ++ RegDecoder = DecodeLSX128DRegisterClass; ++ } else ++ llvm_unreachable("Invalid encoding"); ++ ++ assert(NSize != 0 && RegDecoder != nullptr); ++ ++ // $vd ++ tmp = fieldFromInstruction(insn, 6, 5); ++ if (RegDecoder(MI, tmp, Address, Decoder) == MCDisassembler::Fail) ++ return MCDisassembler::Fail; ++ // $vd_in ++ if (RegDecoder(MI, tmp, Address, Decoder) == MCDisassembler::Fail) ++ return MCDisassembler::Fail; ++ // $n ++ tmp = fieldFromInstruction(insn, 16, NSize); ++ MI.addOperand(MCOperand::createImm(tmp)); ++ // $vs ++ tmp = fieldFromInstruction(insn, 11, 5); ++ if (RegDecoder(MI, tmp, Address, Decoder) == MCDisassembler::Fail) ++ return MCDisassembler::Fail; ++ // $n2 ++ MI.addOperand(MCOperand::createImm(0)); ++ ++ return MCDisassembler::Success; ++} ++ ++/// Read four bytes from the ArrayRef and return 32 bit word. ++static DecodeStatus readInstruction32(ArrayRef Bytes, uint64_t Address, ++ uint64_t &Size, uint32_t &Insn) { ++ // We want to read exactly 4 Bytes of data. ++ if (Bytes.size() < 4) { ++ Size = 0; + return MCDisassembler::Fail; +- Inst.addOperand(MCOperand::createReg(LoongArch::R0 + RegNo)); ++ } ++ ++ Insn = (Bytes[0] << 0) | (Bytes[1] << 8) | (Bytes[2] << 16) | ++ (Bytes[3] << 24); ++ + return MCDisassembler::Success; + } + +-static DecodeStatus DecodeFPR32RegisterClass(MCInst &Inst, uint64_t RegNo, ++DecodeStatus LoongArchDisassembler::getInstruction(MCInst &Instr, uint64_t &Size, ++ ArrayRef Bytes, ++ uint64_t Address, ++ raw_ostream &CStream) const { ++ uint32_t Insn; ++ DecodeStatus Result; ++ Size = 0; ++ ++ // Attempt to read the instruction so that we can attempt to decode it. If ++ // the buffer is not 4 bytes long, let the higher level logic figure out ++ // what to do with a size of zero and MCDisassembler::Fail. ++ Result = readInstruction32(Bytes, Address, Size, Insn); ++ if (Result == MCDisassembler::Fail) ++ return MCDisassembler::Fail; ++ ++ // The only instruction size for standard encoded LoongArch. 
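++  // (4 bytes; the size is reported even if neither table below matches.)
++  // The table names follow TableGen's DecoderTable<Namespace><BitWidth>
++  // convention: DecoderTableLoongArch32 holds the "LoongArch" decoder
++  // namespace and DecoderTableLoongArch3232 the encodings tagged
++  // DecoderNamespace = "LoongArch32" (see LoongArch32InstrInfo.td).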
++ Size = 4; ++ ++ if (is64Bit()) { ++ LLVM_DEBUG(dbgs() << "Trying LoongArch (GPR64) table (32-bit opcodes):\n"); ++ Result = decodeInstruction(DecoderTableLoongArch32, Instr, Insn, ++ Address, this, STI); ++ if (Result != MCDisassembler::Fail) ++ return Result; ++ } ++ ++ LLVM_DEBUG(dbgs() << "Trying LoongArch32 (GPR32) table (32-bit opcodes):\n"); ++ Result = decodeInstruction(DecoderTableLoongArch3232, Instr, Insn, ++ Address, this, STI); ++ if (Result != MCDisassembler::Fail) ++ return Result; ++ ++ return MCDisassembler::Fail; ++} ++ ++static DecodeStatus DecodeGPR64RegisterClass(MCInst &Inst, ++ unsigned RegNo, + uint64_t Address, +- const MCDisassembler *Decoder) { +- if (RegNo >= 32) ++ const void *Decoder) { ++ if (RegNo > 31) + return MCDisassembler::Fail; +- Inst.addOperand(MCOperand::createReg(LoongArch::F0 + RegNo)); ++ ++ unsigned Reg = getReg(Decoder, LoongArch::GPR64RegClassID, RegNo); ++ Inst.addOperand(MCOperand::createReg(Reg)); + return MCDisassembler::Success; + } + +-static DecodeStatus DecodeFPR64RegisterClass(MCInst &Inst, uint64_t RegNo, ++static DecodeStatus DecodeGPR32RegisterClass(MCInst &Inst, ++ unsigned RegNo, + uint64_t Address, +- const MCDisassembler *Decoder) { +- if (RegNo >= 32) ++ const void *Decoder) { ++ if (RegNo > 31) + return MCDisassembler::Fail; +- Inst.addOperand(MCOperand::createReg(LoongArch::F0_64 + RegNo)); ++ unsigned Reg = getReg(Decoder, LoongArch::GPR32RegClassID, RegNo); ++ Inst.addOperand(MCOperand::createReg(Reg)); + return MCDisassembler::Success; + } + +-static DecodeStatus DecodeCFRRegisterClass(MCInst &Inst, uint64_t RegNo, ++static DecodeStatus DecodePtrRegisterClass(MCInst &Inst, ++ unsigned RegNo, + uint64_t Address, +- const MCDisassembler *Decoder) { +- if (RegNo >= 8) ++ const void *Decoder) { ++ if (static_cast(Decoder)->is64Bit()) ++ return DecodeGPR64RegisterClass(Inst, RegNo, Address, Decoder); ++ ++ return DecodeGPR32RegisterClass(Inst, RegNo, Address, Decoder); ++} ++ ++static DecodeStatus DecodeFGR64RegisterClass(MCInst &Inst, ++ unsigned RegNo, ++ uint64_t Address, ++ const void *Decoder) { ++ if (RegNo > 31) + return MCDisassembler::Fail; +- Inst.addOperand(MCOperand::createReg(LoongArch::FCC0 + RegNo)); ++ unsigned Reg = getReg(Decoder, LoongArch::FGR64RegClassID, RegNo); ++ Inst.addOperand(MCOperand::createReg(Reg)); + return MCDisassembler::Success; + } + +-static DecodeStatus DecodeFCSRRegisterClass(MCInst &Inst, uint64_t RegNo, ++static DecodeStatus DecodeFGR32RegisterClass(MCInst &Inst, ++ unsigned RegNo, ++ uint64_t Address, ++ const void *Decoder) { ++ if (RegNo > 31) ++ return MCDisassembler::Fail; ++ ++ unsigned Reg = getReg(Decoder, LoongArch::FGR32RegClassID, RegNo); ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ return MCDisassembler::Success; ++} ++ ++static DecodeStatus DecodeFCSRRegisterClass(MCInst &Inst, ++ unsigned RegNo, + uint64_t Address, +- const MCDisassembler *Decoder) { +- if (RegNo >= 4) ++ const void *Decoder) { ++ if (RegNo > 31) + return MCDisassembler::Fail; +- Inst.addOperand(MCOperand::createReg(LoongArch::FCSR0 + RegNo)); ++ ++ unsigned Reg = getReg(Decoder, LoongArch::FCSRRegClassID, RegNo); ++ Inst.addOperand(MCOperand::createReg(Reg)); + return MCDisassembler::Success; + } + +-template +-static DecodeStatus decodeUImmOperand(MCInst &Inst, uint64_t Imm, +- int64_t Address, +- const MCDisassembler *Decoder) { +- assert(isUInt(Imm) && "Invalid immediate"); +- Inst.addOperand(MCOperand::createImm(Imm + P)); ++static DecodeStatus DecodeFCFRRegisterClass(MCInst &Inst, ++ unsigned 
RegNo, ++ uint64_t Address, ++ const void *Decoder) { ++ if (RegNo > 7) ++ return MCDisassembler::Fail; ++ ++ unsigned Reg = getReg(Decoder, LoongArch::FCFRRegClassID, RegNo); ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ + return MCDisassembler::Success; + } + +-template +-static DecodeStatus decodeSImmOperand(MCInst &Inst, uint64_t Imm, +- int64_t Address, +- const MCDisassembler *Decoder) { +- assert(isUInt(Imm) && "Invalid immediate"); +- // Sign-extend the number in the bottom bits of Imm, then shift left +- // bits. +- Inst.addOperand(MCOperand::createImm(SignExtend64(Imm) << S)); ++static DecodeStatus DecodeMem(MCInst &Inst, ++ unsigned Insn, ++ uint64_t Address, ++ const void *Decoder) { ++ int Offset = SignExtend32<12>((Insn >> 10) & 0xfff); ++ unsigned Reg = fieldFromInstruction(Insn, 0, 5); ++ unsigned Base = fieldFromInstruction(Insn, 5, 5); ++ ++ Reg = getReg(Decoder, LoongArch::GPR32RegClassID, Reg); ++ Base = getReg(Decoder, LoongArch::GPR32RegClassID, Base); ++ ++ if (Inst.getOpcode() == LoongArch::SC_W || ++ Inst.getOpcode() == LoongArch::SC_D) ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ Inst.addOperand(MCOperand::createReg(Base)); ++ Inst.addOperand(MCOperand::createImm(Offset)); ++ + return MCDisassembler::Success; + } + +-#include "LoongArchGenDisassemblerTables.inc" ++static DecodeStatus DecodeAMem(MCInst &Inst, unsigned Insn, uint64_t Address, ++ const void *Decoder) { ++ unsigned Rd = fieldFromInstruction(Insn, 0, 5); ++ unsigned Rj = fieldFromInstruction(Insn, 5, 5); ++ unsigned Rk = fieldFromInstruction(Insn, 10, 5); + +-DecodeStatus LoongArchDisassembler::getInstruction(MCInst &MI, uint64_t &Size, +- ArrayRef Bytes, +- uint64_t Address, +- raw_ostream &CS) const { +- uint32_t Insn; +- DecodeStatus Result; ++ Rd = getReg(Decoder, LoongArch::GPR32RegClassID, Rd); ++ Rj = getReg(Decoder, LoongArch::GPR32RegClassID, Rj); ++ Rk = getReg(Decoder, LoongArch::GPR32RegClassID, Rk); + +- // We want to read exactly 4 bytes of data because all LoongArch instructions +- // are fixed 32 bits. +- if (Bytes.size() < 4) { +- Size = 0; ++ // Note the operands sequence is "rd,rk,rj". 
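++  // That is the assembly operand order of the AM* atomic instructions
++  // (e.g. amadd.w $rd, $rk, $rj), while the encoding keeps rd in bits [4:0],
++  // rj in [9:5] and rk in [14:10], hence rk and rj swap places here.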
++ Inst.addOperand(MCOperand::createReg(Rd)); ++ Inst.addOperand(MCOperand::createReg(Rk)); ++ Inst.addOperand(MCOperand::createReg(Rj)); ++ ++ return MCDisassembler::Success; ++} ++ ++static DecodeStatus DecodeMemSimm14(MCInst &Inst, ++ unsigned Insn, ++ uint64_t Address, ++ const void *Decoder) { ++ int Offset = SignExtend32<12>((Insn >> 10) & 0x3fff); ++ unsigned Reg = fieldFromInstruction(Insn, 0, 5); ++ unsigned Base = fieldFromInstruction(Insn, 5, 5); ++ ++ Reg = getReg(Decoder, LoongArch::GPR32RegClassID, Reg); ++ Base = getReg(Decoder, LoongArch::GPR32RegClassID, Base); ++ ++ if (Inst.getOpcode() == LoongArch::SC_W || ++ Inst.getOpcode() == LoongArch::SC_D) ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ Inst.addOperand(MCOperand::createReg(Base)); ++ Inst.addOperand(MCOperand::createImm(Offset)); ++ ++ return MCDisassembler::Success; ++} ++ ++static DecodeStatus DecodeLSX128Mem(MCInst &Inst, unsigned Insn, ++ uint64_t Address, const void *Decoder) { ++ int Offset = SignExtend32<12>(fieldFromInstruction(Insn, 10, 12)); ++ unsigned Reg = fieldFromInstruction(Insn, 0, 5); ++ unsigned Base = fieldFromInstruction(Insn, 5, 5); ++ Reg = getReg(Decoder, LoongArch::LSX128BRegClassID, Reg); ++ Base = getReg(Decoder, LoongArch::GPR32RegClassID, Base); ++ ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ Inst.addOperand(MCOperand::createReg(Base)); ++ ++ Inst.addOperand(MCOperand::createImm(Offset)); ++ ++ return MCDisassembler::Success; ++} ++ ++static DecodeStatus DecodeLSX128Mem13(MCInst &Inst, unsigned Insn, ++ uint64_t Address, const void *Decoder) { ++ int Offset = SignExtend32<13>(fieldFromInstruction(Insn, 5, 13)); ++ unsigned Reg = fieldFromInstruction(Insn, 0, 5); ++ Reg = getReg(Decoder, LoongArch::LSX128BRegClassID, Reg); ++ ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ ++ Inst.addOperand(MCOperand::createImm(Offset)); ++ ++ return MCDisassembler::Success; ++} ++ ++static DecodeStatus DecodeLSX128Mem10(MCInst &Inst, unsigned Insn, ++ uint64_t Address, const void *Decoder) { ++ int Offset = SignExtend32<10>(fieldFromInstruction(Insn, 5, 10)); ++ unsigned Reg = fieldFromInstruction(Insn, 0, 5); ++ Reg = getReg(Decoder, LoongArch::LSX128BRegClassID, Reg); ++ ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ ++ Inst.addOperand(MCOperand::createImm(Offset)); ++ ++ return MCDisassembler::Success; ++} ++ ++static DecodeStatus DecodeLASX256Mem13(MCInst &Inst, unsigned Insn, ++ uint64_t Address, const void *Decoder) { ++ int Offset = SignExtend32<13>(fieldFromInstruction(Insn, 5, 13)); ++ unsigned Reg = fieldFromInstruction(Insn, 0, 5); ++ Reg = getReg(Decoder, LoongArch::LASX256BRegClassID, Reg); ++ ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ ++ Inst.addOperand(MCOperand::createImm(Offset)); ++ ++ return MCDisassembler::Success; ++} ++ ++static DecodeStatus DecodeLASX256Mem10(MCInst &Inst, unsigned Insn, ++ uint64_t Address, const void *Decoder) { ++ int Offset = SignExtend32<10>(fieldFromInstruction(Insn, 5, 10)); ++ unsigned Reg = fieldFromInstruction(Insn, 0, 5); ++ Reg = getReg(Decoder, LoongArch::LASX256BRegClassID, Reg); ++ ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ ++ Inst.addOperand(MCOperand::createImm(Offset)); ++ ++ return MCDisassembler::Success; ++} ++ ++static DecodeStatus DecodeLSX128memstl(MCInst &Inst, unsigned Insn, ++ uint64_t Address, const void *Decoder) { ++ int Offset = SignExtend32<8>(fieldFromInstruction(Insn, 10, 8)); ++ unsigned Reg = fieldFromInstruction(Insn, 0, 5); ++ unsigned Base = 
fieldFromInstruction(Insn, 5, 5); ++ Reg = getReg(Decoder, LoongArch::LSX128BRegClassID, Reg); ++ Base = getReg(Decoder, LoongArch::GPR32RegClassID, Base); ++ unsigned idx; ++ ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ Inst.addOperand(MCOperand::createReg(Base)); ++ ++ switch (Inst.getOpcode()) { ++ default: ++ assert(false && "Unexpected instruction"); + return MCDisassembler::Fail; ++ break; ++ case LoongArch::VSTELM_B: ++ Inst.addOperand(MCOperand::createImm(Offset)); ++ idx = fieldFromInstruction(Insn, 18, 4); ++ Inst.addOperand(MCOperand::createImm(idx)); ++ break; ++ case LoongArch::VSTELM_H: ++ Inst.addOperand(MCOperand::createImm(Offset * 2)); ++ idx = fieldFromInstruction(Insn, 18, 3); ++ Inst.addOperand(MCOperand::createImm(idx)); ++ break; ++ case LoongArch::VSTELM_W: ++ Inst.addOperand(MCOperand::createImm(Offset * 4)); ++ idx = fieldFromInstruction(Insn, 18, 2); ++ Inst.addOperand(MCOperand::createImm(idx)); ++ break; ++ case LoongArch::VSTELM_D: ++ Inst.addOperand(MCOperand::createImm(Offset * 8)); ++ idx = fieldFromInstruction(Insn, 18, 1); ++ Inst.addOperand(MCOperand::createImm(idx)); ++ break; + } + +- Insn = support::endian::read32le(Bytes.data()); +- // Calling the auto-generated decoder function. +- Result = decodeInstruction(DecoderTable32, MI, Insn, Address, this, STI); +- Size = 4; ++ return MCDisassembler::Success; ++} ++ ++static DecodeStatus DecodeLSX128memlsl(MCInst &Inst, unsigned Insn, ++ uint64_t Address, const void *Decoder) { ++ ++ int Offset; ++ unsigned Reg, Base; ++ switch (Inst.getOpcode()) { ++ default: ++ assert(false && "Unexpected instruction"); ++ return MCDisassembler::Fail; ++ break; ++ case LoongArch::VLDREPL_B: ++ ++ Offset = SignExtend32<12>(fieldFromInstruction(Insn, 10, 12)); ++ Reg = fieldFromInstruction(Insn, 0, 5); ++ Base = fieldFromInstruction(Insn, 5, 5); ++ Reg = getReg(Decoder, LoongArch::LSX128BRegClassID, Reg); ++ Base = getReg(Decoder, LoongArch::GPR32RegClassID, Base); ++ ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ Inst.addOperand(MCOperand::createReg(Base)); ++ ++ Inst.addOperand(MCOperand::createImm(Offset)); ++ break; ++ case LoongArch::VLDREPL_H: ++ ++ Offset = SignExtend32<11>(fieldFromInstruction(Insn, 10, 11)); ++ Reg = fieldFromInstruction(Insn, 0, 5); ++ Base = fieldFromInstruction(Insn, 5, 5); ++ Reg = getReg(Decoder, LoongArch::LSX128HRegClassID, Reg); ++ Base = getReg(Decoder, LoongArch::GPR32RegClassID, Base); ++ ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ Inst.addOperand(MCOperand::createReg(Base)); ++ Inst.addOperand(MCOperand::createImm(Offset * 2)); ++ break; ++ case LoongArch::VLDREPL_W: ++ ++ Offset = SignExtend32<10>(fieldFromInstruction(Insn, 10, 10)); ++ Reg = fieldFromInstruction(Insn, 0, 5); ++ Base = fieldFromInstruction(Insn, 5, 5); ++ Reg = getReg(Decoder, LoongArch::LSX128WRegClassID, Reg); ++ Base = getReg(Decoder, LoongArch::GPR32RegClassID, Base); ++ ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ Inst.addOperand(MCOperand::createReg(Base)); ++ Inst.addOperand(MCOperand::createImm(Offset * 4)); ++ break; ++ case LoongArch::VLDREPL_D: ++ ++ Offset = SignExtend32<9>(fieldFromInstruction(Insn, 10, 9)); ++ Reg = fieldFromInstruction(Insn, 0, 5); ++ Base = fieldFromInstruction(Insn, 5, 5); ++ Reg = getReg(Decoder, LoongArch::LSX128WRegClassID, Reg); ++ Base = getReg(Decoder, LoongArch::GPR32RegClassID, Base); ++ ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ Inst.addOperand(MCOperand::createReg(Base)); ++ Inst.addOperand(MCOperand::createImm(Offset * 8)); ++ break; ++ } ++ ++ 
return MCDisassembler::Success; ++} ++static DecodeStatus DecodeLASX256Mem(MCInst &Inst, unsigned Insn, ++ uint64_t Address, const void *Decoder) { ++ int Offset = SignExtend32<12>(fieldFromInstruction(Insn, 10, 12)); ++ unsigned Reg = fieldFromInstruction(Insn, 0, 5); ++ unsigned Base = fieldFromInstruction(Insn, 5, 5); ++ Reg = getReg(Decoder, LoongArch::LASX256BRegClassID, Reg); ++ Base = getReg(Decoder, LoongArch::GPR32RegClassID, Base); ++ ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ Inst.addOperand(MCOperand::createReg(Base)); ++ ++ Inst.addOperand(MCOperand::createImm(Offset)); ++ ++ return MCDisassembler::Success; ++} ++ ++static DecodeStatus DecodeLASX256memstl(MCInst &Inst, unsigned Insn, ++ uint64_t Address, const void *Decoder) { ++ int Offset = SignExtend32<8>(fieldFromInstruction(Insn, 10, 8)); ++ unsigned Reg = fieldFromInstruction(Insn, 0, 5); ++ unsigned Base = fieldFromInstruction(Insn, 5, 5); ++ Reg = getReg(Decoder, LoongArch::LASX256BRegClassID, Reg); ++ Base = getReg(Decoder, LoongArch::GPR32RegClassID, Base); ++ unsigned idx; ++ ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ Inst.addOperand(MCOperand::createReg(Base)); ++ ++ switch (Inst.getOpcode()) { ++ default: ++ assert(false && "Unexpected instruction"); ++ return MCDisassembler::Fail; ++ break; ++ case LoongArch::XVSTELM_B: ++ Inst.addOperand(MCOperand::createImm(Offset)); ++ idx = fieldFromInstruction(Insn, 18, 5); ++ Inst.addOperand(MCOperand::createImm(idx)); ++ break; ++ case LoongArch::XVSTELM_H: ++ Inst.addOperand(MCOperand::createImm(Offset * 2)); ++ idx = fieldFromInstruction(Insn, 18, 4); ++ Inst.addOperand(MCOperand::createImm(idx)); ++ break; ++ case LoongArch::XVSTELM_W: ++ Inst.addOperand(MCOperand::createImm(Offset * 4)); ++ idx = fieldFromInstruction(Insn, 18, 3); ++ Inst.addOperand(MCOperand::createImm(idx)); ++ break; ++ case LoongArch::XVSTELM_D: ++ Inst.addOperand(MCOperand::createImm(Offset * 8)); ++ idx = fieldFromInstruction(Insn, 18, 2); ++ Inst.addOperand(MCOperand::createImm(idx)); ++ break; ++ } ++ ++ return MCDisassembler::Success; ++} ++ ++static DecodeStatus DecodeLASX256memlsl(MCInst &Inst, unsigned Insn, ++ uint64_t Address, const void *Decoder) { + +- return Result; ++ int Offset; ++ unsigned Reg, Base; ++ switch (Inst.getOpcode()) { ++ default: ++ assert(false && "Unexpected instruction"); ++ return MCDisassembler::Fail; ++ break; ++ case LoongArch::XVLDREPL_B: ++ ++ Offset = SignExtend32<12>(fieldFromInstruction(Insn, 10, 12)); ++ Reg = fieldFromInstruction(Insn, 0, 5); ++ Base = fieldFromInstruction(Insn, 5, 5); ++ Reg = getReg(Decoder, LoongArch::LASX256BRegClassID, Reg); ++ Base = getReg(Decoder, LoongArch::GPR32RegClassID, Base); ++ ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ Inst.addOperand(MCOperand::createReg(Base)); ++ ++ Inst.addOperand(MCOperand::createImm(Offset)); ++ break; ++ case LoongArch::XVLDREPL_H: ++ ++ Offset = SignExtend32<11>(fieldFromInstruction(Insn, 10, 11)); ++ Reg = fieldFromInstruction(Insn, 0, 5); ++ Base = fieldFromInstruction(Insn, 5, 5); ++ Reg = getReg(Decoder, LoongArch::LASX256HRegClassID, Reg); ++ Base = getReg(Decoder, LoongArch::GPR32RegClassID, Base); ++ ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ Inst.addOperand(MCOperand::createReg(Base)); ++ Inst.addOperand(MCOperand::createImm(Offset * 2)); ++ break; ++ case LoongArch::XVLDREPL_W: ++ ++ Offset = SignExtend32<10>(fieldFromInstruction(Insn, 10, 10)); ++ Reg = fieldFromInstruction(Insn, 0, 5); ++ Base = fieldFromInstruction(Insn, 5, 5); ++ Reg = getReg(Decoder, 
LoongArch::LASX256WRegClassID, Reg); ++ Base = getReg(Decoder, LoongArch::GPR32RegClassID, Base); ++ ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ Inst.addOperand(MCOperand::createReg(Base)); ++ Inst.addOperand(MCOperand::createImm(Offset * 4)); ++ break; ++ case LoongArch::XVLDREPL_D: ++ ++ Offset = SignExtend32<9>(fieldFromInstruction(Insn, 10, 9)); ++ Reg = fieldFromInstruction(Insn, 0, 5); ++ Base = fieldFromInstruction(Insn, 5, 5); ++ Reg = getReg(Decoder, LoongArch::LASX256WRegClassID, Reg); ++ Base = getReg(Decoder, LoongArch::GPR32RegClassID, Base); ++ ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ Inst.addOperand(MCOperand::createReg(Base)); ++ Inst.addOperand(MCOperand::createImm(Offset * 8)); ++ break; ++ } ++ ++ return MCDisassembler::Success; ++} ++ ++static DecodeStatus DecodeFMem(MCInst &Inst, ++ unsigned Insn, ++ uint64_t Address, ++ const void *Decoder) { ++ int Offset = SignExtend32<12>((Insn >> 10) & 0xffff); ++ unsigned Reg = fieldFromInstruction(Insn, 0, 5); ++ unsigned Base = fieldFromInstruction(Insn, 5, 5); ++ Reg = getReg(Decoder, LoongArch::FGR64RegClassID, Reg); ++ Base = getReg(Decoder, LoongArch::GPR32RegClassID, Base); ++ ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ Inst.addOperand(MCOperand::createReg(Base)); ++ Inst.addOperand(MCOperand::createImm(Offset)); ++ ++ return MCDisassembler::Success; ++} ++ ++static DecodeStatus DecodeLSX128BRegisterClass(MCInst &Inst, unsigned RegNo, ++ uint64_t Address, ++ const void *Decoder) { ++ if (RegNo > 31) ++ return MCDisassembler::Fail; ++ unsigned Reg = getReg(Decoder, LoongArch::LSX128BRegClassID, RegNo); ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ return MCDisassembler::Success; ++} ++ ++static DecodeStatus DecodeLSX128HRegisterClass(MCInst &Inst, unsigned RegNo, ++ uint64_t Address, ++ const void *Decoder) { ++ if (RegNo > 31) ++ return MCDisassembler::Fail; ++ unsigned Reg = getReg(Decoder, LoongArch::LSX128HRegClassID, RegNo); ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ return MCDisassembler::Success; ++} ++ ++static DecodeStatus DecodeLSX128WRegisterClass(MCInst &Inst, unsigned RegNo, ++ uint64_t Address, ++ const void *Decoder) { ++ if (RegNo > 31) ++ return MCDisassembler::Fail; ++ unsigned Reg = getReg(Decoder, LoongArch::LSX128WRegClassID, RegNo); ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ return MCDisassembler::Success; ++} ++ ++static DecodeStatus DecodeLSX128DRegisterClass(MCInst &Inst, unsigned RegNo, ++ uint64_t Address, ++ const void *Decoder) { ++ if (RegNo > 31) ++ return MCDisassembler::Fail; ++ unsigned Reg = getReg(Decoder, LoongArch::LSX128DRegClassID, RegNo); ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ return MCDisassembler::Success; ++} ++ ++static DecodeStatus DecodeLASX256BRegisterClass(MCInst &Inst, unsigned RegNo, ++ uint64_t Address, ++ const void *Decoder) { ++ if (RegNo > 31) ++ return MCDisassembler::Fail; ++ unsigned Reg = getReg(Decoder, LoongArch::LASX256BRegClassID, RegNo); ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ return MCDisassembler::Success; ++} ++ ++static DecodeStatus DecodeLASX256HRegisterClass(MCInst &Inst, unsigned RegNo, ++ uint64_t Address, ++ const void *Decoder) { ++ if (RegNo > 31) ++ return MCDisassembler::Fail; ++ unsigned Reg = getReg(Decoder, LoongArch::LASX256HRegClassID, RegNo); ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ return MCDisassembler::Success; ++} ++ ++static DecodeStatus DecodeLASX256WRegisterClass(MCInst &Inst, unsigned RegNo, ++ uint64_t Address, ++ const void *Decoder) { ++ if (RegNo > 31) ++ return 
MCDisassembler::Fail; ++ unsigned Reg = getReg(Decoder, LoongArch::LASX256WRegClassID, RegNo); ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ return MCDisassembler::Success; ++} ++ ++static DecodeStatus DecodeLASX256DRegisterClass(MCInst &Inst, unsigned RegNo, ++ uint64_t Address, ++ const void *Decoder) { ++ if (RegNo > 31) ++ return MCDisassembler::Fail; ++ unsigned Reg = getReg(Decoder, LoongArch::LASX256DRegClassID, RegNo); ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ return MCDisassembler::Success; ++} ++ ++static DecodeStatus DecodeBranchTarget(MCInst &Inst, ++ unsigned Offset, ++ uint64_t Address, ++ const void *Decoder) { ++ int32_t BranchOffset; ++ // Similar to LoongArchAsmParser::processInstruction, decode the branch target ++ // for different instructions. ++ switch (Inst.getOpcode()) { ++ default: ++ llvm_unreachable(""); ++ case LoongArch::BEQ: ++ case LoongArch::BNE: ++ case LoongArch::BLT: ++ case LoongArch::BGE: ++ case LoongArch::BLTU: ++ case LoongArch::BGEU: ++ BranchOffset = (SignExtend32<16>(Offset) * 4); ++ break; ++ case LoongArch::BEQZ: ++ case LoongArch::BNEZ: ++ case LoongArch::BCEQZ: ++ case LoongArch::BCNEZ: ++ BranchOffset = (SignExtend32<21>(Offset) * 4); ++ break; ++ case LoongArch::B: ++ case LoongArch::BL: ++ BranchOffset = (SignExtend32<26>(Offset) * 4); ++ break; ++ } ++ Inst.addOperand(MCOperand::createImm(BranchOffset)); ++ return MCDisassembler::Success; ++} ++ ++static DecodeStatus DecodeJumpTarget(MCInst &Inst, ++ unsigned Insn, ++ uint64_t Address, ++ const void *Decoder) { ++ unsigned hi10 = fieldFromInstruction(Insn, 0, 10); ++ unsigned lo16 = fieldFromInstruction(Insn, 10, 16); ++ int32_t JumpOffset = SignExtend32<28>((hi10 << 16 | lo16) << 2); ++ Inst.addOperand(MCOperand::createImm(JumpOffset)); ++ return MCDisassembler::Success; ++} ++ ++template ++static DecodeStatus DecodeUImmWithOffsetAndScale(MCInst &Inst, unsigned Value, ++ uint64_t Address, ++ const void *Decoder) { ++ Value &= ((1 << Bits) - 1); ++ Value *= Scale; ++ Inst.addOperand(MCOperand::createImm(Value + Offset)); ++ return MCDisassembler::Success; ++} ++ ++template ++static DecodeStatus DecodeSImmWithOffsetAndScale(MCInst &Inst, unsigned Value, ++ uint64_t Address, ++ const void *Decoder) { ++ int32_t Imm = SignExtend32(Value) * ScaleBy; ++ Inst.addOperand(MCOperand::createImm(Imm + Offset)); ++ return MCDisassembler::Success; + } +diff --git a/llvm/lib/Target/LoongArch/LoongArch.h b/llvm/lib/Target/LoongArch/LoongArch.h +index e6c9c24dd..73fd4a628 100644 +--- a/llvm/lib/Target/LoongArch/LoongArch.h ++++ b/llvm/lib/Target/LoongArch/LoongArch.h +@@ -1,4 +1,4 @@ +-//===-- LoongArch.h - Top-level interface for LoongArch ---------*- C++ -*-===// ++//===-- LoongArch.h - Top-level interface for LoongArch representation ----*- C++ -*-===// + // + // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + // See https://llvm.org/LICENSE.txt for license information. +@@ -6,33 +6,32 @@ + // + //===----------------------------------------------------------------------===// + // +-// This file contains the entry points for global functions defined in the LLVM +-// LoongArch back-end. ++// This file contains the entry points for global functions defined in ++// the LLVM LoongArch back-end. 
+ // + //===----------------------------------------------------------------------===// + + #ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCH_H + #define LLVM_LIB_TARGET_LOONGARCH_LOONGARCH_H + +-#include "MCTargetDesc/LoongArchBaseInfo.h" ++#include "MCTargetDesc/LoongArchMCTargetDesc.h" + #include "llvm/Target/TargetMachine.h" + + namespace llvm { +-class LoongArchTargetMachine; +-class AsmPrinter; +-class FunctionPass; +-class MCInst; +-class MCOperand; +-class MachineInstr; +-class MachineOperand; ++ class LoongArchTargetMachine; ++ class ModulePass; ++ class FunctionPass; ++ class LoongArchSubtarget; ++ class LoongArchTargetMachine; ++ class InstructionSelector; ++ class PassRegistry; + +-bool lowerLoongArchMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, +- AsmPrinter &AP); +-bool lowerLoongArchMachineOperandToMCOperand(const MachineOperand &MO, +- MCOperand &MCOp, +- const AsmPrinter &AP); ++ FunctionPass *createLoongArchModuleISelDagPass(); ++ FunctionPass *createLoongArchOptimizePICCallPass(); ++ FunctionPass *createLoongArchBranchExpansion(); ++ FunctionPass *createLoongArchExpandPseudoPass(); + +-FunctionPass *createLoongArchISelDag(LoongArchTargetMachine &TM); +-} // end namespace llvm ++ void initializeLoongArchBranchExpansionPass(PassRegistry &); ++} // end namespace llvm; + +-#endif // LLVM_LIB_TARGET_LOONGARCH_LOONGARCH_H ++#endif +diff --git a/llvm/lib/Target/LoongArch/LoongArch.td b/llvm/lib/Target/LoongArch/LoongArch.td +index bf465c27e..703c1ba50 100644 +--- a/llvm/lib/Target/LoongArch/LoongArch.td ++++ b/llvm/lib/Target/LoongArch/LoongArch.td +@@ -1,139 +1,107 @@ +-//===-- LoongArch.td - Describe the LoongArch Target -------*- tablegen -*-===// ++//===-- LoongArch.td - Describe the LoongArch Target Machine ---------*- tablegen -*-===// + // + // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + // See https://llvm.org/LICENSE.txt for license information. + // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + // + //===----------------------------------------------------------------------===// ++// This is the top level entry point for the LoongArch target. ++//===----------------------------------------------------------------------===// ++ ++//===----------------------------------------------------------------------===// ++// Target-independent interfaces ++//===----------------------------------------------------------------------===// + + include "llvm/Target/Target.td" + ++// The overall idea of the PredicateControl class is to chop the Predicates list ++// into subsets that are usually overridden independently. This allows ++// subclasses to partially override the predicates of their superclasses without ++// having to re-add all the existing predicates. ++class PredicateControl { ++ // Predicates for the encoding scheme in use such as HasStdEnc ++ list EncodingPredicates = []; ++ // Predicates for the GPR size such as is64Bit ++ list GPRPredicates = []; ++ // Predicates for the FGR size and layout such as IsFP64bit ++ list FGRPredicates = []; ++ // Predicates for the instruction group membership such as ISA's. ++ list InsnPredicates = []; ++ // Predicate for the ISA extension that an instruction belongs to. ++ list ExtPredicate = []; ++ // Predicate for marking the instruction as usable in hard-float mode only. 
++ list HardFloatPredicate = []; ++ // Predicates for anything else ++ list AdditionalPredicates = []; ++ list Predicates = !listconcat(EncodingPredicates, ++ GPRPredicates, ++ FGRPredicates, ++ InsnPredicates, ++ HardFloatPredicate, ++ ExtPredicate, ++ AdditionalPredicates); ++} ++ ++// Like Requires<> but for the AdditionalPredicates list ++class AdditionalRequires preds> { ++ list AdditionalPredicates = preds; ++} ++ + //===----------------------------------------------------------------------===// +-// LoongArch subtarget features and instruction predicates. ++// LoongArch Subtarget features // + //===----------------------------------------------------------------------===// + +-// LoongArch is divided into two versions, the 32-bit version (LA32) and the +-// 64-bit version (LA64). +-def Feature64Bit +- : SubtargetFeature<"64bit", "HasLA64", "true", +- "LA64 Basic Integer and Privilege Instruction Set">; +-def IsLA64 +- : Predicate<"Subtarget->is64Bit()">, +- AssemblerPredicate<(all_of Feature64Bit), +- "LA64 Basic Integer and Privilege Instruction Set">; +-def IsLA32 +- : Predicate<"!Subtarget->is64Bit()">, +- AssemblerPredicate<(all_of(not Feature64Bit)), +- "LA32 Basic Integer and Privilege Instruction Set">; +- +-defvar LA32 = DefaultMode; +-def LA64 : HwMode<"+64bit">; +- +-// Single Precision floating point +-def FeatureBasicF +- : SubtargetFeature<"f", "HasBasicF", "true", +- "'F' (Single-Precision Floating-Point)">; +-def HasBasicF +- : Predicate<"Subtarget->hasBasicF()">, +- AssemblerPredicate<(all_of FeatureBasicF), +- "'F' (Single-Precision Floating-Point)">; +- +-// Double Precision floating point +-def FeatureBasicD +- : SubtargetFeature<"d", "HasBasicD", "true", +- "'D' (Double-Precision Floating-Point)", +- [FeatureBasicF]>; +-def HasBasicD +- : Predicate<"Subtarget->hasBasicD()">, +- AssemblerPredicate<(all_of FeatureBasicD), +- "'D' (Double-Precision Floating-Point)">; +- +-// Loongson SIMD eXtension (LSX) +-def FeatureExtLSX +- : SubtargetFeature<"lsx", "HasExtLSX", "true", +- "'LSX' (Loongson SIMD Extension)", [FeatureBasicD]>; +-def HasExtLSX +- : Predicate<"Subtarget->hasExtLSX()">, +- AssemblerPredicate<(all_of FeatureExtLSX), +- "'LSX' (Loongson SIMD Extension)">; +- +-// Loongson Advanced SIMD eXtension (LASX) +-def FeatureExtLASX +- : SubtargetFeature<"lasx", "HasExtLASX", "true", +- "'LASX' (Loongson Advanced SIMD Extension)", +- [FeatureExtLSX]>; +-def HasExtLASX +- : Predicate<"Subtarget->hasExtLASX()">, +- AssemblerPredicate<(all_of FeatureExtLASX), +- "'LASX' (Loongson Advanced SIMD Extension)">; +- +-// Loongson VirtualiZation (LVZ) +-def FeatureExtLVZ +- : SubtargetFeature<"lvz", "HasExtLVZ", "true", +- "'LVZ' (Loongson Virtualization Extension)">; +-def HasExtLVZ +- : Predicate<"Subtarget->hasExtLVZ()">, +- AssemblerPredicate<(all_of FeatureExtLVZ), +- "'LVZ' (Loongson Virtualization Extension)">; +- +-// Loongson Binary Translation (LBT) +-def FeatureExtLBT +- : SubtargetFeature<"lbt", "HasExtLBT", "true", +- "'LBT' (Loongson Binary Translation Extension)">; +-def HasExtLBT +- : Predicate<"Subtarget->hasExtLBT()">, +- AssemblerPredicate<(all_of FeatureExtLBT), +- "'LBT' (Loongson Binary Translation Extension)">; +- ++def FeatureFP64Bit : SubtargetFeature<"fp64", "IsFP64bit", "true", ++ "Support 64-bit FP registers">; ++def FeatureSingleFloat : SubtargetFeature<"single-float", "IsSingleFloat", ++ "true", "Only supports single precision float">; ++def FeatureSoftFloat : SubtargetFeature<"soft-float", "IsSoftFloat", "true", ++ "Does not support floating 
point instructions">; ++def Feature64Bit : SubtargetFeature<"64bit", "HasLA64", "true", ++ "Support LA64 ISA", ++ [FeatureFP64Bit]>; ++def FeatureLSX : SubtargetFeature<"lsx", "HasLSX", "true", "Support LSX">; ++ ++def FeatureLASX : SubtargetFeature<"lasx", "HasLASX", "true", "Support LASX", [FeatureLSX]>; ++ ++def FeatureUnalignedAccess ++ : SubtargetFeature<"unaligned-access", "UnalignedAccess", "true", ++ "Allow all unaligned memory access">; + //===----------------------------------------------------------------------===// +-// Registers, instruction descriptions ... ++// Register File, Calling Conv, Instruction Descriptions + //===----------------------------------------------------------------------===// + + include "LoongArchRegisterInfo.td" +-include "LoongArchCallingConv.td" + include "LoongArchInstrInfo.td" ++include "LoongArchCallingConv.td" ++ ++def LoongArchInstrInfo : InstrInfo; + + //===----------------------------------------------------------------------===// + // LoongArch processors supported. + //===----------------------------------------------------------------------===// + +-def : ProcessorModel<"generic-la32", NoSchedModel, []>; +-def : ProcessorModel<"generic-la64", NoSchedModel, [Feature64Bit]>; ++def : ProcessorModel<"la264", NoSchedModel, [Feature64Bit]>; + +-def : ProcessorModel<"la464", NoSchedModel, [Feature64Bit, +- FeatureExtLASX, +- FeatureExtLVZ, +- FeatureExtLBT]>; ++def : ProcessorModel<"la364", NoSchedModel, [Feature64Bit]>; + +-//===----------------------------------------------------------------------===// +-// Define the LoongArch target. +-//===----------------------------------------------------------------------===// +- +-def LoongArchInstrInfo : InstrInfo { +- // guess mayLoad, mayStore, and hasSideEffects +- // This option is a temporary migration help. It will go away. +- let guessInstructionProperties = 1; +-} ++def : ProcessorModel<"la464", NoSchedModel, ++ [Feature64Bit, FeatureUnalignedAccess]>; + + def LoongArchAsmParser : AsmParser { +- let ShouldEmitMatchRegisterAltName = 1; +- let AllowDuplicateRegisterNames = 1; ++ let ShouldEmitMatchRegisterName = 0; + } + + def LoongArchAsmParserVariant : AsmParserVariant { + int Variant = 0; ++ + // Recognize hard coded registers. + string RegisterPrefix = "$"; + } + +-def LoongArchAsmWriter : AsmWriter { +- int PassSubtarget = 1; +-} +- + def LoongArch : Target { + let InstructionSet = LoongArchInstrInfo; + let AssemblyParsers = [LoongArchAsmParser]; + let AssemblyParserVariants = [LoongArchAsmParserVariant]; +- let AssemblyWriters = [LoongArchAsmWriter]; + let AllowRegisterRenaming = 1; + } +diff --git a/llvm/lib/Target/LoongArch/LoongArch32InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArch32InstrInfo.td +new file mode 100644 +index 000000000..e85fce2fd +--- /dev/null ++++ b/llvm/lib/Target/LoongArch/LoongArch32InstrInfo.td +@@ -0,0 +1,716 @@ ++//===- LoongArch32InstrInfo.td - Target Description for LoongArch Target -*- tablegen -*-=// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file describes LoongArch32 instructions. ++// ++//===----------------------------------------------------------------------===// ++ ++//===---------------------------------------------------------------------===/ ++// Instruction Definitions. 
++//===---------------------------------------------------------------------===/ ++ ++let DecoderNamespace = "LoongArch32" in { ++ /// ++ /// R2 ++ /// ++ def CLO_W : Count1<"clo.w", GPR32Opnd, ctlz>, R2I<0b00100>; ++ def CLZ_W : Int_Reg2<"clz.w", GPR32Opnd, ctlz>, R2I<0b00101>; ++ def CTO_W : Count1<"cto.w", GPR32Opnd, cttz>, R2I<0b00110>; ++ def CTZ_W : Int_Reg2<"ctz.w", GPR32Opnd, cttz>, R2I<0b00111>; ++ ++ def REVB_2H : Int_Reg2<"revb.2h", GPR32Opnd>, R2I<0b01100>;//see below bswap pattern ++ ++ def BITREV_4B : Int_Reg2<"bitrev.4b", GPR32Opnd>, R2I<0b10010>; ++ def BITREV_W : Int_Reg2<"bitrev.w", GPR32Opnd, bitreverse>, R2I<0b10100>; ++ ++ let isCodeGenOnly = 1 in { ++ def EXT_W_H32 : SignExtInReg<"ext.w.h", GPR32Opnd, i16>, R2I<0b10110>; ++ def EXT_W_B32 : SignExtInReg<"ext.w.b", GPR32Opnd, i8>, R2I<0b10111>; ++ ++ } ++ ++ def CPUCFG : Int_Reg2<"cpucfg", GPR32Opnd, int_loongarch_cpucfg>, R2I<0b11011>; ++ def RDTIMEL_W32 : Int_Reg2_Rdtime<"rdtimel.w", GPR32Opnd>, R2I<0b11000>; ++ def RDTIMEH_W32 : Int_Reg2_Rdtime<"rdtimeh.w", GPR32Opnd>, R2I<0b11001>; ++ ++ /// ++ /// R3 ++ /// ++ def ADD_W : Int_Reg3<"add.w", GPR32Opnd, add>, R3I<0b0100000>; ++ def SUB_W : Int_Reg3<"sub.w", GPR32Opnd, sub>, R3I<0b0100010>; ++ ++ let isCodeGenOnly = 1 in { ++ def SLT32 : SetCC_R<"slt", GPR32Opnd, setlt>, R3I<0b0100100>; ++ def SLTU32 : SetCC_R<"sltu", GPR32Opnd, setult>, R3I<0b0100101>; ++ def MASKEQZ32 : Int_Reg3<"maskeqz", GPR32Opnd>, R3I<0b0100110>;//see below patterns ++ def MASKNEZ32 : Int_Reg3<"masknez", GPR32Opnd>, R3I<0b0100111>;//see below patterns ++ ++ def NOR32 : Nor<"nor", GPR32Opnd>, R3I<0b0101000>; ++ def AND32 : Int_Reg3<"and", GPR32Opnd, and>, R3I<0b0101001>; ++ def OR32 : Int_Reg3<"or", GPR32Opnd, or>, R3I<0b0101010>; ++ def XOR32 : Int_Reg3<"xor", GPR32Opnd, xor>, R3I<0b0101011>; ++ def ANDN32 : Int_Reg3<"andn", GPR32Opnd>, R3I<0b0101101>; ++ def ORN32 : Int_Reg3<"orn", GPR32Opnd>, R3I<0b0101100>; ++ } ++ ++ def SLL_W : Shift_Var<"sll.w", GPR32Opnd, shl>, R3I<0b0101110>; ++ def SRL_W : Shift_Var<"srl.w", GPR32Opnd, srl>, R3I<0b0101111>; ++ def SRA_W : Shift_Var<"sra.w", GPR32Opnd, sra>, R3I<0b0110000>; ++ def ROTR_W: Shift_Var<"rotr.w", GPR32Opnd, rotr>, R3I<0b0110110>; ++ ++ def MUL_W : Int_Reg3<"mul.w", GPR32Opnd, mul>, R3I<0b0111000>; ++ def MULH_W : Int_Reg3<"mulh.w", GPR32Opnd, mulhs>, R3I<0b0111001>; ++ def MULH_WU : Int_Reg3<"mulh.wu", GPR32Opnd, mulhu>, R3I<0b0111010>; ++ ++let usesCustomInserter = 1 in { ++ def DIV_W : Int_Reg3<"div.w", GPR32Opnd, sdiv>, R3I<0b1000000>; ++ def MOD_W : Int_Reg3<"mod.w", GPR32Opnd, srem>, R3I<0b1000001>; ++ def DIV_WU : Int_Reg3<"div.wu", GPR32Opnd, udiv>, R3I<0b1000010>; ++ def MOD_WU : Int_Reg3<"mod.wu", GPR32Opnd, urem>, R3I<0b1000011>; ++} ++ ++ def CRC_W_B_W : Int_Reg3<"crc.w.b.w", GPR32Opnd, int_loongarch_crc_w_b_w>, R3I<0b1001000>; ++ def CRC_W_H_W : Int_Reg3<"crc.w.h.w", GPR32Opnd, int_loongarch_crc_w_h_w>, R3I<0b1001001>; ++ def CRC_W_W_W : Int_Reg3<"crc.w.w.w", GPR32Opnd, int_loongarch_crc_w_w_w>, R3I<0b1001010>; ++ def CRCC_W_B_W : Int_Reg3<"crcc.w.b.w", GPR32Opnd, int_loongarch_crcc_w_b_w>, R3I<0b1001100>; ++ def CRCC_W_H_W : Int_Reg3<"crcc.w.h.w", GPR32Opnd, int_loongarch_crcc_w_h_w>, R3I<0b1001101>; ++ def CRCC_W_W_W : Int_Reg3<"crcc.w.w.w", GPR32Opnd, int_loongarch_crcc_w_w_w>, R3I<0b1001110>; ++ /// ++ /// SLLI ++ /// ++ def SLLI_W : Shift_Imm32<"slli.w", GPR32Opnd, shl>, R2_IMM5<0b00>; ++ def SRLI_W : Shift_Imm32<"srli.w", GPR32Opnd, srl>, R2_IMM5<0b01>; ++ def SRAI_W : Shift_Imm32<"srai.w", GPR32Opnd, sra>, R2_IMM5<0b10>; ++ 
def ROTRI_W : Shift_Imm32<"rotri.w", GPR32Opnd, rotr>, R2_IMM5<0b11>; ++ /// ++ /// Misc ++ /// ++ def ALSL_W : Reg3_Sa<"alsl.w", GPR32Opnd, uimm2_plus1>, R3_SA2<0b00010> { ++ let Pattern = [(set GPR32Opnd:$rd, ++ (add GPR32Opnd:$rk, (shl GPR32Opnd:$rj, immZExt2Alsl:$sa)))]; ++ } ++ def BYTEPICK_W : Reg3_Sa<"bytepick.w", GPR32Opnd, uimm2>, R3_SA2<0b00100>;//pattern:[] ++ ++ def BREAK : Code15<"break", int_loongarch_break>, CODE15<0b1010100>; ++ def SYSCALL : Code15<"syscall", int_loongarch_syscall>, CODE15<0b1010110>; ++ def TRAP : TrapBase; ++ ++ def BSTRINS_W : InsBase_32<"bstrins.w", GPR32Opnd, uimm5, LoongArchBstrins>, ++ INSERT_BIT32<0>; ++ def BSTRPICK_W : PickBase_32<"bstrpick.w", GPR32Opnd, uimm5, LoongArchBstrpick>, ++ INSERT_BIT32<1>; ++ ++ /// ++ /// R2_IMM12 ++ /// ++ let isCodeGenOnly = 1 in { ++ def SLTI32 : SetCC_I<"slti", GPR32Opnd, simm12_32>, R2_IMM12<0b000>; //PatFrag ++ def SLTUI32 : SetCC_I<"sltui", GPR32Opnd, simm12_32>, R2_IMM12<0b001>; //PatFrag ++ } ++ def ADDI_W : Int_Reg2_Imm12<"addi.w", GPR32Opnd, simm12_32, add>, R2_IMM12<0b010>; ++ ++ let isCodeGenOnly = 1 in { ++ def ANDI32 : Int_Reg2_Imm12<"andi", GPR32Opnd, uimm12_32, and>, R2_IMM12<0b101>; ++ def ORI32 : Int_Reg2_Imm12<"ori", GPR32Opnd, uimm12_32, or>, R2_IMM12<0b110>; ++ def XORI32 : Int_Reg2_Imm12<"xori", GPR32Opnd, uimm12_32, xor>, R2_IMM12<0b111>; ++ } ++ ++ /// ++ /// Privilege Instructions ++ /// ++ def CSRRD32 : CSR<"csrrd", GPR32Opnd, uimm14_32, int_loongarch_csrrd_w>, R1_CSR<0b0000000000100>; ++ def CSRWR32 : CSRW<"csrwr", GPR32Opnd, uimm14_32, int_loongarch_csrwr_w>, R1_CSR<0b0000100000100>; ++ def CSRXCHG32 : CSRX<"csrxchg", GPR32Opnd, uimm14_32, int_loongarch_csrxchg_w>, R2_CSR<0b00000100>; ++ def IOCSRRD_B32 : Int_Reg2<"iocsrrd.b", GPR32Opnd, int_loongarch_iocsrrd_b>, R2P<0b000>; ++ def IOCSRRD_H32 : Int_Reg2<"iocsrrd.h", GPR32Opnd, int_loongarch_iocsrrd_h>, R2P<0b001>; ++ def IOCSRRD_W32 : Int_Reg2<"iocsrrd.w", GPR32Opnd, int_loongarch_iocsrrd_w>, R2P<0b010>; ++ def IOCSRWR_B32 : Int_Reg2_Iocsrwr<"iocsrwr.b", GPR32Opnd, GPR32Opnd, int_loongarch_iocsrwr_b>, R2P<0b100>; ++ def IOCSRWR_H32 : Int_Reg2_Iocsrwr<"iocsrwr.h", GPR32Opnd, GPR32Opnd, int_loongarch_iocsrwr_h>, R2P<0b101>; ++ def IOCSRWR_W32 : Int_Reg2_Iocsrwr<"iocsrwr.w", GPR32Opnd, GPR32Opnd, int_loongarch_iocsrwr_w>, R2P<0b110>; ++ def CACOP32 : CAC<"cacop", GPR32Opnd, simm12_32, int_loongarch_cacop_w>, R1_CACHE; ++ def LDDIR32 : LEVEL<"lddir", GPR32Opnd>, R2_LEVEL<0b00000110010000>; ++ def LDPTE32 : SEQ<"ldpte", GPR32Opnd>, R1_SEQ<0b00000110010001>; ++ ++ //def WAIT : Wait<"wait">; ++ // ++ //def IOCSRRD_D : R2P<0b011>, Int_Reg2<"iocsrrd.d", GPR32Opnd>; ++ //def IOCSRWR_D : R2P<0b111>, Int_Reg2<"iocsrwr.d", GPR32Opnd>; ++ // ++ //def TLBINV : IMM32<0b001000>, OP32<"tlbinv">; ++ //def TLBFLUSH : IMM32<0b001001>, OP32<"tlbflush">; ++ //def TLBP : IMM32<0b001010>, OP32<"tlbp">; ++ //def TLBR : IMM32<0b001011>, OP32<"tlbr">; ++ //def TLBWI : IMM32<0b001100>, OP32<"tlbwi">; ++ //def TLBWR : IMM32<0b001101>, OP32<"tlbwr">; ++ ++ /// ++ /// R1_IMM20 ++ /// ++ let isCodeGenOnly = 1 in { ++ def LU12I_W32 : SI20<"lu12i.w", GPR32Opnd, simm20_32>, R1_SI20<0b0001010>; ++ def PCADDI32 : SI20<"pcaddi", GPR32Opnd, simm20_32>, R1_SI20<0b0001100>; ++ def PCALAU12I32 : SI20<"pcalau12i", GPR32Opnd, simm20_32>, R1_SI20<0b0001101>; ++ def PCADDU12I32 : SI20<"pcaddu12i", GPR32Opnd, simm20_32>, R1_SI20<0b0001110>; ++ } ++ ++ let isCodeGenOnly = 1 in { ++ def BEQZ32 : Beqz<"beqz", brtarget, seteq, GPR32Opnd>, R1_IMM21BEQZ<0b010000>; ++ def BNEZ32 : 
Beqz<"bnez", brtarget, setne, GPR32Opnd>, R1_IMM21BEQZ<0b010001>; ++ ++ def JIRL32 : FJirl<"jirl", calltarget, GPR32Opnd>, R2_IMM16JIRL; ++ ++ def B32 : JumpFB, IMM26B<0b010100>; ++ ++ def BEQ32 : Beq<"beq", brtarget, seteq, GPR32Opnd>, R2_IMM16BEQ<0b010110>; ++ def BNE32 : Beq<"bne", brtarget, setne, GPR32Opnd>, R2_IMM16BEQ<0b010111>; ++ def BLT32 : Beq<"blt", brtarget, setlt, GPR32Opnd>, R2_IMM16BEQ<0b011000>; ++ def BGE32 : Beq<"bge", brtarget, setge, GPR32Opnd>, R2_IMM16BEQ<0b011001>; ++ def BLTU32 : Beq<"bltu", brtarget, setult, GPR32Opnd>, R2_IMM16BEQ<0b011010>; ++ def BGEU32 : Beq<"bgeu", brtarget, setuge, GPR32Opnd>, R2_IMM16BEQ<0b011011>; ++ } ++ ++ /// ++ /// Mem access ++ /// ++ def LL_W : LLBase<"ll.w", GPR32Opnd, mem_simm14_lsl2>, LL_SC<0b000>; ++ def SC_W : SCBase<"sc.w", GPR32Opnd, mem_simm14_lsl2>, LL_SC<0b001>; ++ ++ def PRELD_Raw32 : Preld_Raw<"preld", GPR32Opnd>, PRELD_FM; ++ ++ let isCodeGenOnly = 1 in { ++ def LD_B32 : Ld<"ld.b", GPR32Opnd, mem_simmptr, sextloadi8>, LOAD_STORE<0b0000>; ++ def LD_H32 : Ld<"ld.h", GPR32Opnd, mem_simmptr, sextloadi16, addrDefault>, LOAD_STORE<0b0001>; ++ def LD_W32 : Ld<"ld.w", GPR32Opnd, mem, load, addrDefault>, LOAD_STORE<0b0010>; ++ def ST_B32 : St<"st.b", GPR32Opnd, mem, truncstorei8>, LOAD_STORE<0b0100>; ++ def ST_H32 : St<"st.h", GPR32Opnd, mem, truncstorei16>, LOAD_STORE<0b0101>; ++ def ST_W32 : St<"st.w", GPR32Opnd, mem, store>, LOAD_STORE<0b0110>; ++ def LD_BU32 : Ld<"ld.bu", GPR32Opnd, mem_simmptr, zextloadi8, addrDefault>, LOAD_STORE<0b1000>; ++ def LD_HU32 : Ld<"ld.hu", GPR32Opnd, mem_simmptr, zextloadi16>, LOAD_STORE<0b1001>; ++ ++ def PRELD32 : Preld<"preld", mem, GPR32Opnd>, PRELD_FM; ++ ++ def LDPTR_W32 : LdPtr<"ldptr.w", GPR32Opnd>, LL_SC<0b100>; ++ def STPTR_W32 : StPtr<"stptr.w", GPR32Opnd>, LL_SC<0b101>; ++ } ++ ++ def IBAR : Bar<"ibar", int_loongarch_ibar>, BAR_FM<1>; ++ def DBAR : Bar<"dbar", int_loongarch_dbar>, BAR_FM<0>; ++ ++ def LONG_BRANCH_ADDIW : LoongArchPseudo<(outs GPR32Opnd:$dst), ++ (ins GPR32Opnd:$src, brtarget:$tgt, brtarget:$baltgt), []>; ++ ++ def LONG_BRANCH_ADDIW2Op : LoongArchPseudo<(outs GPR32Opnd:$dst), ++ (ins GPR32Opnd:$src, brtarget:$tgt), []>; ++ ++ def PseudoReturn : PseudoReturnBase; ++ ++ let isCodeGenOnly = 1 in { ++ def LDX_W32 : LDX_FT_LA<"ldx.w", GPR32Opnd, load>, ++ R3MI<0b00010000>; ++ def LDX_HU32 : LDX_FT_LA<"ldx.hu", GPR32Opnd, extloadi16>, ++ R3MI<0b01001000>; ++ def LDX_BU32 : LDX_FT_LA<"ldx.bu", GPR32Opnd, extloadi8>, ++ R3MI<0b01000000>; ++ def STX_W32 : STX_FT_LA<"stx.w", GPR32Opnd, store>, ++ R3MI<0b00110000>; ++ def LDX_H32 : LDX_FT_LA<"ldx.h", GPR32Opnd, sextloadi16>, ++ R3MI<0b00001000>; ++ def LDX_B32 : LDX_FT_LA<"ldx.b", GPR32Opnd, sextloadi8>, ++ R3MI<0b00000000>; ++ def STX_B32 : STX_FT_LA<"stx.b", GPR32Opnd, truncstorei8>, ++ R3MI<0b00100000>; ++ def STX_H32 : STX_FT_LA<"stx.h", GPR32Opnd, truncstorei16>, ++ R3MI<0b00101000>; ++ } ++} ++ ++def LEA_ADDI_W: EffectiveAddress<"addi.w", GPR32Opnd>, LEA_ADDI_FM<0b010>; ++ ++def : LoongArchPat<(LoongArchAddress (i32 tglobaladdr:$in)), ++ (ADDI_W (PCADDU12I32 tglobaladdr:$in) ,0)>,GPR_32; ++def : LoongArchPat<(LoongArchAddress (i32 tblockaddress:$in)), ++ (ADDI_W (PCADDU12I32 tblockaddress:$in),0)>, GPR_32; ++def : LoongArchPat<(LoongArchAddress (i32 tjumptable:$in)), ++ (ADDI_W (PCADDU12I32 tjumptable:$in),0)>, GPR_32; ++def : LoongArchPat<(LoongArchAddress (i32 texternalsym:$in)), ++ (ADDI_W (PCADDU12I32 texternalsym:$in),0)>, GPR_32; ++ ++//===----------------------------------------------------------------------===// 
++// Arbitrary patterns that map to one or more instructions ++//===----------------------------------------------------------------------===// ++ ++let isCodeGenOnly = 1 in { ++ def REVB_2W_32 : Int_Reg2<"revb.2w", GPR32Opnd>, R2I<0b01110>; ++ def REVH_2W_32 : Int_Reg2<"revh.2w", GPR32Opnd>, R2I<0b10000>; ++} ++ ++// bswap pattern ++def : LoongArchPat<(bswap GPR32:$rj), (ROTRI_W (REVB_2H GPR32:$rj), 16)>; ++//def : LoongArchPat<(bswap GPR32:$rj), (REVB_2W_32 GPR32:$rj)>; ++//def : LoongArchPat<(bswap GPR32:$rj), (REVH_2W_32 (REVB_2H GPR32:$rj))>; ++ ++// i32 selects ++multiclass SelectInt_Pats { ++ ++// reg, immz ++def : LoongArchPat<(select (Opg (seteq RC:$cond, immz)), RC:$t, RC:$f), ++ (OROp (MASKNEZOp RC:$t, RC:$cond), (MASKEQZOp RC:$f, RC:$cond))>; ++def : LoongArchPat<(select (Opg (setne RC:$cond, immz)), RC:$t, RC:$f), ++ (OROp (MASKEQZOp RC:$t, RC:$cond), (MASKNEZOp RC:$f, RC:$cond))>; ++ ++//def : LoongArchPat<(select (Opg (seteq RC:$cond, imm_type:$imm)), RC:$t, RC:$f), ++// (OROp (MASKNEZOp RC:$t, (XORiOp RC:$cond, imm_type:$imm)), ++// (MASKEQZOp RC:$f, (XORiOp RC:$cond, imm_type:$imm)))>; ++//def : LoongArchPat<(select (Opg (setne RC:$cond, imm_type:$imm)), RC:$t, RC:$f), ++// (OROp (MASKEQZOp RC:$t, (XORiOp RC:$cond, imm_type:$imm)), ++// (MASKNEZOp RC:$f, (XORiOp RC:$cond, imm_type:$imm)))>; ++ ++// reg, immSExt12Plus1 ++//def : LoongArchPat<(select (Opg (setgt RC:$cond, immSExt12Plus1:$imm)), RC:$t, RC:$f), ++// (OROp (MASKNEZOp RC:$t, (SLTiOp RC:$cond, (Plus1 imm:$imm))), ++// (MASKEQZOp RC:$f, (SLTiOp RC:$cond, (Plus1 imm:$imm))))>; ++//def : LoongArchPat<(select (Opg (setugt RC:$cond, immSExt16Plus1:$imm)), RC:$t, RC:$f), ++// (OROp (MASKNEZOp RC:$t, (SLTiuOp RC:$cond, (Plus1 imm:$imm))), ++// (MASKEQZOp RC:$f, (SLTiuOp RC:$cond, (Plus1 imm:$imm))))>; ++ ++def : LoongArchPat<(select (Opg (seteq RC:$cond, immz)), RC:$t, immz), ++ (MASKNEZOp RC:$t, RC:$cond)>; ++def : LoongArchPat<(select (Opg (setne RC:$cond, immz)), RC:$t, immz), ++ (MASKEQZOp RC:$t, RC:$cond)>; ++def : LoongArchPat<(select (Opg (seteq RC:$cond, immz)), immz, RC:$f), ++ (MASKEQZOp RC:$f, RC:$cond)>; ++def : LoongArchPat<(select (Opg (setne RC:$cond, immz)), immz, RC:$f), ++ (MASKNEZOp RC:$f, RC:$cond)>; ++} ++ ++defm : SelectInt_Pats; ++ ++def : LoongArchPat<(select i32:$cond, i32:$t, i32:$f), ++ (OR32 (MASKEQZ32 i32:$t, i32:$cond), ++ (MASKNEZ32 i32:$f, i32:$cond))>; ++def : LoongArchPat<(select i32:$cond, i32:$t, immz), ++ (MASKEQZ32 i32:$t, i32:$cond)>; ++def : LoongArchPat<(select i32:$cond, immz, i32:$f), ++ (MASKNEZ32 i32:$f, i32:$cond)>; ++ ++// truncate ++def : LoongArchPat<(i32 (trunc (assertzext_lt_i32 GPR64:$src))), ++ (EXTRACT_SUBREG GPR64:$src, sub_32)>, GPR_64; ++def : LoongArchPat<(i32 (trunc GPR64:$src)), ++ (SLLI_W (EXTRACT_SUBREG GPR64:$src, sub_32), 0)>, GPR_64; ++ ++// Patterns used for matching away redundant sign extensions. ++// LA32 arithmetic instructions sign extend their result implicitly. 
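On 64-bit LoongArch the 32-bit (.w) arithmetic instructions already sign-extend their 32-bit result into the full 64-bit register, which is why the patterns that follow can match an explicit (i64 (sext (i32 (add ...)))) straight to ADD_W. A minimal host-side illustration of why that fold is sound (a sketch, not part of the patch; add_w here just models the sign-extending behaviour the patterns rely on):

    #include <cassert>
    #include <cstdint>

    // Model of a sign-extending 32-bit add: keep the low 32 bits of the sum
    // and sign-extend them to 64 bits (illustration only).
    static int64_t add_w(int64_t rj, int64_t rk) {
      return static_cast<int64_t>(static_cast<int32_t>(
          static_cast<uint32_t>(rj) + static_cast<uint32_t>(rk)));
    }

    int main() {
      int64_t x = 0x7fffffff, y = 1;
      // The IR form (i64 (sext (i32 (add x, y)))) computes the same value,
      // so the separate sext node can be folded away.
      int32_t add32 = static_cast<int32_t>(static_cast<uint32_t>(x) +
                                           static_cast<uint32_t>(y));
      assert(add_w(x, y) == static_cast<int64_t>(add32));
      assert(add_w(x, y) == INT32_MIN); // wraps to INT32_MIN, then sign-extends
      return 0;
    }

The same reasoning backs the SUB_W, MUL_W, shift and division folds below.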
++def : LoongArchPat<(i64 (sext (i32 (add GPR32:$src, GPR32:$src2)))), ++ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), ++ (ADD_W GPR32:$src, GPR32:$src2), sub_32)>; ++def : LoongArchPat<(i64 (sext (i32 (sub GPR32:$src, GPR32:$src2)))), ++ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), ++ (SUB_W GPR32:$src, GPR32:$src2), sub_32)>; ++def : LoongArchPat<(i64 (sext (i32 (mul GPR32:$src, GPR32:$src2)))), ++ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), ++ (MUL_W GPR32:$src, GPR32:$src2), sub_32)>; ++ ++def : LoongArchPat<(store (i32 0), addr:$dst), (ST_W32 ZERO, addr:$dst)>; ++ ++def : InstAlias<"break", (BREAK 0), 0>; ++def : LoongArchInstAlias<"move $dst, $src", ++ (OR32 GPR32Opnd:$dst, GPR32Opnd:$src, ZERO), 1>, GPR_32; ++ ++def immSExt12Plus1 : PatLeaf<(imm), [{ ++ return isInt<13>(N->getSExtValue()) && isInt<12>(N->getSExtValue() + 1); ++}]>; ++ ++def Plus1 : SDNodeXFormgetSExtValue() + 1); }]>; ++ ++multiclass BrcondPats { ++ ++def : LoongArchPat<(brcond (i32 (setne RC:$lhs, 0)), bb:$dst), ++ (BNEOp RC:$lhs, ZEROReg, bb:$dst)>; ++def : LoongArchPat<(brcond (i32 (seteq RC:$lhs, 0)), bb:$dst), ++ (BEQOp RC:$lhs, ZEROReg, bb:$dst)>; ++def : LoongArchPat<(brcond (i32 (setge RC:$lhs, RC:$rhs)), bb:$dst), ++ (BEQOp1 (SLTOp RC:$lhs, RC:$rhs), ZEROReg, bb:$dst)>; ++def : LoongArchPat<(brcond (i32 (setuge RC:$lhs, RC:$rhs)), bb:$dst), ++ (BEQOp1 (SLTUOp RC:$lhs, RC:$rhs), ZEROReg, bb:$dst)>; ++def : LoongArchPat<(brcond (i32 (setge RC:$lhs, immSExt12:$rhs)), bb:$dst), ++ (BEQOp1 (SLTIOp RC:$lhs, immSExt12:$rhs), ZEROReg, bb:$dst)>; ++def : LoongArchPat<(brcond (i32 (setuge RC:$lhs, immSExt12:$rhs)), bb:$dst), ++ (BEQOp1 (SLTUIOp RC:$lhs, immSExt12:$rhs), ZEROReg, bb:$dst)>; ++def : LoongArchPat<(brcond (i32 (setgt RC:$lhs, immSExt12Plus1:$rhs)), bb:$dst), ++ (BEQOp1 (SLTIOp RC:$lhs, (Plus1 imm:$rhs)), ZEROReg, bb:$dst)>; ++def : LoongArchPat<(brcond (i32 (setugt RC:$lhs, immSExt12Plus1:$rhs)), bb:$dst), ++ (BEQOp1 (SLTUIOp RC:$lhs, (Plus1 imm:$rhs)), ZEROReg, bb:$dst)>; ++def : LoongArchPat<(brcond (i32 (setle RC:$lhs, RC:$rhs)), bb:$dst), ++ (BEQOp1 (SLTOp RC:$rhs, RC:$lhs), ZEROReg, bb:$dst)>; ++def : LoongArchPat<(brcond (i32 (setule RC:$lhs, RC:$rhs)), bb:$dst), ++ (BEQOp1 (SLTUOp RC:$rhs, RC:$lhs), ZEROReg, bb:$dst)>; ++def : LoongArchPat<(brcond RC:$cond, bb:$dst), ++ (BNEOp RC:$cond, ZEROReg, bb:$dst)>; ++} ++ ++defm : BrcondPats, GPR_64; ++ ++let usesCustomInserter = 1 in { ++ def ATOMIC_LOAD_ADD_I8 : Atomic2Ops; ++ def ATOMIC_LOAD_ADD_I16 : Atomic2Ops; ++ def ATOMIC_LOAD_ADD_I32 : Atomic2Ops; ++ def ATOMIC_LOAD_SUB_I8 : Atomic2Ops; ++ def ATOMIC_LOAD_SUB_I16 : Atomic2Ops; ++ def ATOMIC_LOAD_SUB_I32 : Atomic2Ops; ++ def ATOMIC_LOAD_AND_I8 : Atomic2Ops; ++ def ATOMIC_LOAD_AND_I16 : Atomic2Ops; ++ def ATOMIC_LOAD_AND_I32 : Atomic2Ops; ++ def ATOMIC_LOAD_OR_I8 : Atomic2Ops; ++ def ATOMIC_LOAD_OR_I16 : Atomic2Ops; ++ def ATOMIC_LOAD_OR_I32 : Atomic2Ops; ++ def ATOMIC_LOAD_XOR_I8 : Atomic2Ops; ++ def ATOMIC_LOAD_XOR_I16 : Atomic2Ops; ++ def ATOMIC_LOAD_XOR_I32 : Atomic2Ops; ++ def ATOMIC_LOAD_NAND_I8 : Atomic2Ops; ++ def ATOMIC_LOAD_NAND_I16 : Atomic2Ops; ++ def ATOMIC_LOAD_NAND_I32 : Atomic2Ops; ++ ++ def ATOMIC_SWAP_I8 : Atomic2Ops; ++ def ATOMIC_SWAP_I16 : Atomic2Ops; ++ def ATOMIC_SWAP_I32 : Atomic2Ops; ++ ++ def ATOMIC_CMP_SWAP_I8 : AtomicCmpSwap; ++ def ATOMIC_CMP_SWAP_I16 : AtomicCmpSwap; ++ def ATOMIC_CMP_SWAP_I32 : AtomicCmpSwap; ++ ++ def ATOMIC_LOAD_MAX_I8 : Atomic2Ops; ++ def ATOMIC_LOAD_MAX_I16 : Atomic2Ops; ++ def ATOMIC_LOAD_MAX_I32 : Atomic2Ops; ++ ++ def ATOMIC_LOAD_MIN_I8 : Atomic2Ops; ++ def 
ATOMIC_LOAD_MIN_I16 : Atomic2Ops; ++ def ATOMIC_LOAD_MIN_I32 : Atomic2Ops; ++ ++ def ATOMIC_LOAD_UMAX_I8 : Atomic2Ops; ++ def ATOMIC_LOAD_UMAX_I16 : Atomic2Ops; ++ def ATOMIC_LOAD_UMAX_I32 : Atomic2Ops; ++ ++ def ATOMIC_LOAD_UMIN_I8 : Atomic2Ops; ++ def ATOMIC_LOAD_UMIN_I16 : Atomic2Ops; ++ def ATOMIC_LOAD_UMIN_I32 : Atomic2Ops; ++} ++ ++def ATOMIC_LOAD_ADD_I8_POSTRA : Atomic2OpsSubwordPostRA; ++def ATOMIC_LOAD_ADD_I16_POSTRA : Atomic2OpsSubwordPostRA; ++def ATOMIC_LOAD_ADD_I32_POSTRA : Atomic2OpsPostRA; ++def ATOMIC_LOAD_SUB_I8_POSTRA : Atomic2OpsSubwordPostRA; ++def ATOMIC_LOAD_SUB_I16_POSTRA : Atomic2OpsSubwordPostRA; ++def ATOMIC_LOAD_SUB_I32_POSTRA : Atomic2OpsPostRA; ++def ATOMIC_LOAD_AND_I8_POSTRA : Atomic2OpsSubwordPostRA; ++def ATOMIC_LOAD_AND_I16_POSTRA : Atomic2OpsSubwordPostRA; ++def ATOMIC_LOAD_AND_I32_POSTRA : Atomic2OpsPostRA; ++def ATOMIC_LOAD_OR_I8_POSTRA : Atomic2OpsSubwordPostRA; ++def ATOMIC_LOAD_OR_I16_POSTRA : Atomic2OpsSubwordPostRA; ++def ATOMIC_LOAD_OR_I32_POSTRA : Atomic2OpsPostRA; ++def ATOMIC_LOAD_XOR_I8_POSTRA : Atomic2OpsSubwordPostRA; ++def ATOMIC_LOAD_XOR_I16_POSTRA : Atomic2OpsSubwordPostRA; ++def ATOMIC_LOAD_XOR_I32_POSTRA : Atomic2OpsPostRA; ++def ATOMIC_LOAD_NAND_I8_POSTRA : Atomic2OpsSubwordPostRA; ++def ATOMIC_LOAD_NAND_I16_POSTRA : Atomic2OpsSubwordPostRA; ++def ATOMIC_LOAD_NAND_I32_POSTRA : Atomic2OpsPostRA; ++ ++def ATOMIC_SWAP_I8_POSTRA : Atomic2OpsSubwordPostRA; ++def ATOMIC_SWAP_I16_POSTRA : Atomic2OpsSubwordPostRA; ++def ATOMIC_SWAP_I32_POSTRA : Atomic2OpsPostRA; ++ ++def ATOMIC_CMP_SWAP_I8_POSTRA : AtomicCmpSwapSubwordPostRA; ++def ATOMIC_CMP_SWAP_I16_POSTRA : AtomicCmpSwapSubwordPostRA; ++def ATOMIC_CMP_SWAP_I32_POSTRA : AtomicCmpSwapPostRA; ++ ++def ATOMIC_LOAD_MAX_I8_POSTRA : Atomic2OpsSubwordPostRA; ++def ATOMIC_LOAD_MAX_I16_POSTRA : Atomic2OpsSubwordPostRA; ++def ATOMIC_LOAD_MAX_I32_POSTRA : Atomic2OpsPostRA; ++ ++def ATOMIC_LOAD_MIN_I8_POSTRA : Atomic2OpsSubwordPostRA; ++def ATOMIC_LOAD_MIN_I16_POSTRA : Atomic2OpsSubwordPostRA; ++def ATOMIC_LOAD_MIN_I32_POSTRA : Atomic2OpsPostRA; ++ ++def ATOMIC_LOAD_UMAX_I8_POSTRA : Atomic2OpsSubwordPostRA; ++def ATOMIC_LOAD_UMAX_I16_POSTRA : Atomic2OpsSubwordPostRA; ++def ATOMIC_LOAD_UMAX_I32_POSTRA : Atomic2OpsPostRA; ++ ++def ATOMIC_LOAD_UMIN_I8_POSTRA : Atomic2OpsSubwordPostRA; ++def ATOMIC_LOAD_UMIN_I16_POSTRA : Atomic2OpsSubwordPostRA; ++def ATOMIC_LOAD_UMIN_I32_POSTRA : Atomic2OpsPostRA; ++ ++def : LoongArchPat<(atomic_load_8 addr:$a), (LD_B32 addr:$a)>; ++def : LoongArchPat<(atomic_load_16 addr:$a), (LD_H32 addr:$a)>; ++def : LoongArchPat<(atomic_load_32 addrimm14lsl2:$a), (LDPTR_W32 addrimm14lsl2:$a)>; ++def : LoongArchPat<(atomic_load_32 addr:$a), (LD_W32 addr:$a)>; ++ ++def : LoongArchPat<(atomic_store_8 addr:$a, GPR32:$v), ++ (ST_B32 GPR32:$v, addr:$a)>; ++def : LoongArchPat<(atomic_store_16 addr:$a, GPR32:$v), ++ (ST_H32 GPR32:$v, addr:$a)>; ++def : LoongArchPat<(atomic_store_32 addrimm14lsl2:$a, GPR32:$v), ++ (STPTR_W32 GPR32:$v, addrimm14lsl2:$a)>; ++def : LoongArchPat<(atomic_store_32 addr:$a, GPR32:$v), ++ (ST_W32 GPR32:$v, addr:$a)>; ++ ++def : LoongArchPat<(LoongArchDBAR (i32 immz)), ++ (DBAR 0)>; ++ ++def : LoongArchPat<(i32 (extloadi1 addr:$src)), (LD_BU32 addr:$src)>; ++def : LoongArchPat<(i32 (extloadi8 addr:$src)), (LD_BU32 addr:$src)>; ++def : LoongArchPat<(i32 (extloadi16 addr:$src)), (LD_HU32 addr:$src)>; ++ ++def : LoongArchPat<(store (i32 0), addr:$dst), (ST_W32 ZERO, addr:$dst)>; ++ ++// Patterns for loads/stores with a reg+imm operand. 
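The ldptr.w/stptr.w selections just above, and the LoadRegImm14Lsl2Pat/StoreRegImm14Lsl2Pat entries in the block that follows, use a 14-bit signed offset scaled by four, so only word-aligned offsets from -32768 to 32764 are encodable, with the plain ld.w/st.w patterns covering the rest. A small stand-alone check of that predicate (a sketch in the spirit of an isShiftedInt<14, 2> test, not the patch's operand code):

    #include <cassert>
    #include <cstdint>

    // True if Off fits the "signed 14-bit immediate shifted left by 2" form
    // that ldptr.w/stptr.w (and ll.w/sc.w) accept.
    static bool isSimm14Lsl2(int64_t Off) {
      return Off % 4 == 0 && Off / 4 >= -8192 && Off / 4 <= 8191;
    }

    int main() {
      assert(isSimm14Lsl2(0));
      assert(isSimm14Lsl2(32764));   // 8191 * 4, the largest encodable offset
      assert(isSimm14Lsl2(-32768));  // -8192 * 4, the smallest
      assert(!isSimm14Lsl2(32768));  // one word past the top of the range
      assert(!isSimm14Lsl2(6));      // not word-aligned
      return 0;
    }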
++let AddedComplexity = 40 in { ++ def : LoadRegImmPat; ++ def : LoadRegImmPat; ++ def : LoadRegImmPat; ++ def : LoadRegImmPat; ++ def : LoadRegImmPat; ++ def : StoreRegImmPat; ++ def : StoreRegImmPat; ++ def : StoreRegImmPat; ++ ++ def : LoadRegImm14Lsl2Pat; ++ def : StoreRegImm14Lsl2Pat; ++} ++ ++let isCall=1, isCTI=1, Defs = [RA] in { ++ ++ class JumpLinkRegPseudo: ++ LoongArchPseudo<(outs), (ins RO:$rj), [(LoongArchJmpLink RO:$rj)]>, ++ PseudoInstExpansion<(JIRLRInst RetReg, ResRO:$rj)> { ++ let hasPostISelHook = 1; ++ } ++ ++ class JumpLinkReg: ++ InstForm<(outs RO:$rd), (ins RO:$rj), !strconcat(opstr, "\t$rd, $rj, 0"), ++ [], FrmR, opstr> { ++ let hasPostISelHook = 1; ++ } ++ ++} ++ ++def JIRLR : JumpLinkReg<"jirl", GPR32Opnd>, R2_IMM16JIRL { ++ let offs16 = 0; ++} ++def JIRLRPseudo : JumpLinkRegPseudo; ++ ++class BrindRegPseudo: ++ LoongArchPseudo<(outs), (ins RO:$rj), [(brind RO:$rj)]>, ++ PseudoInstExpansion<(JIRLRInst RetReg, ResRO:$rj)> { ++ let isTerminator=1; ++ let isBarrier=1; ++ let isBranch = 1; ++ let isIndirectBranch = 1; ++ bit isCTI = 1; ++} ++ ++def JIRLRBRIND : BrindRegPseudo; ++ ++def : LoongArchPat<(addc GPR32:$src, immSExt12:$imm), ++ (ADDI_W GPR32:$src, imm:$imm)>; ++ ++defm : SeteqPats; ++defm : SetlePats; ++defm : SetgtPats; ++defm : SetgePats; ++defm : SetgeImmPats; ++ ++def : LoongArchPat<(i64 (sext (i32 (xor (i32 (trunc (i64 (assertsext GPR64:$rj)))), (immZExt12:$imm12))))), ++ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), ++ (XORI32 (EXTRACT_SUBREG GPR64:$rj, sub_32), (immZExt12:$imm12)), sub_32)>, GPR_64; ++ ++def : LoongArchPat<(i64 (sext (i32 (add (i32 (trunc (i64 (assertsext GPR64:$rj)))), (i32 (trunc (i64 (assertsext GPR64:$rk)))))))), ++ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), ++ (ADD_W (EXTRACT_SUBREG GPR64:$rj, sub_32), (EXTRACT_SUBREG GPR64:$rk, sub_32)), sub_32)>, GPR_64; ++ ++def : LoongArchPat<(i64 (sext (i32 (add (i32 (trunc (i64 (assertsext GPR64:$rj)))), (immSExt12:$imm12))))), ++ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), ++ (ADDI_W (EXTRACT_SUBREG GPR64:$rj, sub_32), (immSExt12:$imm12)), sub_32)>, GPR_64; ++ ++def : LoongArchPat<(i64 (sext (i32 (sra (i32 (trunc (i64 (assertsext GPR64:$rj)))), (i32 (trunc (i64 (assertsext GPR64:$rk)))))))), ++ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), ++ (SRA_W (EXTRACT_SUBREG GPR64:$rj, sub_32), (EXTRACT_SUBREG GPR64:$rk, sub_32)), sub_32)>, GPR_64; ++ ++def : LoongArchPat<(i64 (sext (i32 (srl (i32 (trunc (i64 (assertsext GPR64:$rj)))), (i32 (trunc (i64 (assertsext GPR64:$rk)))))))), ++ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), ++ (SRL_W (EXTRACT_SUBREG GPR64:$rj, sub_32), (EXTRACT_SUBREG GPR64:$rk, sub_32)), sub_32)>, GPR_64; ++ ++def : LoongArchPat<(i64 (sext (i32 (mul (i32 (trunc (i64 (assertsext GPR64:$rj)))), (i32 (trunc (i64 (assertsext GPR64:$rk)))))))), ++ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), ++ (MUL_W (EXTRACT_SUBREG GPR64:$rj, sub_32), (EXTRACT_SUBREG GPR64:$rk, sub_32)), sub_32)>, GPR_64; ++ ++def : LoongArchPat<(i64 (sext (i32 (xor (i32 (trunc (i64 (assertsext GPR64:$rj)))), (i32 (trunc (i64 (assertsext GPR64:$rk)))))))), ++ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), ++ (XOR32 (EXTRACT_SUBREG GPR64:$rj, sub_32), (EXTRACT_SUBREG GPR64:$rk, sub_32)), sub_32)>, GPR_64; ++ ++def : LoongArchPat<(i64 (sext (i32 (xor (i32 (trunc (i64 (assertsext GPR64:$rj)))), (i32 GPR32:$rk))))), ++ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), ++ (XOR32 (EXTRACT_SUBREG GPR64:$rj, sub_32), GPR32:$rk), sub_32)>, GPR_64; ++ ++def : LoongArchPat<(i64 (sext (i32 (or (i32 (trunc (i64 (assertsext GPR64:$rj)))), (uimm12_32:$imm12))))), ++ (INSERT_SUBREG (i64 
(IMPLICIT_DEF)), ++ (ORI32 (EXTRACT_SUBREG GPR64:$rj, sub_32), (uimm12_32:$imm12)), sub_32)>, GPR_64; ++ ++def : LoongArchPat<(i64 (sext (i32 (or (i32 (trunc (i64 (assertsext GPR64:$rj)))), (i32 GPR32:$rk))))), ++ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), ++ (OR32 (EXTRACT_SUBREG GPR64:$rj, sub_32), GPR32:$rk), sub_32)>, GPR_64; ++ ++def : LoongArchPat<(i64 (sext (select i32:$cond, (i32 (trunc (i64 (assertsext GPR64:$t)))), (i32 (trunc (i64 (assertsext GPR64:$f))))))), ++ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), ++ (OR32 (MASKEQZ32 (EXTRACT_SUBREG GPR64:$t, sub_32), i32:$cond), ++ (MASKNEZ32 (EXTRACT_SUBREG GPR64:$f, sub_32), i32:$cond)), sub_32)>; ++ ++def : LoongArchPat<(i64 (sext (i32 (shl (i32 (trunc (i64 (assertsext GPR64:$rj)))), (i32 (trunc (i64 (assertsext GPR64:$rk)))))))), ++ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), ++ (SLL_W (EXTRACT_SUBREG GPR64:$rj, sub_32), (EXTRACT_SUBREG GPR64:$rk, sub_32)), sub_32)>, GPR_64; ++ ++def : LoongArchPat<(i64 (sext (i32 (srem (i32 (trunc (i64 (assertsext GPR64:$rj)))), (i32 (trunc (i64 (assertsext GPR64:$rk)))))))), ++ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), ++ (MOD_W (EXTRACT_SUBREG GPR64:$rj, sub_32), (EXTRACT_SUBREG GPR64:$rk, sub_32)), sub_32)>, GPR_64; ++ ++def : LoongArchPat<(atomic_store_32 addr:$a, (i32 (trunc (i64 (assertsext GPR64:$rj))))), ++ (ST_W32 (EXTRACT_SUBREG GPR64:$rj, sub_32), addr:$a)>, GPR_64; ++ ++def : LoongArchPat<(i64 (sext (i32 (sub (i32 (trunc (i64 (assertsext GPR64:$rj)))), (i32 (trunc (i64 (assertsext GPR64:$rk)))))))), ++ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), ++ (SUB_W (EXTRACT_SUBREG GPR64:$rj, sub_32), (EXTRACT_SUBREG GPR64:$rk, sub_32)), sub_32)>, GPR_64; ++ ++def : LoongArchPat<(i64 (sext (i32 (udiv (i32 (trunc (i64 (assertsext GPR64:$rj)))), (i32 (trunc (i64 (assertsext GPR64:$rk)))))))), ++ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), ++ (DIV_WU (EXTRACT_SUBREG GPR64:$rj, sub_32), (EXTRACT_SUBREG GPR64:$rk, sub_32)), sub_32)>, GPR_64; ++ ++def : LoongArchPat<(i64 (sext (i32 (urem (i32 (trunc (i64 (assertsext GPR64:$rj)))), (i32 (trunc (i64 (assertsext GPR64:$rk)))))))), ++ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), ++ (MOD_WU (EXTRACT_SUBREG GPR64:$rj, sub_32), (EXTRACT_SUBREG GPR64:$rk, sub_32)), sub_32)>, GPR_64; ++ ++def : LoongArchPat<(brcond (i32 (seteq (i32 (trunc (i64 (assertsext GPR64:$rj)))), 0)), bb:$offs21), ++ (BEQZ32 (EXTRACT_SUBREG GPR64:$rj, sub_32), brtarget:$offs21)>; ++ ++def : LoongArchPat<(setne (i32 (trunc (i64 (assertsext GPR64:$rj)))), 0), ++ (SLTU32 ZERO, (EXTRACT_SUBREG GPR64:$rj, sub_32))>; ++ ++def : LoongArchPat<(select i32:$cond, (i32 (trunc (i64 (assertsext GPR64:$t)))), (i32 (trunc (i64 (assertsext GPR64:$f))))), ++ (OR32 (MASKEQZ32 (EXTRACT_SUBREG GPR64:$t, sub_32), i32:$cond), ++ (MASKNEZ32 (EXTRACT_SUBREG GPR64:$f, sub_32), i32:$cond))>; ++ ++def : LoongArchPat<(select (i32 (setne (i32 (trunc (i64 (assertsext GPR64:$cond)))), immz)), immz, i32:$f), ++ (MASKNEZ32 i32:$f, (EXTRACT_SUBREG GPR64:$cond, sub_32))>; ++ ++def : LoongArchPat<(select (i32 (seteq (i32 (trunc (i64 (assertsext GPR64:$cond)))), immz)), immz, i32:$f), ++ (MASKEQZ32 i32:$f, (EXTRACT_SUBREG GPR64:$cond, sub_32))>; ++ ++ def : LoongArchPat<(store (i32 (trunc (i64 (assertsext GPR64:$v)))), addr:$a), ++ (ST_W32 (EXTRACT_SUBREG GPR64:$v, sub_32), addr:$a)>; ++ ++ ++def : LoongArchPat<(i32 (xor GPR32:$rj, (i32 -1))), ++ (NOR32 ZERO, GPR32:$rj)>; ++ ++def : LoongArchPat<(and GPR32:$rj, (i32 (xor GPR32:$rk, (i32 -1)))), ++ (ANDN32 GPR32:$rj, GPR32:$rk)>; ++ ++def : LoongArchPat< ++ (i64 ++ (sext ++ (i32 (and (i32 (trunc (i64 (assertsext 
GPR64:$rj)))), ++ (i32 (xor (i32 (trunc (i64 (assertsext GPR64:$rk)))), ++ (i32 -1)))) ++ ) ++ ) ++ ), ++ (INSERT_SUBREG ++ (i64 (IMPLICIT_DEF)), ++ (ANDN32 (EXTRACT_SUBREG GPR64:$rj, sub_32), ++ (EXTRACT_SUBREG GPR64:$rk, sub_32)), ++ sub_32 ++ )>; ++ ++def : LoongArchPat< ++ (i64 ++ (sext ++ (i32 (or (i32 (trunc (i64 (assertsext GPR64:$rj)))), ++ (i32 (xor (i32 (trunc (i64 (assertsext GPR64:$rk)))), ++ (i32 -1)))) ++ ) ++ ) ++ ), ++ (INSERT_SUBREG ++ (i64 (IMPLICIT_DEF)), ++ (ORN32 (EXTRACT_SUBREG GPR64:$rj, sub_32), ++ (EXTRACT_SUBREG GPR64:$rk, sub_32)), ++ sub_32 ++ )>; ++ ++def : LoongArchPat<(i64 ++ (sext ++ (i32 (xor (i32 (or (i32 (trunc (i64 (assertsext GPR64:$rj)))), ++ (i32 (trunc (i64 (assertsext GPR64:$rk)))))), ++ (i32 -1)) ++ ) ++ ) ++ ), ++ (INSERT_SUBREG ++ (i64 (IMPLICIT_DEF)), ++ (NOR32 (EXTRACT_SUBREG GPR64:$rj, sub_32), ++ (EXTRACT_SUBREG GPR64:$rk, sub_32)), ++ sub_32 ++ )>; ++ ++def : LoongArchPat<(i64 ++ (sext ++ (i32 (xor (i32 (trunc (i64 (or (i64 (assertsext GPR64:$rj)), ++ (i64 (assertsext GPR64:$rk)))))), ++ (i32 -1)) ++ ) ++ ) ++ ), ++ (INSERT_SUBREG ++ (i64 (IMPLICIT_DEF)), ++ (NOR32 (EXTRACT_SUBREG GPR64:$rk, sub_32), ++ (EXTRACT_SUBREG GPR64:$rj, sub_32)), ++ sub_32 ++ )>; ++ ++def : LoongArchPat<(i64 ++ (sext ++ (i32 (xor (i32 (trunc (i64 (assertsext GPR64:$rj)))), ++ (i32 -1)) ++ ) ++ ) ++ ), ++ (INSERT_SUBREG ++ (i64 (IMPLICIT_DEF)), ++ (NOR32 ZERO, (EXTRACT_SUBREG GPR64:$rj, sub_32)), ++ sub_32 ++ )>; ++ ++def : LoongArchPat<(i64 ++ (zext ++ (i32 (seteq (i32 (trunc (i64 (assertsext GPR64:$rj)))), ++ (i32 0)) ++ ) ++ ) ++ ), ++ (INSERT_SUBREG ++ (i64 (IMPLICIT_DEF)), ++ (SLTUI32 (EXTRACT_SUBREG GPR64:$rj, sub_32), (i32 1)), ++ sub_32 ++ )>; +diff --git a/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp b/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp +index 1467d1757..afa38dbf2 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp ++++ b/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp +@@ -1,4 +1,4 @@ +-//===- LoongArchAsmPrinter.cpp - LoongArch LLVM Assembly Printer -*- C++ -*--=// ++//===- LoongArchAsmPrinter.cpp - LoongArch LLVM Assembly Printer --------------------===// + // + // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + // See https://llvm.org/LICENSE.txt for license information. 
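The final pattern in the instruction-selection hunk above lowers a zero-extended (x == 0) test to sltui dst, x, 1, using the fact that zero is the only unsigned value strictly less than one. A tiny stand-alone check of that identity (a sketch, not part of the patch):

    #include <cassert>
    #include <cstdint>

    // seteq-zero via unsigned set-on-less-than-immediate: (x == 0) <=> (x <u 1).
    static uint32_t sltui_1(uint32_t x) { return x < 1u ? 1u : 0u; }

    int main() {
      assert(sltui_1(0) == 1);
      assert(sltui_1(1) == 0);
      assert(sltui_1(0x80000000u) == 0);
      assert(sltui_1(0xffffffffu) == 0);
      return 0;
    }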
+@@ -12,40 +12,622 @@ + //===----------------------------------------------------------------------===// + + #include "LoongArchAsmPrinter.h" ++#include "MCTargetDesc/LoongArchInstPrinter.h" ++#include "MCTargetDesc/LoongArchABIInfo.h" ++#include "MCTargetDesc/LoongArchBaseInfo.h" ++#include "MCTargetDesc/LoongArchMCTargetDesc.h" + #include "LoongArch.h" ++#include "LoongArchMCInstLower.h" ++#include "LoongArchMachineFunction.h" ++#include "LoongArchSubtarget.h" + #include "LoongArchTargetMachine.h" +-#include "TargetInfo/LoongArchTargetInfo.h" +-#include "llvm/CodeGen/AsmPrinter.h" ++#include "LoongArchTargetStreamer.h" ++#include "llvm/ADT/SmallString.h" ++#include "llvm/ADT/StringRef.h" ++#include "llvm/ADT/Triple.h" ++#include "llvm/ADT/Twine.h" ++#include "llvm/BinaryFormat/ELF.h" ++#include "llvm/CodeGen/MachineBasicBlock.h" ++#include "llvm/CodeGen/MachineConstantPool.h" ++#include "llvm/CodeGen/MachineFrameInfo.h" ++#include "llvm/CodeGen/MachineFunction.h" ++#include "llvm/CodeGen/MachineInstr.h" ++#include "llvm/CodeGen/MachineJumpTableInfo.h" ++#include "llvm/CodeGen/MachineOperand.h" ++#include "llvm/CodeGen/TargetRegisterInfo.h" ++#include "llvm/CodeGen/TargetSubtargetInfo.h" ++#include "llvm/IR/Attributes.h" ++#include "llvm/IR/BasicBlock.h" ++#include "llvm/IR/DataLayout.h" ++#include "llvm/IR/Function.h" ++#include "llvm/IR/InlineAsm.h" ++#include "llvm/IR/Instructions.h" ++#include "llvm/MC/MCContext.h" ++#include "llvm/MC/MCExpr.h" ++#include "llvm/MC/MCInst.h" ++#include "llvm/MC/MCInstBuilder.h" ++#include "llvm/MC/MCObjectFileInfo.h" ++#include "llvm/MC/MCSectionELF.h" ++#include "llvm/MC/MCSymbol.h" ++#include "llvm/MC/MCSymbolELF.h" + #include "llvm/MC/TargetRegistry.h" ++#include "llvm/Support/Casting.h" ++#include "llvm/Support/ErrorHandling.h" ++#include "llvm/Support/raw_ostream.h" ++#include "llvm/Target/TargetMachine.h" ++#include ++#include ++#include ++#include ++#include ++#include + + using namespace llvm; + + #define DEBUG_TYPE "loongarch-asm-printer" + +-// Simple pseudo-instructions have their lowering (with expansion to real +-// instructions) auto-generated. ++LoongArchTargetStreamer &LoongArchAsmPrinter::getTargetStreamer() const { ++ return static_cast(*OutStreamer->getTargetStreamer()); ++} ++ ++bool LoongArchAsmPrinter::runOnMachineFunction(MachineFunction &MF) { ++ Subtarget = &MF.getSubtarget(); ++ ++ LoongArchFI = MF.getInfo(); ++ MCP = MF.getConstantPool(); ++ ++ AsmPrinter::runOnMachineFunction(MF); ++ ++ emitXRayTable(); ++ ++ return true; ++} ++ ++bool LoongArchAsmPrinter::lowerOperand(const MachineOperand &MO, MCOperand &MCOp) { ++ MCOp = MCInstLowering.LowerOperand(MO); ++ return MCOp.isValid(); ++} ++ + #include "LoongArchGenMCPseudoLowering.inc" + ++// Lower PseudoReturn/PseudoIndirectBranch/PseudoIndirectBranch64 to ++// JIRL as appropriate for the target. ++void LoongArchAsmPrinter::emitPseudoIndirectBranch(MCStreamer &OutStreamer, ++ const MachineInstr *MI) { ++ bool HasLinkReg = false; ++ MCInst TmpInst0; ++ TmpInst0.setOpcode(LoongArch::JIRL); ++ HasLinkReg = true; ++ ++ MCOperand MCOp; ++ ++ if (HasLinkReg) { ++ unsigned ZeroReg = Subtarget->is64Bit() ? 
LoongArch::ZERO_64 : LoongArch::ZERO; ++ TmpInst0.addOperand(MCOperand::createReg(ZeroReg)); ++ } ++ ++ lowerOperand(MI->getOperand(0), MCOp); ++ TmpInst0.addOperand(MCOp); ++ ++ TmpInst0.addOperand(MCOperand::createImm(0)); ++ ++ EmitToStreamer(OutStreamer, TmpInst0); ++} ++ ++void LoongArchAsmPrinter::emitPseudoTailBranch(MCStreamer &OutStreamer, ++ const MachineInstr *MI) { ++ MCInst TmpInst; ++ TmpInst.setOpcode(LoongArch::B); ++ ++ MCOperand MCOp; ++ ++ lowerOperand(MI->getOperand(0), MCOp); ++ TmpInst.addOperand(MCOp); ++ ++ EmitToStreamer(OutStreamer, TmpInst); ++} ++ + void LoongArchAsmPrinter::emitInstruction(const MachineInstr *MI) { +- LoongArch_MC::verifyInstructionPredicates( +- MI->getOpcode(), getSubtargetInfo().getFeatureBits()); ++ LoongArchTargetStreamer &TS = getTargetStreamer(); ++ unsigned Opc = MI->getOpcode(); ++ TS.forbidModuleDirective(); + +- // Do any auto-generated pseudo lowerings. +- if (emitPseudoExpansionLowering(*OutStreamer, MI)) ++ if (MI->isDebugValue()) { ++ SmallString<128> Str; ++ raw_svector_ostream OS(Str); ++ ++ PrintDebugValueComment(MI, OS); ++ return; ++ } ++ if (MI->isDebugLabel()) + return; ++ // If we just ended a constant pool, mark it as such. ++ OutStreamer->emitDataRegion(MCDR_DataRegionEnd); ++ InConstantPool = false; + +- MCInst TmpInst; +- if (!lowerLoongArchMachineInstrToMCInst(MI, TmpInst, *this)) +- EmitToStreamer(*OutStreamer, TmpInst); ++ switch (Opc) { ++ case LoongArch::PATCHABLE_FUNCTION_ENTER: ++ LowerPATCHABLE_FUNCTION_ENTER(*MI); ++ return; ++ case LoongArch::PATCHABLE_FUNCTION_EXIT: ++ LowerPATCHABLE_FUNCTION_EXIT(*MI); ++ return; ++ case LoongArch::PATCHABLE_TAIL_CALL: ++ LowerPATCHABLE_TAIL_CALL(*MI); ++ return; ++ } ++ MachineBasicBlock::const_instr_iterator I = MI->getIterator(); ++ MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end(); ++ ++ do { ++ // Do any auto-generated pseudo lowerings. ++ if (emitPseudoExpansionLowering(*OutStreamer, &*I)) ++ continue; ++ if (I->getOpcode() == LoongArch::PseudoReturn || ++ I->getOpcode() == LoongArch::PseudoReturn64){ ++ emitPseudoIndirectBranch(*OutStreamer, &*I); ++ continue; ++ } ++ if (I->getOpcode() == LoongArch::PseudoTailReturn){ ++ emitPseudoTailBranch(*OutStreamer, &*I); ++ continue; ++ } ++ ++ // Some instructions are marked as pseudo right now which ++ // would make the test fail for the wrong reason but ++ // that will be fixed soon. We need this here because we are ++ // removing another test for this situation downstream in the ++ // callchain. ++ // ++ if (I->isPseudo() ++ && !isLongBranchPseudo(I->getOpcode())) ++ llvm_unreachable("Pseudo opcode found in EmitInstruction()"); ++ ++ MCInst TmpInst0; ++ MCInstLowering.Lower(&*I, TmpInst0); ++ EmitToStreamer(*OutStreamer, TmpInst0); ++ } while ((++I != E) && I->isInsideBundle()); + } + +-bool LoongArchAsmPrinter::runOnMachineFunction(MachineFunction &MF) { +- AsmPrinter::runOnMachineFunction(MF); +- return true; ++//===----------------------------------------------------------------------===// ++// ++// LoongArch Asm Directives ++// ++// ++//===----------------------------------------------------------------------===// ++ ++//===----------------------------------------------------------------------===// ++// Set directives ++//===----------------------------------------------------------------------===// ++ ++/// Emit Set directives. 
++const char *LoongArchAsmPrinter::getCurrentABIString() const { ++ switch (static_cast(TM).getABI().GetEnumValue()) { ++ case LoongArchABIInfo::ABI::LP32: return "abilp32"; ++ case LoongArchABIInfo::ABI::LPX32: return "abilpx32"; ++ case LoongArchABIInfo::ABI::LP64: return "abilp64"; ++ default: llvm_unreachable("Unknown LoongArch ABI"); ++ } ++} ++ ++void LoongArchAsmPrinter::emitFunctionEntryLabel() { ++ ++ OutStreamer->emitLabel(CurrentFnSym); ++ ++} ++ ++/// EmitFunctionBodyStart - Targets can override this to emit stuff before ++/// the first basic block in the function. ++void LoongArchAsmPrinter::emitFunctionBodyStart() { ++ ++ MCInstLowering.Initialize(&MF->getContext()); ++} ++ ++/// EmitFunctionBodyEnd - Targets can override this to emit stuff after ++/// the last basic block in the function. ++void LoongArchAsmPrinter::emitFunctionBodyEnd() { ++ ++ // Make sure to terminate any constant pools that were at the end ++ // of the function. ++ if (!InConstantPool) ++ return; ++ InConstantPool = false; ++ OutStreamer->emitDataRegion(MCDR_DataRegionEnd); ++} ++ ++void LoongArchAsmPrinter::emitBasicBlockEnd(const MachineBasicBlock &MBB) { ++ AsmPrinter::emitBasicBlockEnd(MBB); ++} ++ ++/// isBlockOnlyReachableByFallthough - Return true if the basic block has ++/// exactly one predecessor and the control transfer mechanism between ++/// the predecessor and this block is a fall-through. ++bool LoongArchAsmPrinter::isBlockOnlyReachableByFallthrough(const MachineBasicBlock* ++ MBB) const { ++ // The predecessor has to be immediately before this block. ++ const MachineBasicBlock *Pred = *MBB->pred_begin(); ++ ++ // If the predecessor is a switch statement, assume a jump table ++ // implementation, so it is not a fall through. ++ if (const BasicBlock *bb = Pred->getBasicBlock()) ++ if (isa(bb->getTerminator())) ++ return false; ++ ++ // Check default implementation ++ return AsmPrinter::isBlockOnlyReachableByFallthrough(MBB); ++} ++ ++// Print out an operand for an inline asm expression. ++bool LoongArchAsmPrinter::PrintAsmOperand(const MachineInstr *MI, ++ unsigned OpNum, const char *ExtraCode, raw_ostream &O) { ++ // Does this asm operand have a single letter operand modifier? ++ if (ExtraCode && ExtraCode[0]) { ++ if (ExtraCode[1] != 0) return true; // Unknown modifier. 
++ ++ const MachineOperand &MO = MI->getOperand(OpNum); ++ switch (ExtraCode[0]) { ++ default: ++ // See if this is a generic print operand ++ return AsmPrinter::PrintAsmOperand(MI,OpNum,ExtraCode,O); ++ case 'X': // hex const int ++ if ((MO.getType()) != MachineOperand::MO_Immediate) ++ return true; ++ O << "0x" << Twine::utohexstr(MO.getImm()); ++ return false; ++ case 'x': // hex const int (low 16 bits) ++ if ((MO.getType()) != MachineOperand::MO_Immediate) ++ return true; ++ O << "0x" << Twine::utohexstr(MO.getImm() & 0xffff); ++ return false; ++ case 'd': // decimal const int ++ if ((MO.getType()) != MachineOperand::MO_Immediate) ++ return true; ++ O << MO.getImm(); ++ return false; ++ case 'm': // decimal const int minus 1 ++ if ((MO.getType()) != MachineOperand::MO_Immediate) ++ return true; ++ O << MO.getImm() - 1; ++ return false; ++ case 'y': // exact log2 ++ if ((MO.getType()) != MachineOperand::MO_Immediate) ++ return true; ++ if (!isPowerOf2_64(MO.getImm())) ++ return true; ++ O << Log2_64(MO.getImm()); ++ return false; ++ case 'z': ++ // $r0 if zero, regular printing otherwise ++ if (MO.getType() == MachineOperand::MO_Immediate && MO.getImm() == 0) { ++ O << "$r0"; ++ return false; ++ } ++ // If not, call printOperand as normal. ++ break; ++ case 'D': // Second part of a double word register operand ++ case 'L': // Low order register of a double word register operand ++ case 'M': // High order register of a double word register operand ++ { ++ if (OpNum == 0) ++ return true; ++ const MachineOperand &FlagsOP = MI->getOperand(OpNum - 1); ++ if (!FlagsOP.isImm()) ++ return true; ++ unsigned Flags = FlagsOP.getImm(); ++ unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags); ++ // Number of registers represented by this operand. We are looking ++ // for 2 for 32 bit mode and 1 for 64 bit mode. ++ if (NumVals != 2) { ++ if (Subtarget->is64Bit() && NumVals == 1 && MO.isReg()) { ++ unsigned Reg = MO.getReg(); ++ O << '$' << LoongArchInstPrinter::getRegisterName(Reg); ++ return false; ++ } ++ return true; ++ } ++ ++ unsigned RegOp = OpNum; ++ if (!Subtarget->is64Bit()){ ++ // Endianness reverses which register holds the high or low value ++ // between M and L. ++ switch(ExtraCode[0]) { ++ case 'M': ++ RegOp = OpNum + 1; ++ break; ++ case 'L': ++ RegOp = OpNum; ++ break; ++ case 'D': // Always the second part ++ RegOp = OpNum + 1; ++ } ++ if (RegOp >= MI->getNumOperands()) ++ return true; ++ const MachineOperand &MO = MI->getOperand(RegOp); ++ if (!MO.isReg()) ++ return true; ++ unsigned Reg = MO.getReg(); ++ O << '$' << LoongArchInstPrinter::getRegisterName(Reg); ++ return false; ++ } ++ break; ++ } ++ case 'w': ++ // Print LSX registers for the 'f' constraint ++ // In LLVM, the 'w' modifier doesn't need to do anything. ++ // We can just call printOperand as normal. ++ break; ++ case 'u': ++ // Print LASX registers for the 'f' constraint ++ // In LLVM, the 'u' modifier doesn't need to do anything. ++ // We can just call printOperand as normal. 
++ break; ++ } ++ } ++ ++ printOperand(MI, OpNum, O); ++ return false; ++} ++ ++bool LoongArchAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, ++ unsigned OpNum, ++ const char *ExtraCode, ++ raw_ostream &O) { ++ assert(OpNum + 1 < MI->getNumOperands() && "Insufficient operands"); ++ const MachineOperand &BaseMO = MI->getOperand(OpNum); ++ const MachineOperand &OffsetMO = MI->getOperand(OpNum + 1); ++ assert(BaseMO.isReg() && "Unexpected base pointer for inline asm memory operand."); ++ assert(OffsetMO.isImm() && "Unexpected offset for inline asm memory operand."); ++ int Offset = OffsetMO.getImm(); ++ ++ // Currently we are expecting either no ExtraCode or 'D','M','L'. ++ if (ExtraCode) { ++ switch (ExtraCode[0]) { ++ case 'D': ++ case 'M': ++ Offset += 4; ++ break; ++ case 'L': ++ break; ++ default: ++ return true; // Unknown modifier. ++ } ++ } ++ ++ O << "$" << LoongArchInstPrinter::getRegisterName(BaseMO.getReg()) << ", " << Offset; ++ ++ return false; ++} ++ ++void LoongArchAsmPrinter::printOperand(const MachineInstr *MI, int opNum, ++ raw_ostream &O) { ++ const MachineOperand &MO = MI->getOperand(opNum); ++ ++ switch (MO.getType()) { ++ case MachineOperand::MO_Register: ++ O << '$' ++ << StringRef(LoongArchInstPrinter::getRegisterName(MO.getReg())).lower(); ++ break; ++ ++ case MachineOperand::MO_Immediate: ++ O << MO.getImm(); ++ break; ++ ++ case MachineOperand::MO_MachineBasicBlock: ++ MO.getMBB()->getSymbol()->print(O, MAI); ++ return; ++ ++ case MachineOperand::MO_GlobalAddress: ++ getSymbol(MO.getGlobal())->print(O, MAI); ++ break; ++ ++ case MachineOperand::MO_BlockAddress: { ++ MCSymbol *BA = GetBlockAddressSymbol(MO.getBlockAddress()); ++ O << BA->getName(); ++ break; ++ } ++ ++ case MachineOperand::MO_ConstantPoolIndex: ++ O << getDataLayout().getPrivateGlobalPrefix() << "CPI" ++ << getFunctionNumber() << "_" << MO.getIndex(); ++ if (MO.getOffset()) ++ O << "+" << MO.getOffset(); ++ break; ++ ++ default: ++ llvm_unreachable(""); ++ } ++} ++ ++void LoongArchAsmPrinter:: ++printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O) { ++ // Load/Store memory operands -- imm($reg) ++ // If PIC target the target is loaded as the ++ // pattern lw $25,%call16($28) ++ ++ printOperand(MI, opNum+1, O); ++ O << "("; ++ printOperand(MI, opNum, O); ++ O << ")"; ++} ++ ++void LoongArchAsmPrinter:: ++printMemOperandEA(const MachineInstr *MI, int opNum, raw_ostream &O) { ++ // when using stack locations for not load/store instructions ++ // print the same way as all normal 3 operand instructions. ++ printOperand(MI, opNum, O); ++ O << ", "; ++ printOperand(MI, opNum+1, O); ++} ++ ++void LoongArchAsmPrinter:: ++printRegisterList(const MachineInstr *MI, int opNum, raw_ostream &O) { ++ for (int i = opNum, e = MI->getNumOperands(); i != e; ++i) { ++ if (i != opNum) O << ", "; ++ printOperand(MI, i, O); ++ } ++} ++ ++void LoongArchAsmPrinter::emitStartOfAsmFile(Module &M) { ++ LoongArchTargetStreamer &TS = getTargetStreamer(); ++ ++ // LoongArchTargetStreamer has an initialization order problem when emitting an ++ // object file directly (see LoongArchTargetELFStreamer for full details). Work ++ // around it by re-initializing the PIC state here. ++ TS.setPic(OutContext.getObjectFileInfo()->isPositionIndependent()); ++ ++ // Compute LoongArch architecture attributes based on the default subtarget ++ // that we'd have constructed. Module level directives aren't LTO ++ // clean anyhow. 
++ // FIXME: For ifunc related functions we could iterate over and look ++ // for a feature string that doesn't match the default one. ++ const Triple &TT = TM.getTargetTriple(); ++ StringRef CPU = LoongArch_MC::selectLoongArchCPU(TT, TM.getTargetCPU()); ++ StringRef FS = TM.getTargetFeatureString(); ++ const LoongArchTargetMachine &MTM = static_cast(TM); ++ const LoongArchSubtarget STI(TT, CPU, FS, MTM, None); ++ ++ TS.updateABIInfo(STI); ++} ++ ++void LoongArchAsmPrinter::emitInlineAsmStart() const { ++ ++ OutStreamer->addBlankLine(); ++} ++ ++void LoongArchAsmPrinter::emitInlineAsmEnd(const MCSubtargetInfo &StartInfo, ++ const MCSubtargetInfo *EndInfo) const { ++ OutStreamer->addBlankLine(); ++} ++ ++void LoongArchAsmPrinter::EmitInstrReg(const MCSubtargetInfo &STI, unsigned Opcode, ++ unsigned Reg) { ++ MCInst I; ++ I.setOpcode(Opcode); ++ I.addOperand(MCOperand::createReg(Reg)); ++ OutStreamer->emitInstruction(I, STI); ++} ++ ++void LoongArchAsmPrinter::EmitInstrRegReg(const MCSubtargetInfo &STI, ++ unsigned Opcode, unsigned Reg1, ++ unsigned Reg2) { ++ MCInst I; ++ // ++ // Because of the current td files for LoongArch32, the operands for MTC1 ++ // appear backwards from their normal assembly order. It's not a trivial ++ // change to fix this in the td file so we adjust for it here. ++ // ++ if (Opcode == LoongArch::MOVGR2FR_W) { ++ unsigned Temp = Reg1; ++ Reg1 = Reg2; ++ Reg2 = Temp; ++ } ++ I.setOpcode(Opcode); ++ I.addOperand(MCOperand::createReg(Reg1)); ++ I.addOperand(MCOperand::createReg(Reg2)); ++ OutStreamer->emitInstruction(I, STI); ++} ++ ++void LoongArchAsmPrinter::EmitInstrRegRegReg(const MCSubtargetInfo &STI, ++ unsigned Opcode, unsigned Reg1, ++ unsigned Reg2, unsigned Reg3) { ++ MCInst I; ++ I.setOpcode(Opcode); ++ I.addOperand(MCOperand::createReg(Reg1)); ++ I.addOperand(MCOperand::createReg(Reg2)); ++ I.addOperand(MCOperand::createReg(Reg3)); ++ OutStreamer->emitInstruction(I, STI); ++} ++ ++void LoongArchAsmPrinter::EmitMovFPIntPair(const MCSubtargetInfo &STI, ++ unsigned MovOpc, unsigned Reg1, ++ unsigned Reg2, unsigned FPReg1, ++ unsigned FPReg2, bool LE) { ++ if (!LE) { ++ unsigned temp = Reg1; ++ Reg1 = Reg2; ++ Reg2 = temp; ++ } ++ EmitInstrRegReg(STI, MovOpc, Reg1, FPReg1); ++ EmitInstrRegReg(STI, MovOpc, Reg2, FPReg2); ++} ++ ++void LoongArchAsmPrinter::EmitSled(const MachineInstr &MI, SledKind Kind) { ++ const uint8_t NoopsInSledCount = 11; ++ // For loongarch64 we want to emit the following pattern: ++ // ++ // .Lxray_sled_N: ++ // ALIGN ++ // B .tmpN ++ // 11 NOP instructions (44 bytes) ++ // .tmpN ++ // ++ // We need the 44 bytes (11 instructions) because at runtime, we'd ++ // be patching over the full 48 bytes (12 instructions) with the following ++ // pattern: ++ // ++ // addi.d sp,sp, -16 ;create stack frame ++ // st.d ra, sp, 8 ;save return address ++ // lu12i.w t0,%%abs_hi20(__xray_FunctionEntry/Exit) ++ // ori t0,t0,%%abs_lo12(__xray_FunctionEntry/Exit) ++ // lu32i.d t0,%%abs64_lo20(__xray_FunctionEntry/Exit) ++ // lu52i.d t0,t0,%%abs64_hi12(__xray_FunctionEntry/Exit) ++ // lu12i.w t1,%%abs_hi20(function_id) ++ // ori t1,t1,%%abs_lo12(function_id) ;pass function id ++ // jirl ra, t0, 0 ;call Tracing hook ++ // ld.d ra, sp, 8 ;restore return address ++ // addi.d sp, sp, 16 ;delete stack frame ++ ++ OutStreamer->emitCodeAlignment(4, &getSubtargetInfo()); ++ auto CurSled = OutContext.createTempSymbol("xray_sled_", true); ++ OutStreamer->emitLabel(CurSled); ++ auto Target = OutContext.createTempSymbol(); ++ ++ // Emit "B .tmpN" instruction, which 
jumps over the nop sled to the actual ++ // start of function ++ const MCExpr *TargetExpr = MCSymbolRefExpr::create( ++ Target, MCSymbolRefExpr::VariantKind::VK_None, OutContext); ++ EmitToStreamer(*OutStreamer, MCInstBuilder(LoongArch::BEQ) ++ .addReg(LoongArch::ZERO) ++ .addReg(LoongArch::ZERO) ++ .addExpr(TargetExpr)); ++ ++ for (int8_t I = 0; I < NoopsInSledCount; I++) ++ EmitToStreamer(*OutStreamer, MCInstBuilder(LoongArch::ANDI) ++ .addReg(LoongArch::ZERO) ++ .addReg(LoongArch::ZERO) ++ .addImm(0)); ++ ++ OutStreamer->emitLabel(Target); ++ recordSled(CurSled, MI, Kind, 2); ++} ++ ++void LoongArchAsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI) { ++ EmitSled(MI, SledKind::FUNCTION_ENTER); ++} ++ ++void LoongArchAsmPrinter::LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr &MI) { ++ EmitSled(MI, SledKind::FUNCTION_EXIT); ++} ++ ++void LoongArchAsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI) { ++ EmitSled(MI, SledKind::TAIL_CALL); ++} ++ ++void LoongArchAsmPrinter::PrintDebugValueComment(const MachineInstr *MI, ++ raw_ostream &OS) { ++ // TODO: implement ++} ++ ++bool LoongArchAsmPrinter::isLongBranchPseudo(int Opcode) const { ++ return (Opcode == LoongArch::LONG_BRANCH_ADDIW ++ || Opcode == LoongArch::LONG_BRANCH_ADDIW2Op ++ || Opcode == LoongArch::LONG_BRANCH_ADDID ++ || Opcode == LoongArch::LONG_BRANCH_ADDID2Op ++ || Opcode == LoongArch::LONG_BRANCH_PCADDU12I); + } + + // Force static initialization. + extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeLoongArchAsmPrinter() { + RegisterAsmPrinter X(getTheLoongArch32Target()); +- RegisterAsmPrinter Y(getTheLoongArch64Target()); ++ RegisterAsmPrinter A(getTheLoongArch64Target()); + } +diff --git a/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.h b/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.h +index b51c19188..3e4ca8ed1 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.h ++++ b/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.h +@@ -1,4 +1,4 @@ +-//===- LoongArchAsmPrinter.h - LoongArch LLVM Assembly Printer -*- C++ -*--===// ++//===- LoongArchAsmPrinter.h - LoongArch LLVM Assembly Printer -----------*- C++ -*--===// + // + // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + // See https://llvm.org/LICENSE.txt for license information. 
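LLVMInitializeLoongArchAsmPrinter at the end of the .cpp above relies on RegisterAsmPrinter objects whose constructors run during static initialization and record a printer for each of the two LoongArch targets. A generic, self-contained sketch of that registration idiom (invented names and types, not LLVM's TargetRegistry API):

    #include <cassert>
    #include <functional>
    #include <map>
    #include <string>

    using Factory = std::function<std::string()>;

    // Function-local static keeps the map's construction ordered before any use.
    static std::map<std::string, Factory> &registry() {
      static std::map<std::string, Factory> R;
      return R;
    }

    // The registrar's constructor does the work, so defining a global object
    // is enough to make the target known before main() runs.
    struct RegisterPrinter {
      RegisterPrinter(const std::string &Target, Factory F) {
        registry()[Target] = std::move(F);
      }
    };

    static RegisterPrinter X("loongarch32", [] { return std::string("LA32 printer"); });
    static RegisterPrinter Y("loongarch64", [] { return std::string("LA64 printer"); });

    int main() {
      assert(registry().count("loongarch32") == 1);
      assert(registry().at("loongarch64")() == "LA64 printer");
      return 0;
    }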
+@@ -13,36 +13,123 @@ + #ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCHASMPRINTER_H + #define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHASMPRINTER_H + ++#include "LoongArchMCInstLower.h" + #include "LoongArchSubtarget.h" + #include "llvm/CodeGen/AsmPrinter.h" + #include "llvm/MC/MCStreamer.h" + #include "llvm/Support/Compiler.h" ++#include ++#include ++#include + + namespace llvm { + ++class MCOperand; ++class MCSubtargetInfo; ++class MCSymbol; ++class MachineBasicBlock; ++class MachineConstantPool; ++class MachineFunction; ++class MachineInstr; ++class MachineOperand; ++class LoongArchFunctionInfo; ++class LoongArchTargetStreamer; ++class Module; ++class raw_ostream; ++class TargetMachine; ++ + class LLVM_LIBRARY_VISIBILITY LoongArchAsmPrinter : public AsmPrinter { +- const MCSubtargetInfo *STI; ++ LoongArchTargetStreamer &getTargetStreamer() const; ++ ++ void EmitInstrWithMacroNoAT(const MachineInstr *MI); ++ ++ //===------------------------------------------------------------------===// ++ // XRay implementation ++ //===------------------------------------------------------------------===// + + public: +- explicit LoongArchAsmPrinter(TargetMachine &TM, +- std::unique_ptr Streamer) +- : AsmPrinter(TM, std::move(Streamer)), STI(TM.getMCSubtargetInfo()) {} ++ // XRay-specific lowering for LoongArch. ++ void LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI); ++ void LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr &MI); ++ void LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI); + +- StringRef getPassName() const override { +- return "LoongArch Assembly Printer"; +- } ++private: ++ /// MCP - Keep a pointer to constantpool entries of the current ++ /// MachineFunction. ++ const MachineConstantPool *MCP = nullptr; + +- bool runOnMachineFunction(MachineFunction &MF) override; ++ /// InConstantPool - Maintain state when emitting a sequence of constant ++ /// pool entries so we can properly mark them as data regions. ++ bool InConstantPool = false; + +- void emitInstruction(const MachineInstr *MI) override; ++ void EmitSled(const MachineInstr &MI, SledKind Kind); + + // tblgen'erated function. + bool emitPseudoExpansionLowering(MCStreamer &OutStreamer, + const MachineInstr *MI); +- // Wrapper needed for tblgenned pseudo lowering. +- bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp) const { +- return lowerLoongArchMachineOperandToMCOperand(MO, MCOp, *this); +- } ++ ++ // Emit PseudoReturn, PseudoReturn64, PseudoIndirectBranch, ++ // and PseudoIndirectBranch64 as a JIRL as appropriate ++ // for the target. ++ void emitPseudoIndirectBranch(MCStreamer &OutStreamer, ++ const MachineInstr *MI); ++ ++ void emitPseudoTailBranch(MCStreamer &OutStreamer, ++ const MachineInstr *MI); ++ ++ // lowerOperand - Convert a MachineOperand into the equivalent MCOperand. 
++ bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp); ++ ++ void emitInlineAsmStart() const override; ++ ++ void emitInlineAsmEnd(const MCSubtargetInfo &StartInfo, ++ const MCSubtargetInfo *EndInfo) const override; ++ ++ void EmitInstrReg(const MCSubtargetInfo &STI, unsigned Opcode, unsigned Reg); ++ ++ void EmitInstrRegReg(const MCSubtargetInfo &STI, unsigned Opcode, ++ unsigned Reg1, unsigned Reg2); ++ ++ void EmitInstrRegRegReg(const MCSubtargetInfo &STI, unsigned Opcode, ++ unsigned Reg1, unsigned Reg2, unsigned Reg3); ++ ++ void EmitMovFPIntPair(const MCSubtargetInfo &STI, unsigned MovOpc, ++ unsigned Reg1, unsigned Reg2, unsigned FPReg1, ++ unsigned FPReg2, bool LE); ++ ++ bool isLongBranchPseudo(int Opcode) const; ++ ++public: ++ const LoongArchSubtarget *Subtarget; ++ const LoongArchFunctionInfo *LoongArchFI; ++ LoongArchMCInstLower MCInstLowering; ++ ++ explicit LoongArchAsmPrinter(TargetMachine &TM, ++ std::unique_ptr Streamer) ++ : AsmPrinter(TM, std::move(Streamer)), MCInstLowering(*this) {} ++ ++ StringRef getPassName() const override { return "LoongArch Assembly Printer"; } ++ ++ bool runOnMachineFunction(MachineFunction &MF) override; ++ ++ void emitInstruction(const MachineInstr *MI) override; ++ const char *getCurrentABIString() const; ++ void emitFunctionEntryLabel() override; ++ void emitFunctionBodyStart() override; ++ void emitFunctionBodyEnd() override; ++ void emitBasicBlockEnd(const MachineBasicBlock &MBB) override; ++ bool isBlockOnlyReachableByFallthrough( ++ const MachineBasicBlock* MBB) const override; ++ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, ++ const char *ExtraCode, raw_ostream &O) override; ++ bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum, ++ const char *ExtraCode, raw_ostream &O) override; ++ void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O); ++ void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O); ++ void printMemOperandEA(const MachineInstr *MI, int opNum, raw_ostream &O); ++ void printRegisterList(const MachineInstr *MI, int opNum, raw_ostream &O); ++ void emitStartOfAsmFile(Module &M) override; ++ void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS); + }; + + } // end namespace llvm +diff --git a/llvm/lib/Target/LoongArch/LoongArchCCState.cpp b/llvm/lib/Target/LoongArch/LoongArchCCState.cpp +new file mode 100644 +index 000000000..6630ca759 +--- /dev/null ++++ b/llvm/lib/Target/LoongArch/LoongArchCCState.cpp +@@ -0,0 +1,165 @@ ++//===---- LoongArchCCState.cpp - CCState with LoongArch specific extensions ---------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++ ++#include "LoongArchCCState.h" ++#include "LoongArchSubtarget.h" ++#include "llvm/IR/Module.h" ++ ++using namespace llvm; ++ ++/// This function returns true if CallSym is a long double emulation routine. 
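The doc comment above introduces isF128SoftLibCall (defined just below), which answers membership in an alphabetically sorted table of long-double emulation routines with std::binary_search, guarded by an is_sorted assertion so the comparator and the table ordering cannot drift apart. A stand-alone distillation of that lookup, trimmed to a handful of entries (a sketch, not the patch's table):

    #include <algorithm>
    #include <cassert>
    #include <cstring>
    #include <iterator>

    static bool isSoftFloat128Call(const char *Sym) {
      // The table must stay sorted under the same comparator used for lookup.
      static const char *const Table[] = {"__addtf3", "__divtf3", "__multf3",
                                          "__subtf3", "sqrtl"};
      auto Less = [](const char *A, const char *B) { return std::strcmp(A, B) < 0; };
      assert(std::is_sorted(std::begin(Table), std::end(Table), Less));
      return std::binary_search(std::begin(Table), std::end(Table), Sym, Less);
    }

    int main() {
      assert(isSoftFloat128Call("__addtf3"));
      assert(!isSoftFloat128Call("memcpy"));
      return 0;
    }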
++static bool isF128SoftLibCall(const char *CallSym) { ++ const char *const LibCalls[] = { ++ "__addtf3", "__divtf3", "__eqtf2", "__extenddftf2", ++ "__extendsftf2", "__fixtfdi", "__fixtfsi", "__fixtfti", ++ "__fixunstfdi", "__fixunstfsi", "__fixunstfti", "__floatditf", ++ "__floatsitf", "__floattitf", "__floatunditf", "__floatunsitf", ++ "__floatuntitf", "__getf2", "__gttf2", "__letf2", ++ "__lttf2", "__multf3", "__netf2", "__powitf2", ++ "__subtf3", "__trunctfdf2", "__trunctfsf2", "__unordtf2", ++ "ceill", "copysignl", "cosl", "exp2l", ++ "expl", "floorl", "fmal", "fmaxl", ++ "fmodl", "log10l", "log2l", "logl", ++ "nearbyintl", "powl", "rintl", "roundl", ++ "sinl", "sqrtl", "truncl"}; ++ ++ // Check that LibCalls is sorted alphabetically. ++ auto Comp = [](const char *S1, const char *S2) { return strcmp(S1, S2) < 0; }; ++ assert(std::is_sorted(std::begin(LibCalls), std::end(LibCalls), Comp)); ++ return std::binary_search(std::begin(LibCalls), std::end(LibCalls), ++ CallSym, Comp); ++} ++ ++/// This function returns true if Ty is fp128, {f128} or i128 which was ++/// originally a fp128. ++static bool originalTypeIsF128(const Type *Ty, const char *Func) { ++ if (Ty->isFP128Ty()) ++ return true; ++ ++ if (Ty->isStructTy() && Ty->getStructNumElements() == 1 && ++ Ty->getStructElementType(0)->isFP128Ty()) ++ return true; ++ ++ // If the Ty is i128 and the function being called is a long double emulation ++ // routine, then the original type is f128. ++ return (Func && Ty->isIntegerTy(128) && isF128SoftLibCall(Func)); ++} ++ ++/// Return true if the original type was vXfXX. ++static bool originalEVTTypeIsVectorFloat(EVT Ty) { ++ if (Ty.isVector() && Ty.getVectorElementType().isFloatingPoint()) ++ return true; ++ ++ return false; ++} ++ ++/// Return true if the original type was vXfXX / vXfXX. ++static bool originalTypeIsVectorFloat(const Type * Ty) { ++ if (Ty->isVectorTy() && Ty->isFPOrFPVectorTy()) ++ return true; ++ ++ return false; ++} ++ ++LoongArchCCState::SpecialCallingConvType ++LoongArchCCState::getSpecialCallingConvForCallee(const SDNode *Callee, ++ const LoongArchSubtarget &Subtarget) { ++ LoongArchCCState::SpecialCallingConvType SpecialCallingConv = NoSpecialCallingConv; ++ return SpecialCallingConv; ++} ++ ++void LoongArchCCState::PreAnalyzeCallResultForF128( ++ const SmallVectorImpl &Ins, ++ const Type *RetTy, const char *Call) { ++ for (unsigned i = 0; i < Ins.size(); ++i) { ++ OriginalArgWasF128.push_back( ++ originalTypeIsF128(RetTy, Call)); ++ OriginalArgWasFloat.push_back(RetTy->isFloatingPointTy()); ++ } ++} ++ ++/// Identify lowered values that originated from f128 or float arguments and ++/// record this for use by RetCC_LoongArchLP64LPX32. ++void LoongArchCCState::PreAnalyzeReturnForF128( ++ const SmallVectorImpl &Outs) { ++ const MachineFunction &MF = getMachineFunction(); ++ for (unsigned i = 0; i < Outs.size(); ++i) { ++ OriginalArgWasF128.push_back( ++ originalTypeIsF128(MF.getFunction().getReturnType(), nullptr)); ++ OriginalArgWasFloat.push_back( ++ MF.getFunction().getReturnType()->isFloatingPointTy()); ++ } ++} ++ ++/// Identify lower values that originated from vXfXX and record ++/// this. ++void LoongArchCCState::PreAnalyzeCallResultForVectorFloat( ++ const SmallVectorImpl &Ins, const Type *RetTy) { ++ for (unsigned i = 0; i < Ins.size(); ++i) { ++ OriginalRetWasFloatVector.push_back(originalTypeIsVectorFloat(RetTy)); ++ } ++} ++ ++/// Identify lowered values that originated from vXfXX arguments and record ++/// this. 
++void LoongArchCCState::PreAnalyzeReturnForVectorFloat( ++ const SmallVectorImpl &Outs) { ++ for (unsigned i = 0; i < Outs.size(); ++i) { ++ ISD::OutputArg Out = Outs[i]; ++ OriginalRetWasFloatVector.push_back( ++ originalEVTTypeIsVectorFloat(Out.ArgVT)); ++ } ++} ++ ++/// Identify lowered values that originated from f128, float and sret to vXfXX ++/// arguments and record this. ++void LoongArchCCState::PreAnalyzeCallOperands( ++ const SmallVectorImpl &Outs, ++ std::vector &FuncArgs, ++ const char *Func) { ++ for (unsigned i = 0; i < Outs.size(); ++i) { ++ TargetLowering::ArgListEntry FuncArg = FuncArgs[Outs[i].OrigArgIndex]; ++ ++ OriginalArgWasF128.push_back(originalTypeIsF128(FuncArg.Ty, Func)); ++ OriginalArgWasFloat.push_back(FuncArg.Ty->isFloatingPointTy()); ++ OriginalArgWasFloatVector.push_back(FuncArg.Ty->isVectorTy()); ++ CallOperandIsFixed.push_back(Outs[i].IsFixed); ++ } ++} ++ ++/// Identify lowered values that originated from f128, float and vXfXX arguments ++/// and record this. ++void LoongArchCCState::PreAnalyzeFormalArgumentsForF128( ++ const SmallVectorImpl &Ins) { ++ const MachineFunction &MF = getMachineFunction(); ++ for (unsigned i = 0; i < Ins.size(); ++i) { ++ Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin(); ++ ++ // SRet arguments cannot originate from f128 or {f128} returns so we just ++ // push false. We have to handle this specially since SRet arguments ++ // aren't mapped to an original argument. ++ if (Ins[i].Flags.isSRet()) { ++ OriginalArgWasF128.push_back(false); ++ OriginalArgWasFloat.push_back(false); ++ OriginalArgWasFloatVector.push_back(false); ++ continue; ++ } ++ ++ assert(Ins[i].getOrigArgIndex() < MF.getFunction().arg_size()); ++ std::advance(FuncArg, Ins[i].getOrigArgIndex()); ++ ++ OriginalArgWasF128.push_back( ++ originalTypeIsF128(FuncArg->getType(), nullptr)); ++ OriginalArgWasFloat.push_back(FuncArg->getType()->isFloatingPointTy()); ++ ++ // The LoongArch vector ABI exhibits a corner case of sorts or quirk; if the ++ // first argument is actually an SRet pointer to a vector, then the next ++ // argument slot is $a2. ++ OriginalArgWasFloatVector.push_back(FuncArg->getType()->isVectorTy()); ++ } ++} +diff --git a/llvm/lib/Target/LoongArch/LoongArchCCState.h b/llvm/lib/Target/LoongArch/LoongArchCCState.h +new file mode 100644 +index 000000000..1c1a1446e +--- /dev/null ++++ b/llvm/lib/Target/LoongArch/LoongArchCCState.h +@@ -0,0 +1,165 @@ ++//===---- LoongArchCCState.h - CCState with LoongArch specific extensions -----------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef LoongArchCCSTATE_H ++#define LoongArchCCSTATE_H ++ ++#include "LoongArchISelLowering.h" ++#include "llvm/ADT/SmallVector.h" ++#include "llvm/CodeGen/CallingConvLower.h" ++ ++namespace llvm { ++class SDNode; ++class LoongArchSubtarget; ++ ++class LoongArchCCState : public CCState { ++public: ++ enum SpecialCallingConvType { NoSpecialCallingConv }; ++ ++ /// Determine the SpecialCallingConvType for the given callee ++ static SpecialCallingConvType ++ getSpecialCallingConvForCallee(const SDNode *Callee, ++ const LoongArchSubtarget &Subtarget); ++ ++private: ++ /// Identify lowered values that originated from f128 arguments and record ++ /// this for use by RetCC_LoongArchLP64LPX32. 
++ void PreAnalyzeCallResultForF128(const SmallVectorImpl &Ins, ++ const Type *RetTy, const char * Func); ++ ++ /// Identify lowered values that originated from f128 arguments and record ++ /// this for use by RetCC_LoongArchLP64LPX32. ++ void PreAnalyzeReturnForF128(const SmallVectorImpl &Outs); ++ ++ /// Identify lowered values that originated from f128 arguments and record ++ /// this. ++ void ++ PreAnalyzeCallOperands(const SmallVectorImpl &Outs, ++ std::vector &FuncArgs, ++ const char *Func); ++ ++ /// Identify lowered values that originated from f128 arguments and record ++ /// this for use by RetCC_LoongArchLP64LPX32. ++ void ++ PreAnalyzeFormalArgumentsForF128(const SmallVectorImpl &Ins); ++ ++ void ++ PreAnalyzeCallResultForVectorFloat(const SmallVectorImpl &Ins, ++ const Type *RetTy); ++ ++ void PreAnalyzeFormalArgumentsForVectorFloat( ++ const SmallVectorImpl &Ins); ++ ++ void ++ PreAnalyzeReturnForVectorFloat(const SmallVectorImpl &Outs); ++ ++ /// Records whether the value has been lowered from an f128. ++ SmallVector OriginalArgWasF128; ++ ++ /// Records whether the value has been lowered from float. ++ SmallVector OriginalArgWasFloat; ++ ++ /// Records whether the value has been lowered from a floating point vector. ++ SmallVector OriginalArgWasFloatVector; ++ ++ /// Records whether the return value has been lowered from a floating point ++ /// vector. ++ SmallVector OriginalRetWasFloatVector; ++ ++ /// Records whether the value was a fixed argument. ++ /// See ISD::OutputArg::IsFixed, ++ SmallVector CallOperandIsFixed; ++ ++ // FIXME: This should probably be a fully fledged calling convention. ++ SpecialCallingConvType SpecialCallingConv; ++ ++public: ++ LoongArchCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF, ++ SmallVectorImpl &locs, LLVMContext &C, ++ SpecialCallingConvType SpecialCC = NoSpecialCallingConv) ++ : CCState(CC, isVarArg, MF, locs, C), SpecialCallingConv(SpecialCC) {} ++ ++ void ++ AnalyzeCallOperands(const SmallVectorImpl &Outs, ++ CCAssignFn Fn, ++ std::vector &FuncArgs, ++ const char *Func) { ++ PreAnalyzeCallOperands(Outs, FuncArgs, Func); ++ CCState::AnalyzeCallOperands(Outs, Fn); ++ OriginalArgWasF128.clear(); ++ OriginalArgWasFloat.clear(); ++ OriginalArgWasFloatVector.clear(); ++ CallOperandIsFixed.clear(); ++ } ++ ++ // The AnalyzeCallOperands in the base class is not usable since we must ++ // provide a means of accessing ArgListEntry::IsFixed. Delete them from this ++ // class. This doesn't stop them being used via the base class though. 
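++  // For illustration only (hypothetical call site, not part of this patch):
++  // lowering code is expected to build a LoongArchCCState and go through the
++  // overload above, e.g.
++  //   LoongArchCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, Context);
++  //   CCInfo.AnalyzeCallOperands(Outs, CC_LoongArch, FuncArgs, CalleeName);
++  // rather than the CCState overloads deleted below.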
++  void AnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs,
++                           CCAssignFn Fn) = delete;
++  void AnalyzeCallOperands(const SmallVectorImpl<MVT> &Outs,
++                           SmallVectorImpl<ISD::ArgFlagsTy> &Flags,
++                           CCAssignFn Fn) = delete;
++
++  void AnalyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins,
++                              CCAssignFn Fn) {
++    PreAnalyzeFormalArgumentsForF128(Ins);
++    CCState::AnalyzeFormalArguments(Ins, Fn);
++    OriginalArgWasFloat.clear();
++    OriginalArgWasF128.clear();
++    OriginalArgWasFloatVector.clear();
++  }
++
++  void AnalyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins,
++                         CCAssignFn Fn, const Type *RetTy,
++                         const char *Func) {
++    PreAnalyzeCallResultForF128(Ins, RetTy, Func);
++    PreAnalyzeCallResultForVectorFloat(Ins, RetTy);
++    CCState::AnalyzeCallResult(Ins, Fn);
++    OriginalArgWasFloat.clear();
++    OriginalArgWasF128.clear();
++    OriginalArgWasFloatVector.clear();
++  }
++
++  void AnalyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs,
++                     CCAssignFn Fn) {
++    PreAnalyzeReturnForF128(Outs);
++    PreAnalyzeReturnForVectorFloat(Outs);
++    CCState::AnalyzeReturn(Outs, Fn);
++    OriginalArgWasFloat.clear();
++    OriginalArgWasF128.clear();
++    OriginalArgWasFloatVector.clear();
++  }
++
++  bool CheckReturn(const SmallVectorImpl<ISD::OutputArg> &ArgsFlags,
++                   CCAssignFn Fn) {
++    PreAnalyzeReturnForF128(ArgsFlags);
++    PreAnalyzeReturnForVectorFloat(ArgsFlags);
++    bool Return = CCState::CheckReturn(ArgsFlags, Fn);
++    OriginalArgWasFloat.clear();
++    OriginalArgWasF128.clear();
++    OriginalArgWasFloatVector.clear();
++    return Return;
++  }
++
++  bool WasOriginalArgF128(unsigned ValNo) { return OriginalArgWasF128[ValNo]; }
++  bool WasOriginalArgFloat(unsigned ValNo) {
++    return OriginalArgWasFloat[ValNo];
++  }
++  bool WasOriginalArgVectorFloat(unsigned ValNo) const {
++    return OriginalArgWasFloatVector[ValNo];
++  }
++  bool WasOriginalRetVectorFloat(unsigned ValNo) const {
++    return OriginalRetWasFloatVector[ValNo];
++  }
++  bool IsCallOperandFixed(unsigned ValNo) { return CallOperandIsFixed[ValNo]; }
++  SpecialCallingConvType getSpecialCallingConv() { return SpecialCallingConv; }
++};
++}
++
++#endif
+diff --git a/llvm/lib/Target/LoongArch/LoongArchCallingConv.td b/llvm/lib/Target/LoongArch/LoongArchCallingConv.td
+index 984416316..e8564e85b 100644
+--- a/llvm/lib/Target/LoongArch/LoongArchCallingConv.td
++++ b/llvm/lib/Target/LoongArch/LoongArchCallingConv.td
+@@ -1,23 +1,310 @@
+-//=- LoongArchCallingConv.td - Calling Conventions LoongArch -*- tablegen -*-=//
++//===-- LoongArchCallingConv.td - Calling Conventions for LoongArch --*- tablegen -*-===//
+ //
+ // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ // See https://llvm.org/LICENSE.txt for license information.
+ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ //
+ //===----------------------------------------------------------------------===//
++// This describes the calling conventions for the LoongArch architecture.
++//===----------------------------------------------------------------------===//
++
++/// CCIfSubtarget - Match if the current subtarget has a feature F.
++class CCIfSubtarget<string F, CCAction A, string Invert = "">
++    : CCIf<!strconcat(Invert,
++                      "static_cast<const LoongArchSubtarget&>"
++                      "(State.getMachineFunction().getSubtarget()).",
++                      F),
++           A>;
++
++// The inverse of CCIfSubtarget
++class CCIfSubtargetNot<string F, CCAction A> : CCIfSubtarget<F, A, "!">;
++
++/// Match if the original argument (before lowering) was a float.
++/// For example, this is true for i32's that were lowered from soft-float.
++class CCIfOrigArgWasNotFloat<CCAction A>
++    : CCIf<"!static_cast<LoongArchCCState *>(&State)->WasOriginalArgFloat(ValNo)",
++           A>;
++
++/// Match if the original argument (before lowering) was a 128-bit float (i.e.
++/// long double). ++class CCIfOrigArgWasF128 ++ : CCIf<"static_cast(&State)->WasOriginalArgF128(ValNo)", A>; ++ ++/// Match if this specific argument is a vararg. ++/// This is slightly different fro CCIfIsVarArg which matches if any argument is ++/// a vararg. ++class CCIfArgIsVarArg ++ : CCIf<"!static_cast(&State)->IsCallOperandFixed(ValNo)", A>; ++ ++/// Match if the return was a floating point vector. ++class CCIfOrigArgWasNotVectorFloat ++ : CCIf<"!static_cast(&State)" ++ "->WasOriginalRetVectorFloat(ValNo)", A>; ++ ++/// Match if the special calling conv is the specified value. ++class CCIfSpecialCallingConv ++ : CCIf<"static_cast(&State)->getSpecialCallingConv() == " ++ "LoongArchCCState::" # CC, A>; ++ ++// For soft-float, f128 values are returned in A0_64 rather than V1_64. ++def RetCC_F128SoftFloat : CallingConv<[ ++ CCAssignToReg<[A0_64, A1_64]> ++]>; ++ + // +-// This describes the calling conventions for the LoongArch architecture. +-// ++// For hard-float, f128 values are returned as a pair of f64's rather than a ++// pair of i64's. ++def RetCC_F128HardFloat : CallingConv<[ ++ //CCBitConvertToType, ++ ++ // Contrary to the ABI documentation, a struct containing a long double is ++ // returned in $f0, and $f1 instead of the usual $f0, and $f2. This is to ++ // match the de facto ABI as implemented by GCC. ++ CCIfInReg>, ++ ++ CCAssignToReg<[A0_64, A1_64]> ++]>; ++ ++// Handle F128 specially since we can't identify the original type during the ++// tablegen-erated code. ++def RetCC_F128 : CallingConv<[ ++ CCIfSubtarget<"useSoftFloat()", ++ CCIfType<[i64], CCDelegateTo>>, ++ CCIfSubtargetNot<"useSoftFloat()", ++ CCIfType<[i64], CCDelegateTo>> ++]>; ++ ++//===----------------------------------------------------------------------===// ++// LoongArch LP32 Calling Convention ++//===----------------------------------------------------------------------===// ++ ++def CC_LoongArchLP32 : CallingConv<[ ++ // Promote i8/i16 arguments to i32. ++ CCIfType<[i1, i8, i16], CCPromoteToType>, ++ ++ // Integer values get stored in stack slots that are 4 bytes in ++ // size and 4-byte aligned. ++ CCIfType<[i32, f32], CCAssignToStack<4, 4>>, ++ ++ // Integer values get stored in stack slots that are 8 bytes in ++ // size and 8-byte aligned. ++ CCIfType<[f64], CCAssignToStack<8, 8>> ++]>; ++ ++// Only the return rules are defined here for LP32. The rules for argument ++// passing are defined in LoongArchISelLowering.cpp. ++def RetCC_LoongArchLP32 : CallingConv<[ ++ // Promote i1/i8/i16 return values to i32. ++ CCIfType<[i1, i8, i16], CCPromoteToType>, ++ ++ // i32 are returned in registers V0, V1, A0, A1, unless the original return ++ // type was a vector of floats. ++ CCIfOrigArgWasNotVectorFloat>>, ++ ++ // f32 are returned in registers F0, F2 ++ CCIfType<[f32], CCAssignToReg<[F0, F1]>>, ++ ++ // f64 arguments are returned in F0_64 and F2_64 in FP64bit mode or ++ // in F0 and F1 in FP32bit mode. 
++ CCIfType<[f64], CCIfSubtarget<"isFP64bit()", CCAssignToReg<[F0_64, F1_64]>>> ++]>; ++ ++def CC_LoongArchLP32_FP32 : CustomCallingConv; ++def CC_LoongArchLP32_FP64 : CustomCallingConv; ++def CC_LoongArch_F128 : CustomCallingConv; ++ ++def CC_LoongArchLP32_FP : CallingConv<[ ++ CCIfSubtargetNot<"isFP64bit()", CCDelegateTo>, ++ CCIfSubtarget<"isFP64bit()", CCDelegateTo> ++]>; ++ ++//===----------------------------------------------------------------------===// ++// LoongArch LPX32/LP64 Calling Convention ++//===----------------------------------------------------------------------===// ++ ++def CC_LoongArchLP64LPX32_SoftFloat : CallingConv<[ ++ CCAssignToReg<[A0, A1, A2, A3, ++ A4, A5, A6, A7]>, ++ CCAssignToStack<4, 8> ++]>; ++ ++def CC_LoongArchLP64LPX32 : CallingConv<[ ++ ++ // All integers (except soft-float integers) are promoted to 64-bit. ++ CCIfType<[i8, i16, i32], CCIfOrigArgWasNotFloat>>, ++ ++ // The only i32's we have left are soft-float arguments. ++ CCIfSubtarget<"useSoftFloat()", CCIfType<[i32], CCDelegateTo>>, ++ ++ // Integer arguments are passed in integer registers. ++ //CCIfType<[i64], CCAssignToRegWithShadow<[A0_64, A1_64, A2_64, A3_64, ++ // A4_64, A5_64, A6_64, A7_64], ++ // [F0_64, F1_64, F2_64, F3_64, ++ // F4_64, F5_64, F6_64, F7_64]>>, ++ CCIfType<[i64], CCAssignToReg<[A0_64, A1_64, A2_64, A3_64, ++ A4_64, A5_64, A6_64, A7_64]>>, ++ ++ // f32 arguments are passed in single precision FP registers. ++ CCIfType<[f32], CCAssignToReg<[F0, F1, F2, F3, ++ F4, F5, F6, F7]>>, ++ ++ // f64 arguments are passed in double precision FP registers. ++ CCIfType<[f64], CCAssignToReg<[F0_64, F1_64, F2_64, F3_64, ++ F4_64, F5_64, F6_64, F7_64]>>, ++ ++ // others f32 arguments are passed in single precision FP registers. ++ CCIfType<[f32], CCAssignToReg<[A0, A1, A2, A3, A4, A5, A6, A7]>>, ++ ++ // others f64 arguments are passed in double precision FP registers. ++ CCIfType<[f64], CCAssignToReg<[A0_64, A1_64, A2_64, A3_64, ++ A4_64, A5_64, A6_64, A7_64]>>, ++ ++ CCIfSubtarget<"hasLSX()", ++ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], ++ CCAssignToRegWithShadow<[VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7], ++ [A0_64, A1_64, A2_64, A3_64, ++ A4_64, A5_64, A6_64, A7_64]>>>, ++ CCIfSubtarget<"hasLASX()", ++ CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64], ++ CCAssignToRegWithShadow<[XR0, XR1, XR2, XR3, XR4, XR5, XR6, XR7], ++ [A0_64, A1_64, A2_64, A3_64, ++ A4_64, A5_64, A6_64, A7_64]>>>, ++ ++ // All stack parameter slots become 64-bit doublewords and are 8-byte aligned. ++ CCIfType<[f32], CCAssignToStack<4, 8>>, ++ CCIfType<[i64, f64], CCAssignToStack<8, 8>>, ++ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], ++ CCAssignToStack<16, 16>>, ++ CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64], ++ CCAssignToStack<32, 32>> ++]>; ++ ++// LPX32/LP64 variable arguments. ++// All arguments are passed in integer registers. ++def CC_LoongArchLP64LPX32_VarArg : CallingConv<[ ++ // All integers are promoted to 64-bit. ++ CCIfType<[i8, i16, i32], CCPromoteToType>, ++ ++ CCIfType<[f32], CCAssignToReg<[A0, A1, A2, A3, A4, A5, A6, A7]>>, ++ ++ CCIfType<[i64], CCIfOrigArgWasF128>>, ++ ++ CCIfType<[i64, f64], CCAssignToReg<[A0_64, A1_64, A2_64, A3_64, ++ A4_64, A5_64, A6_64, A7_64]>>, ++ ++ // All stack parameter slots become 64-bit doublewords and are 8-byte aligned. ++ CCIfType<[f32], CCAssignToStack<4, 8>>, ++ CCIfType<[i64, f64], CCAssignToStack<8, 8>> ++]>; ++ ++def RetCC_LoongArchLP64LPX32 : CallingConv<[ ++ // f128 needs to be handled similarly to f32 and f64. 
However, f128 is not ++ // legal and is lowered to i128 which is further lowered to a pair of i64's. ++ // This presents us with a problem for the calling convention since hard-float ++ // still needs to pass them in FPU registers, and soft-float needs to use $v0, ++ // and $a0 instead of the usual $v0, and $v1. We therefore resort to a ++ // pre-analyze (see PreAnalyzeReturnForF128()) step to pass information on ++ // whether the result was originally an f128 into the tablegen-erated code. ++ // ++ // f128 should only occur for the LP64 ABI where long double is 128-bit. On ++ // LPX32, long double is equivalent to double. ++ CCIfType<[i64], CCIfOrigArgWasF128>>, ++ ++ CCIfType<[i8, i16, i32, i64], CCIfInReg>>, ++ ++ // i64 are returned in registers V0_64, V1_64 ++ CCIfType<[i64], CCAssignToReg<[A0_64, A1_64]>>, ++ ++ CCIfSubtarget<"hasLSX()", ++ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToReg<[VR0]>>>, ++ ++ CCIfSubtarget<"hasLASX()", ++ CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64], CCAssignToReg<[XR0]>>>, ++ ++ CCIfSubtarget<"hasLASX()", ++ CCIfType<[i64], CCAssignToReg<[A0_64, A1_64]>>>, ++ ++ // f32 are returned in registers F0, F2 ++ CCIfType<[f32], CCAssignToReg<[F0, F1]>>, ++ ++ // f64 are returned in registers D0, D2 ++ CCIfType<[f64], CCAssignToReg<[F0_64, F1_64]>> ++]>; ++ + //===----------------------------------------------------------------------===// ++// LoongArch Calling Convention Dispatch ++//===----------------------------------------------------------------------===// ++ ++def RetCC_LoongArch : CallingConv<[ ++ CCIfSubtarget<"isABI_LPX32()", CCDelegateTo>, ++ CCIfSubtarget<"isABI_LP64()", CCDelegateTo>, ++ CCDelegateTo ++]>; ++ ++def CC_LoongArch_ByVal : CallingConv<[ ++ CCIfSubtarget<"isABI_LP32()", CCIfByVal>>, ++ CCIfByVal> ++]>; ++ ++def CC_LoongArch_FixedArg : CallingConv<[ ++ CCIfByVal>, ++ //CCIfByVal>>, ++ ++ // f128 needs to be handled similarly to f32 and f64 on hard-float. However, ++ // f128 is not legal and is lowered to i128 which is further lowered to a pair ++ // of i64's. ++ // This presents us with a problem for the calling convention since hard-float ++ // still needs to pass them in FPU registers. We therefore resort to a ++ // pre-analyze (see PreAnalyzeFormalArgsForF128()) step to pass information on ++ // whether the argument was originally an f128 into the tablegen-erated code. ++ // ++ // f128 should only occur for the LP64 ABI where long double is 128-bit. On ++ // LPX32, long double is equivalent to double. ++ CCIfType<[i64], ++ CCIfSubtargetNot<"useSoftFloat()", ++ CCIfOrigArgWasF128>>>, ++ ++ CCIfSubtarget<"isABI_LP32()", CCDelegateTo>, ++ CCDelegateTo ++]>; ++ ++def CC_LoongArch_VarArg : CallingConv<[ ++ CCIfByVal>, ++ ++ CCIfSubtarget<"isABI_LP32()", CCDelegateTo>, ++ CCDelegateTo ++]>; ++ ++def CC_LoongArch : CallingConv<[ ++ CCIfVarArg>>, ++ CCDelegateTo ++]>; ++ ++//===----------------------------------------------------------------------===// ++// Callee-saved register lists. 
++//===----------------------------------------------------------------------===// ++ ++def CSR_SingleFloatOnly : CalleeSavedRegs<(add (sequence "F%u", 31, 24), RA, FP, ++ (sequence "S%u", 8, 0))>; ++ ++//def CSR_LP32_FPXX : CalleeSavedRegs<(add (sequence "D%u", 15, 10), RA, FP, ++// (sequence "S%u", 8, 0))> { ++// let OtherPreserved = (add (decimate (sequence "F%u", 30, 20), 2)); ++//} + +-def CSR_ILP32S_LP64S +- : CalleeSavedRegs<(add R1, (sequence "R%u", 22, 31))>; ++def CSR_LP32 : CalleeSavedRegs<(add (sequence "F%u_64", 31, 24), RA, FP, ++ (sequence "S%u", 8, 0))>; + +-def CSR_ILP32F_LP64F +- : CalleeSavedRegs<(add CSR_ILP32S_LP64S, (sequence "F%u", 24, 31))>; ++//def CSR_LP32_FP64 : ++// CalleeSavedRegs<(add (decimate (sequence "D%u_64", 30, 20), 2), RA, FP, ++// (sequence "S%u", 8, 0))>; + +-def CSR_ILP32D_LP64D +- : CalleeSavedRegs<(add CSR_ILP32S_LP64S, (sequence "F%u_64", 24, 31))>; ++def CSR_LPX32 : CalleeSavedRegs<(add F20_64, F22_64, F24_64, F26_64, F28_64, ++ F30_64, RA_64, FP_64, ++ (sequence "S%u_64", 8, 0))>; + +-// Needed for implementation of LoongArchRegisterInfo::getNoPreservedMask() +-def CSR_NoRegs : CalleeSavedRegs<(add)>; ++//def CSR_LP64 : CalleeSavedRegs<(add (sequence "D%u_64", 31, 24), RA_64, SP_64, FP_64, ++def CSR_LP64 : CalleeSavedRegs<(add (sequence "F%u_64", 31, 24), RA_64, FP_64, ++ (sequence "S%u_64", 8, 0))>; +diff --git a/llvm/lib/Target/LoongArch/LoongArchExpandPseudo.cpp b/llvm/lib/Target/LoongArch/LoongArchExpandPseudo.cpp +new file mode 100644 +index 000000000..0f33e1db6 +--- /dev/null ++++ b/llvm/lib/Target/LoongArch/LoongArchExpandPseudo.cpp +@@ -0,0 +1,2471 @@ ++//===-- LoongArchExpandPseudoInsts.cpp - Expand pseudo instructions ------------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file contains a pass that expands pseudo instructions into target ++// instructions to allow proper scheduling, if-conversion, and other late ++// optimizations. This pass should be run after register allocation but before ++// the post-regalloc scheduling pass. ++// ++// This is currently only used for expanding atomic pseudos after register ++// allocation. We do this to avoid the fast register allocator introducing ++// spills between ll and sc. These stores cause some LoongArch implementations to ++// abort the atomic RMW sequence. 
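++//
++// For illustration, the word-sized compare-and-swap pseudo is expanded into an
++// ll/sc retry loop of roughly the following shape (pseudo-code, not exact
++// assembler syntax; see expandAtomicCmpSwap below):
++//
++//   loop1: ll   dest, 0(ptr)
++//          bne  dest, oldval, exit
++//   loop2: move scratch, newval
++//          sc   scratch, 0(ptr)
++//          beq  scratch, $zero, loop1
++//   exit:  dbar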
++// ++//===----------------------------------------------------------------------===// ++ ++#include "LoongArch.h" ++#include "LoongArchInstrInfo.h" ++#include "LoongArchSubtarget.h" ++#include "MCTargetDesc/LoongArchMCTargetDesc.h" ++#include "llvm/CodeGen/LivePhysRegs.h" ++#include "llvm/CodeGen/MachineFunctionPass.h" ++#include "llvm/CodeGen/MachineInstrBuilder.h" ++ ++using namespace llvm; ++ ++#define DEBUG_TYPE "loongarch-pseudo" ++ ++namespace { ++ class LoongArchExpandPseudo : public MachineFunctionPass { ++ public: ++ static char ID; ++ LoongArchExpandPseudo() : MachineFunctionPass(ID) {} ++ ++ const LoongArchInstrInfo *TII; ++ const LoongArchSubtarget *STI; ++ ++ bool runOnMachineFunction(MachineFunction &Fn) override; ++ ++ MachineFunctionProperties getRequiredProperties() const override { ++ return MachineFunctionProperties().set( ++ MachineFunctionProperties::Property::NoVRegs); ++ } ++ ++ StringRef getPassName() const override { ++ return "LoongArch pseudo instruction expansion pass"; ++ } ++ ++ private: ++ bool expandAtomicCmpSwap(MachineBasicBlock &MBB, ++ MachineBasicBlock::iterator MBBI, ++ MachineBasicBlock::iterator &NextMBBI); ++ bool expandAtomicCmpSwapSubword(MachineBasicBlock &MBB, ++ MachineBasicBlock::iterator MBBI, ++ MachineBasicBlock::iterator &NextMBBI); ++ ++ bool expandAtomicBinOp(MachineBasicBlock &BB, ++ MachineBasicBlock::iterator I, ++ MachineBasicBlock::iterator &NMBBI, unsigned Size); ++ bool expandXINSERT_BOp(MachineBasicBlock &BB, MachineBasicBlock::iterator I, ++ MachineBasicBlock::iterator &NMBBI); ++ bool expandINSERT_HOp(MachineBasicBlock &BB, MachineBasicBlock::iterator I, ++ MachineBasicBlock::iterator &NMBBI); ++ bool expandXINSERT_FWOp(MachineBasicBlock &BB, ++ MachineBasicBlock::iterator I, ++ MachineBasicBlock::iterator &NMBBI); ++ bool expandAtomicBinOpSubword(MachineBasicBlock &BB, ++ MachineBasicBlock::iterator I, ++ MachineBasicBlock::iterator &NMBBI); ++ ++ bool expandPseudoCall(MachineBasicBlock &BB, ++ MachineBasicBlock::iterator I, ++ MachineBasicBlock::iterator &NMBBI); ++ bool expandPseudoTailCall(MachineBasicBlock &BB, ++ MachineBasicBlock::iterator I); ++ ++ bool expandPseudoTEQ(MachineBasicBlock &BB, ++ MachineBasicBlock::iterator I, ++ MachineBasicBlock::iterator &NMBBI); ++ ++ bool expandLoadAddr(MachineBasicBlock &BB, ++ MachineBasicBlock::iterator I, ++ MachineBasicBlock::iterator &NMBBI); ++ ++ bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, ++ MachineBasicBlock::iterator &NMBB); ++ bool expandMBB(MachineBasicBlock &MBB); ++ }; ++ char LoongArchExpandPseudo::ID = 0; ++} ++ ++static bool hasDbar(MachineBasicBlock *MBB) { ++ ++ for (MachineBasicBlock::iterator MBBb = MBB->begin(), MBBe = MBB->end(); ++ MBBb != MBBe; ++MBBb) { ++ if (MBBb->getOpcode() == LoongArch::DBAR) ++ return true; ++ if (MBBb->mayLoad() || MBBb->mayStore()) ++ break; ++ } ++ return false; ++} ++ ++bool LoongArchExpandPseudo::expandAtomicCmpSwapSubword( ++ MachineBasicBlock &BB, MachineBasicBlock::iterator I, ++ MachineBasicBlock::iterator &NMBBI) { ++ ++ MachineFunction *MF = BB.getParent(); ++ ++ DebugLoc DL = I->getDebugLoc(); ++ unsigned LL, SC; ++ unsigned ZERO = LoongArch::ZERO; ++ unsigned BNE = LoongArch::BNE32; ++ unsigned BEQ = LoongArch::BEQ32; ++ unsigned SEOp = ++ I->getOpcode() == LoongArch::ATOMIC_CMP_SWAP_I8_POSTRA ? 
LoongArch::EXT_W_B32 : LoongArch::EXT_W_H32; ++ ++ LL = LoongArch::LL_W; ++ SC = LoongArch::SC_W; ++ ++ unsigned Dest = I->getOperand(0).getReg(); ++ unsigned Ptr = I->getOperand(1).getReg(); ++ unsigned Mask = I->getOperand(2).getReg(); ++ unsigned ShiftCmpVal = I->getOperand(3).getReg(); ++ unsigned Mask2 = I->getOperand(4).getReg(); ++ unsigned ShiftNewVal = I->getOperand(5).getReg(); ++ unsigned ShiftAmnt = I->getOperand(6).getReg(); ++ unsigned Scratch = I->getOperand(7).getReg(); ++ unsigned Scratch2 = I->getOperand(8).getReg(); ++ ++ // insert new blocks after the current block ++ const BasicBlock *LLVM_BB = BB.getBasicBlock(); ++ MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineFunction::iterator It = ++BB.getIterator(); ++ MF->insert(It, loop1MBB); ++ MF->insert(It, loop2MBB); ++ MF->insert(It, sinkMBB); ++ MF->insert(It, exitMBB); ++ ++ // Transfer the remainder of BB and its successor edges to exitMBB. ++ exitMBB->splice(exitMBB->begin(), &BB, ++ std::next(MachineBasicBlock::iterator(I)), BB.end()); ++ exitMBB->transferSuccessorsAndUpdatePHIs(&BB); ++ ++ // thisMBB: ++ // ... ++ // fallthrough --> loop1MBB ++ BB.addSuccessor(loop1MBB, BranchProbability::getOne()); ++ loop1MBB->addSuccessor(sinkMBB); ++ loop1MBB->addSuccessor(loop2MBB); ++ loop1MBB->normalizeSuccProbs(); ++ loop2MBB->addSuccessor(loop1MBB); ++ loop2MBB->addSuccessor(sinkMBB); ++ loop2MBB->normalizeSuccProbs(); ++ sinkMBB->addSuccessor(exitMBB, BranchProbability::getOne()); ++ ++ // loop1MBB: ++ // ll dest, 0(ptr) ++ // and Mask', dest, Mask ++ // bne Mask', ShiftCmpVal, exitMBB ++ BuildMI(loop1MBB, DL, TII->get(LL), Scratch).addReg(Ptr).addImm(0); ++ BuildMI(loop1MBB, DL, TII->get(LoongArch::AND32), Scratch2) ++ .addReg(Scratch) ++ .addReg(Mask); ++ BuildMI(loop1MBB, DL, TII->get(BNE)) ++ .addReg(Scratch2).addReg(ShiftCmpVal).addMBB(sinkMBB); ++ ++ // loop2MBB: ++ // and dest, dest, mask2 ++ // or dest, dest, ShiftNewVal ++ // sc dest, dest, 0(ptr) ++ // beq dest, $0, loop1MBB ++ BuildMI(loop2MBB, DL, TII->get(LoongArch::AND32), Scratch) ++ .addReg(Scratch, RegState::Kill) ++ .addReg(Mask2); ++ BuildMI(loop2MBB, DL, TII->get(LoongArch::OR32), Scratch) ++ .addReg(Scratch, RegState::Kill) ++ .addReg(ShiftNewVal); ++ BuildMI(loop2MBB, DL, TII->get(SC), Scratch) ++ .addReg(Scratch, RegState::Kill) ++ .addReg(Ptr) ++ .addImm(0); ++ BuildMI(loop2MBB, DL, TII->get(BEQ)) ++ .addReg(Scratch, RegState::Kill) ++ .addReg(ZERO) ++ .addMBB(loop1MBB); ++ ++ // sinkMBB: ++ // srl srlres, Mask', shiftamt ++ // sign_extend dest,srlres ++ BuildMI(sinkMBB, DL, TII->get(LoongArch::SRL_W), Dest) ++ .addReg(Scratch2) ++ .addReg(ShiftAmnt); ++ ++ BuildMI(sinkMBB, DL, TII->get(SEOp), Dest).addReg(Dest); ++ ++ if (!hasDbar(sinkMBB)) { ++ MachineBasicBlock::iterator Pos = sinkMBB->begin(); ++ BuildMI(*sinkMBB, Pos, DL, TII->get(LoongArch::DBAR)).addImm(DBAR_HINT); ++ } ++ ++ LivePhysRegs LiveRegs; ++ computeAndAddLiveIns(LiveRegs, *loop1MBB); ++ computeAndAddLiveIns(LiveRegs, *loop2MBB); ++ computeAndAddLiveIns(LiveRegs, *sinkMBB); ++ computeAndAddLiveIns(LiveRegs, *exitMBB); ++ ++ NMBBI = BB.end(); ++ I->eraseFromParent(); ++ return true; ++} ++ ++bool LoongArchExpandPseudo::expandAtomicCmpSwap(MachineBasicBlock &BB, ++ MachineBasicBlock::iterator I, ++ MachineBasicBlock::iterator &NMBBI) { ++ ++ const 
unsigned Size = ++ I->getOpcode() == LoongArch::ATOMIC_CMP_SWAP_I32_POSTRA ? 4 : 8; ++ MachineFunction *MF = BB.getParent(); ++ ++ DebugLoc DL = I->getDebugLoc(); ++ ++ unsigned LL, SC, ZERO, BNE, BEQ, MOVE; ++ ++ if (Size == 4) { ++ LL = LoongArch::LL_W; ++ SC = LoongArch::SC_W; ++ BNE = LoongArch::BNE32; ++ BEQ = LoongArch::BEQ32; ++ ++ ZERO = LoongArch::ZERO; ++ MOVE = LoongArch::OR32; ++ } else { ++ LL = LoongArch::LL_D; ++ SC = LoongArch::SC_D; ++ ZERO = LoongArch::ZERO_64; ++ BNE = LoongArch::BNE; ++ BEQ = LoongArch::BEQ; ++ MOVE = LoongArch::OR; ++ } ++ ++ unsigned Dest = I->getOperand(0).getReg(); ++ unsigned Ptr = I->getOperand(1).getReg(); ++ unsigned OldVal = I->getOperand(2).getReg(); ++ unsigned NewVal = I->getOperand(3).getReg(); ++ unsigned Scratch = I->getOperand(4).getReg(); ++ ++ // insert new blocks after the current block ++ const BasicBlock *LLVM_BB = BB.getBasicBlock(); ++ MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineFunction::iterator It = ++BB.getIterator(); ++ MF->insert(It, loop1MBB); ++ MF->insert(It, loop2MBB); ++ MF->insert(It, exitMBB); ++ ++ // Transfer the remainder of BB and its successor edges to exitMBB. ++ exitMBB->splice(exitMBB->begin(), &BB, ++ std::next(MachineBasicBlock::iterator(I)), BB.end()); ++ exitMBB->transferSuccessorsAndUpdatePHIs(&BB); ++ ++ // thisMBB: ++ // ... ++ // fallthrough --> loop1MBB ++ BB.addSuccessor(loop1MBB, BranchProbability::getOne()); ++ loop1MBB->addSuccessor(exitMBB); ++ loop1MBB->addSuccessor(loop2MBB); ++ loop1MBB->normalizeSuccProbs(); ++ loop2MBB->addSuccessor(loop1MBB); ++ loop2MBB->addSuccessor(exitMBB); ++ loop2MBB->normalizeSuccProbs(); ++ ++ // loop1MBB: ++ // ll dest, 0(ptr) ++ // bne dest, oldval, exitMBB ++ BuildMI(loop1MBB, DL, TII->get(LL), Dest).addReg(Ptr).addImm(0); ++ BuildMI(loop1MBB, DL, TII->get(BNE)) ++ .addReg(Dest, RegState::Kill).addReg(OldVal).addMBB(exitMBB); ++ ++ // loop2MBB: ++ // move scratch, NewVal ++ // sc Scratch, Scratch, 0(ptr) ++ // beq Scratch, $0, loop1MBB ++ BuildMI(loop2MBB, DL, TII->get(MOVE), Scratch).addReg(NewVal).addReg(ZERO); ++ BuildMI(loop2MBB, DL, TII->get(SC), Scratch) ++ .addReg(Scratch).addReg(Ptr).addImm(0); ++ BuildMI(loop2MBB, DL, TII->get(BEQ)) ++ .addReg(Scratch, RegState::Kill).addReg(ZERO).addMBB(loop1MBB); ++ ++ if (!hasDbar(exitMBB)) { ++ MachineBasicBlock::iterator Pos = exitMBB->begin(); ++ BuildMI(*exitMBB, Pos, DL, TII->get(LoongArch::DBAR)).addImm(DBAR_HINT); ++ } ++ ++ LivePhysRegs LiveRegs; ++ computeAndAddLiveIns(LiveRegs, *loop1MBB); ++ computeAndAddLiveIns(LiveRegs, *loop2MBB); ++ computeAndAddLiveIns(LiveRegs, *exitMBB); ++ ++ NMBBI = BB.end(); ++ I->eraseFromParent(); ++ return true; ++} ++ ++bool LoongArchExpandPseudo::expandXINSERT_FWOp( ++ MachineBasicBlock &BB, MachineBasicBlock::iterator I, ++ MachineBasicBlock::iterator &NMBBI) { ++ ++ MachineFunction *MF = BB.getParent(); ++ ++ DebugLoc DL = I->getDebugLoc(); ++ ++ unsigned isGP64 = 0; ++ switch (I->getOpcode()) { ++ case LoongArch::XINSERT_FW_VIDX64_PSEUDO_POSTRA: ++ isGP64 = 1; ++ break; ++ case LoongArch::XINSERT_FW_VIDX_PSEUDO_POSTRA: ++ break; ++ default: ++ llvm_unreachable("Unknown subword vector pseudo for expansion!"); ++ } ++ ++ unsigned Dest = I->getOperand(0).getReg(); ++ unsigned SrcVecReg = I->getOperand(1).getReg(); ++ unsigned LaneReg = I->getOperand(2).getReg(); ++ unsigned SrcValReg = 
I->getOperand(3).getReg(); ++ ++ unsigned Dsttmp = I->getOperand(4).getReg(); ++ unsigned RI = I->getOperand(5).getReg(); ++ unsigned RJ = I->getOperand(6).getReg(); ++ Dsttmp = SrcVecReg; ++ ++ const BasicBlock *LLVM_BB = BB.getBasicBlock(); ++ MachineBasicBlock *blocks[11]; ++ MachineFunction::iterator It = ++BB.getIterator(); ++ for (int i = 0; i < 11; i++) { ++ blocks[i] = MF->CreateMachineBasicBlock(LLVM_BB); ++ MF->insert(It, blocks[i]); ++ } ++ ++ MachineBasicBlock *mainMBB = blocks[0]; ++ MachineBasicBlock *FirstMBB = blocks[1]; ++ MachineBasicBlock *sinkMBB = blocks[9]; ++ MachineBasicBlock *exitMBB = blocks[10]; ++ ++ exitMBB->splice(exitMBB->begin(), &BB, std::next(I), BB.end()); ++ exitMBB->transferSuccessorsAndUpdatePHIs(&BB); ++ ++ BB.addSuccessor(mainMBB, BranchProbability::getOne()); ++ for (int i = 1; i < 9; i++) { ++ mainMBB->addSuccessor(blocks[i]); ++ blocks[i]->addSuccessor(sinkMBB); ++ } ++ ++ unsigned ADDI, BLT, ZERO; ++ ADDI = isGP64 ? LoongArch::ADDI_D : LoongArch::ADDI_W; ++ BLT = isGP64 ? LoongArch::BLT : LoongArch::BLT32; ++ ZERO = isGP64 ? LoongArch::ZERO_64 : LoongArch::ZERO; ++ ++ for (int i = 1; i < 8; i++) { ++ BuildMI(mainMBB, DL, TII->get(ADDI), RI).addReg(ZERO).addImm(i); ++ BuildMI(mainMBB, DL, TII->get(BLT)) ++ .addReg(LaneReg) ++ .addReg(RI) ++ .addMBB(blocks[i + 1]); ++ } ++ ++ BuildMI(mainMBB, DL, TII->get(LoongArch::B32)).addMBB(FirstMBB); ++ ++ BuildMI(FirstMBB, DL, TII->get(LoongArch::XVINSGR2VR_W), Dsttmp) ++ .addReg(SrcVecReg) ++ .addReg(RJ) ++ .addImm(7); ++ BuildMI(FirstMBB, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ for (int i = 0; i < 7; i++) { ++ BuildMI(blocks[i + 2], DL, TII->get(LoongArch::XVINSGR2VR_W), Dsttmp) ++ .addReg(SrcVecReg) ++ .addReg(RJ) ++ .addImm(i); ++ BuildMI(blocks[i + 2], DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ } ++ ++ sinkMBB->addSuccessor(exitMBB, BranchProbability::getOne()); ++ BuildMI(sinkMBB, DL, TII->get(LoongArch::XVORI_B), Dest) ++ .addReg(Dsttmp) ++ .addImm(0); ++ ++ LivePhysRegs LiveRegs; ++ for (int i = 0; i < 11; i++) { ++ computeAndAddLiveIns(LiveRegs, *blocks[i]); ++ } ++ ++ NMBBI = BB.end(); ++ I->eraseFromParent(); ++ ++ return true; ++} ++ ++bool LoongArchExpandPseudo::expandINSERT_HOp( ++ MachineBasicBlock &BB, MachineBasicBlock::iterator I, ++ MachineBasicBlock::iterator &NMBBI) { ++ ++ MachineFunction *MF = BB.getParent(); ++ ++ DebugLoc DL = I->getDebugLoc(); ++ ++ unsigned isGP64 = 0; ++ switch (I->getOpcode()) { ++ case LoongArch::INSERT_H_VIDX64_PSEUDO_POSTRA: ++ isGP64 = 1; ++ break; ++ default: ++ llvm_unreachable("Unknown subword vector pseudo for expansion!"); ++ } ++ ++ unsigned Dest = I->getOperand(0).getReg(); ++ unsigned SrcVecReg = I->getOperand(1).getReg(); ++ unsigned LaneReg = I->getOperand(2).getReg(); ++ unsigned SrcValReg = I->getOperand(3).getReg(); ++ ++ unsigned Dsttmp = I->getOperand(4).getReg(); ++ unsigned RI = I->getOperand(5).getReg(); ++ Dsttmp = SrcVecReg; ++ ++ const BasicBlock *LLVM_BB = BB.getBasicBlock(); ++ MachineBasicBlock *blocks[11]; ++ MachineFunction::iterator It = ++BB.getIterator(); ++ for (int i = 0; i < 11; i++) { ++ blocks[i] = MF->CreateMachineBasicBlock(LLVM_BB); ++ MF->insert(It, blocks[i]); ++ } ++ ++ MachineBasicBlock *mainMBB = blocks[0]; ++ MachineBasicBlock *FirstMBB = blocks[1]; ++ MachineBasicBlock *sinkMBB = blocks[9]; ++ MachineBasicBlock *exitMBB = blocks[10]; ++ ++ exitMBB->splice(exitMBB->begin(), &BB, std::next(I), BB.end()); ++ exitMBB->transferSuccessorsAndUpdatePHIs(&BB); ++ ++ BB.addSuccessor(mainMBB, 
BranchProbability::getOne()); ++ for (int i = 1; i < 9; i++) { ++ mainMBB->addSuccessor(blocks[i]); ++ blocks[i]->addSuccessor(sinkMBB); ++ } ++ ++ unsigned ADDI, BLT, ZERO; ++ ADDI = isGP64 ? LoongArch::ADDI_D : LoongArch::ADDI_W; ++ BLT = isGP64 ? LoongArch::BLT : LoongArch::BLT32; ++ ZERO = isGP64 ? LoongArch::ZERO_64 : LoongArch::ZERO; ++ ++ for (int i = 1; i < 8; i++) { ++ BuildMI(mainMBB, DL, TII->get(ADDI), RI).addReg(ZERO).addImm(i); ++ BuildMI(mainMBB, DL, TII->get(BLT)) ++ .addReg(LaneReg) ++ .addReg(RI) ++ .addMBB(blocks[i + 1]); ++ } ++ ++ BuildMI(mainMBB, DL, TII->get(LoongArch::B32)).addMBB(FirstMBB); ++ ++ BuildMI(FirstMBB, DL, TII->get(LoongArch::VINSGR2VR_H), Dsttmp) ++ .addReg(SrcVecReg) ++ .addReg(SrcValReg) ++ .addImm(7); ++ BuildMI(FirstMBB, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ for (int i = 0; i < 7; i++) { ++ BuildMI(blocks[i + 2], DL, TII->get(LoongArch::VINSGR2VR_H), Dsttmp) ++ .addReg(SrcVecReg) ++ .addReg(SrcValReg) ++ .addImm(i); ++ BuildMI(blocks[i + 2], DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ } ++ ++ sinkMBB->addSuccessor(exitMBB, BranchProbability::getOne()); ++ BuildMI(sinkMBB, DL, TII->get(LoongArch::VORI_B), Dest) ++ .addReg(Dsttmp) ++ .addImm(0); ++ ++ LivePhysRegs LiveRegs; ++ for (int i = 0; i < 11; i++) { ++ computeAndAddLiveIns(LiveRegs, *blocks[i]); ++ } ++ ++ NMBBI = BB.end(); ++ I->eraseFromParent(); ++ ++ return true; ++} ++ ++bool LoongArchExpandPseudo::expandXINSERT_BOp( ++ MachineBasicBlock &BB, MachineBasicBlock::iterator I, ++ MachineBasicBlock::iterator &NMBBI) { ++ ++ MachineFunction *MF = BB.getParent(); ++ ++ DebugLoc DL = I->getDebugLoc(); ++ ++ unsigned isGP64 = 0; ++ switch (I->getOpcode()) { ++ case LoongArch::XINSERT_B_VIDX64_PSEUDO_POSTRA: ++ isGP64 = 1; ++ break; ++ case LoongArch::XINSERT_B_VIDX_PSEUDO_POSTRA: ++ break; ++ default: ++ llvm_unreachable("Unknown subword vector pseudo for expansion!"); ++ } ++ ++ unsigned Dest = I->getOperand(0).getReg(); ++ unsigned SrcVecReg = I->getOperand(1).getReg(); ++ unsigned LaneReg = I->getOperand(2).getReg(); ++ unsigned SrcValReg = I->getOperand(3).getReg(); ++ ++ unsigned R4r = I->getOperand(5).getReg(); ++ unsigned Rib = I->getOperand(6).getReg(); ++ unsigned Ris = I->getOperand(7).getReg(); ++ unsigned R7b1 = I->getOperand(8).getReg(); ++ unsigned R7b2 = I->getOperand(9).getReg(); ++ unsigned R7b3 = I->getOperand(10).getReg(); ++ unsigned R7r80_3 = I->getOperand(11).getReg(); ++ unsigned R7r80l_3 = I->getOperand(12).getReg(); ++ unsigned R7r81_3 = I->getOperand(13).getReg(); ++ unsigned R7r81l_3 = I->getOperand(14).getReg(); ++ unsigned R7r82_3 = I->getOperand(15).getReg(); ++ unsigned R7r82l_3 = I->getOperand(16).getReg(); ++ unsigned RI = I->getOperand(17).getReg(); ++ unsigned tmp_Dst73 = I->getOperand(18).getReg(); ++ unsigned Rimm = I->getOperand(19).getReg(); ++ unsigned R70 = I->getOperand(20).getReg(); ++ tmp_Dst73 = SrcVecReg; ++ ++ const BasicBlock *LLVM_BB = BB.getBasicBlock(); ++ MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *SevenMBB = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *SevenMBB0 = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *SevenMBB1 = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *SevenMBB2 = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *SevenMBB3 = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *ZeroMBB = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *ZeroMBB0 = MF->CreateMachineBasicBlock(LLVM_BB); ++ 
MachineBasicBlock *ZeroMBB1 = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *ZeroMBB2 = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *ZeroMBB3 = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *OneMBB = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *OneMBB0 = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *OneMBB1 = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *OneMBB2 = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *OneMBB3 = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *TwoMBB = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *TwoMBB0 = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *TwoMBB1 = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *TwoMBB2 = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *TwoMBB3 = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *ThreeMBB = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *ThreeMBB0 = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *ThreeMBB1 = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *ThreeMBB2 = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *ThreeMBB3 = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *FourMBB = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *FourMBB0 = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *FourMBB1 = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *FourMBB2 = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *FourMBB3 = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *FiveMBB = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *FiveMBB0 = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *FiveMBB1 = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *FiveMBB2 = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *FiveMBB3 = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *SixMBB = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *SixMBB0 = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *SixMBB1 = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *SixMBB2 = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *SixMBB3 = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineFunction::iterator It = ++BB.getIterator(); ++ MF->insert(It, mainMBB); ++ MF->insert(It, SevenMBB); ++ MF->insert(It, SevenMBB3); ++ MF->insert(It, SevenMBB0); ++ MF->insert(It, SevenMBB1); ++ MF->insert(It, SevenMBB2); ++ MF->insert(It, ZeroMBB); ++ MF->insert(It, ZeroMBB3); ++ MF->insert(It, ZeroMBB0); ++ MF->insert(It, ZeroMBB1); ++ MF->insert(It, ZeroMBB2); ++ MF->insert(It, OneMBB); ++ MF->insert(It, OneMBB3); ++ MF->insert(It, OneMBB0); ++ MF->insert(It, OneMBB1); ++ MF->insert(It, OneMBB2); ++ MF->insert(It, TwoMBB); ++ MF->insert(It, TwoMBB3); ++ MF->insert(It, TwoMBB0); ++ MF->insert(It, TwoMBB1); ++ MF->insert(It, TwoMBB2); ++ MF->insert(It, ThreeMBB); ++ MF->insert(It, ThreeMBB3); ++ MF->insert(It, ThreeMBB0); ++ MF->insert(It, ThreeMBB1); ++ MF->insert(It, ThreeMBB2); ++ MF->insert(It, FourMBB); ++ MF->insert(It, FourMBB3); ++ MF->insert(It, FourMBB0); ++ MF->insert(It, FourMBB1); ++ MF->insert(It, FourMBB2); ++ MF->insert(It, FiveMBB); ++ MF->insert(It, FiveMBB3); ++ MF->insert(It, FiveMBB0); ++ MF->insert(It, FiveMBB1); ++ MF->insert(It, FiveMBB2); 
++ MF->insert(It, SixMBB); ++ MF->insert(It, SixMBB3); ++ MF->insert(It, SixMBB0); ++ MF->insert(It, SixMBB1); ++ MF->insert(It, SixMBB2); ++ MF->insert(It, sinkMBB); ++ MF->insert(It, exitMBB); ++ ++ exitMBB->splice(exitMBB->begin(), &BB, std::next(I), BB.end()); ++ exitMBB->transferSuccessorsAndUpdatePHIs(&BB); ++ ++ BB.addSuccessor(mainMBB, BranchProbability::getOne()); ++ mainMBB->addSuccessor(SevenMBB); ++ mainMBB->addSuccessor(ZeroMBB); ++ mainMBB->addSuccessor(OneMBB); ++ mainMBB->addSuccessor(TwoMBB); ++ mainMBB->addSuccessor(ThreeMBB); ++ mainMBB->addSuccessor(FourMBB); ++ mainMBB->addSuccessor(FiveMBB); ++ mainMBB->addSuccessor(SixMBB); ++ SevenMBB->addSuccessor(SevenMBB0); ++ SevenMBB->addSuccessor(SevenMBB1); ++ SevenMBB->addSuccessor(SevenMBB2); ++ SevenMBB->addSuccessor(SevenMBB3); ++ SevenMBB0->addSuccessor(sinkMBB); ++ SevenMBB1->addSuccessor(sinkMBB); ++ SevenMBB2->addSuccessor(sinkMBB); ++ SevenMBB3->addSuccessor(sinkMBB); ++ ZeroMBB->addSuccessor(ZeroMBB0); ++ ZeroMBB->addSuccessor(ZeroMBB1); ++ ZeroMBB->addSuccessor(ZeroMBB2); ++ ZeroMBB->addSuccessor(ZeroMBB3); ++ ZeroMBB0->addSuccessor(sinkMBB); ++ ZeroMBB1->addSuccessor(sinkMBB); ++ ZeroMBB2->addSuccessor(sinkMBB); ++ ZeroMBB3->addSuccessor(sinkMBB); ++ OneMBB->addSuccessor(OneMBB0); ++ OneMBB->addSuccessor(OneMBB1); ++ OneMBB->addSuccessor(OneMBB2); ++ OneMBB->addSuccessor(OneMBB3); ++ OneMBB0->addSuccessor(sinkMBB); ++ OneMBB1->addSuccessor(sinkMBB); ++ OneMBB2->addSuccessor(sinkMBB); ++ OneMBB3->addSuccessor(sinkMBB); ++ TwoMBB->addSuccessor(TwoMBB0); ++ TwoMBB->addSuccessor(TwoMBB1); ++ TwoMBB->addSuccessor(TwoMBB2); ++ TwoMBB->addSuccessor(TwoMBB3); ++ TwoMBB0->addSuccessor(sinkMBB); ++ TwoMBB1->addSuccessor(sinkMBB); ++ TwoMBB2->addSuccessor(sinkMBB); ++ TwoMBB3->addSuccessor(sinkMBB); ++ ThreeMBB->addSuccessor(ThreeMBB0); ++ ThreeMBB->addSuccessor(ThreeMBB1); ++ ThreeMBB->addSuccessor(ThreeMBB2); ++ ThreeMBB->addSuccessor(ThreeMBB3); ++ ThreeMBB0->addSuccessor(sinkMBB); ++ ThreeMBB1->addSuccessor(sinkMBB); ++ ThreeMBB2->addSuccessor(sinkMBB); ++ ThreeMBB3->addSuccessor(sinkMBB); ++ FourMBB->addSuccessor(FourMBB0); ++ FourMBB->addSuccessor(FourMBB1); ++ FourMBB->addSuccessor(FourMBB2); ++ FourMBB->addSuccessor(FourMBB3); ++ FourMBB0->addSuccessor(sinkMBB); ++ FourMBB1->addSuccessor(sinkMBB); ++ FourMBB2->addSuccessor(sinkMBB); ++ FourMBB3->addSuccessor(sinkMBB); ++ FiveMBB->addSuccessor(FiveMBB0); ++ FiveMBB->addSuccessor(FiveMBB1); ++ FiveMBB->addSuccessor(FiveMBB2); ++ FiveMBB->addSuccessor(FiveMBB3); ++ FiveMBB0->addSuccessor(sinkMBB); ++ FiveMBB1->addSuccessor(sinkMBB); ++ FiveMBB2->addSuccessor(sinkMBB); ++ FiveMBB3->addSuccessor(sinkMBB); ++ SixMBB->addSuccessor(SixMBB0); ++ SixMBB->addSuccessor(SixMBB1); ++ SixMBB->addSuccessor(SixMBB2); ++ SixMBB->addSuccessor(SixMBB3); ++ SixMBB0->addSuccessor(sinkMBB); ++ SixMBB1->addSuccessor(sinkMBB); ++ SixMBB2->addSuccessor(sinkMBB); ++ SixMBB3->addSuccessor(sinkMBB); ++ ++ unsigned SRLI, ADDI, OR, MOD, BLT, ZERO; ++ SRLI = isGP64 ? LoongArch::SRLI_D : LoongArch::SRLI_W; ++ ADDI = isGP64 ? LoongArch::ADDI_D : LoongArch::ADDI_W; ++ OR = isGP64 ? LoongArch::OR : LoongArch::OR32; ++ MOD = isGP64 ? LoongArch::MOD_DU : LoongArch::MOD_WU; ++ BLT = isGP64 ? LoongArch::BLT : LoongArch::BLT32; ++ ZERO = isGP64 ? 
LoongArch::ZERO_64 : LoongArch::ZERO; ++ ++ BuildMI(mainMBB, DL, TII->get(SRLI), Rimm).addReg(LaneReg).addImm(2); ++ BuildMI(mainMBB, DL, TII->get(ADDI), R4r).addReg(ZERO).addImm(4); ++ BuildMI(mainMBB, DL, TII->get(OR), Rib).addReg(Rimm).addReg(ZERO); ++ BuildMI(mainMBB, DL, TII->get(MOD), Ris).addReg(Rib).addReg(R4r); ++ BuildMI(mainMBB, DL, TII->get(ADDI), RI).addReg(ZERO).addImm(1); ++ BuildMI(mainMBB, DL, TII->get(BLT)).addReg(Rib).addReg(RI).addMBB(ZeroMBB); ++ BuildMI(mainMBB, DL, TII->get(ADDI), RI).addReg(ZERO).addImm(2); ++ BuildMI(mainMBB, DL, TII->get(BLT)).addReg(Rib).addReg(RI).addMBB(OneMBB); ++ BuildMI(mainMBB, DL, TII->get(ADDI), RI).addReg(ZERO).addImm(3); ++ BuildMI(mainMBB, DL, TII->get(BLT)).addReg(Rib).addReg(RI).addMBB(TwoMBB); ++ BuildMI(mainMBB, DL, TII->get(ADDI), RI).addReg(ZERO).addImm(4); ++ BuildMI(mainMBB, DL, TII->get(BLT)).addReg(Rib).addReg(RI).addMBB(ThreeMBB); ++ BuildMI(mainMBB, DL, TII->get(ADDI), RI).addReg(ZERO).addImm(5); ++ BuildMI(mainMBB, DL, TII->get(BLT)).addReg(Rib).addReg(RI).addMBB(FourMBB); ++ BuildMI(mainMBB, DL, TII->get(ADDI), RI).addReg(ZERO).addImm(6); ++ BuildMI(mainMBB, DL, TII->get(BLT)).addReg(Rib).addReg(RI).addMBB(FiveMBB); ++ BuildMI(mainMBB, DL, TII->get(ADDI), RI).addReg(ZERO).addImm(7); ++ BuildMI(mainMBB, DL, TII->get(BLT)).addReg(Rib).addReg(RI).addMBB(SixMBB); ++ BuildMI(mainMBB, DL, TII->get(LoongArch::B32)).addMBB(SevenMBB); ++ ++ BuildMI(SevenMBB, DL, TII->get(LoongArch::XVPICKVE2GR_W), R70) ++ .addReg(SrcVecReg) ++ .addImm(7); ++ BuildMI(SevenMBB, DL, TII->get(ADDI), R7b1).addReg(ZERO).addImm(1); ++ BuildMI(SevenMBB, DL, TII->get(BLT)) ++ .addReg(Ris) ++ .addReg(R7b1) ++ .addMBB(SevenMBB0); ++ BuildMI(SevenMBB, DL, TII->get(ADDI), R7b2).addReg(ZERO).addImm(2); ++ BuildMI(SevenMBB, DL, TII->get(BLT)) ++ .addReg(Ris) ++ .addReg(R7b2) ++ .addMBB(SevenMBB1); ++ BuildMI(SevenMBB, DL, TII->get(ADDI), R7b3).addReg(ZERO).addImm(3); ++ BuildMI(SevenMBB, DL, TII->get(BLT)) ++ .addReg(Ris) ++ .addReg(R7b3) ++ .addMBB(SevenMBB2); ++ BuildMI(SevenMBB, DL, TII->get(LoongArch::B32)).addMBB(SevenMBB3); ++ ++ BuildMI(SevenMBB3, DL, TII->get(LoongArch::SLLI_W), R7r80_3) ++ .addReg(SrcValReg) ++ .addImm(24); ++ BuildMI(SevenMBB3, DL, TII->get(LoongArch::LU12I_W), R7r81l_3) ++ .addImm(0x00fff); ++ BuildMI(SevenMBB3, DL, TII->get(LoongArch::ORI32), R7r81_3) ++ .addReg(R7r81l_3) ++ .addImm(0xfff); ++ BuildMI(SevenMBB3, DL, TII->get(LoongArch::AND32), R7r82l_3) ++ .addReg(R70) ++ .addReg(R7r81_3); ++ BuildMI(SevenMBB3, DL, TII->get(LoongArch::OR32), R7r82_3) ++ .addReg(R7r82l_3) ++ .addReg(R7r80_3); ++ BuildMI(SevenMBB3, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) ++ .addReg(SrcVecReg) ++ .addReg(R7r82_3) ++ .addImm(7); ++ BuildMI(SevenMBB3, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ BuildMI(SevenMBB0, DL, TII->get(LoongArch::SLLI_W), R7r80_3) ++ .addReg(SrcValReg) ++ .addImm(24); ++ BuildMI(SevenMBB0, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) ++ .addReg(R7r80_3) ++ .addImm(8); ++ BuildMI(SevenMBB0, DL, TII->get(LoongArch::LU12I_W), R7r81l_3) ++ .addImm(0xff00f); ++ BuildMI(SevenMBB0, DL, TII->get(LoongArch::ORI32), R7r81_3) ++ .addReg(R7r81l_3) ++ .addImm(0xfff); ++ BuildMI(SevenMBB0, DL, TII->get(LoongArch::AND32), R7r82l_3) ++ .addReg(R70) ++ .addReg(R7r81_3); ++ BuildMI(SevenMBB0, DL, TII->get(LoongArch::OR32), R7r82_3) ++ .addReg(R7r82l_3) ++ .addReg(R7r80l_3); ++ BuildMI(SevenMBB0, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) ++ .addReg(SrcVecReg) ++ .addReg(R7r82_3) ++ .addImm(7); ++ BuildMI(SevenMBB0, DL, 
TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ BuildMI(SevenMBB1, DL, TII->get(LoongArch::SLLI_W), R7r80_3) ++ .addReg(SrcValReg) ++ .addImm(24); ++ BuildMI(SevenMBB1, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) ++ .addReg(R7r80_3) ++ .addImm(16); ++ BuildMI(SevenMBB1, DL, TII->get(LoongArch::LU12I_W), R7r81l_3) ++ .addImm(0xffff0); ++ BuildMI(SevenMBB1, DL, TII->get(LoongArch::ORI32), R7r81_3) ++ .addReg(R7r81l_3) ++ .addImm(0x0ff); ++ BuildMI(SevenMBB1, DL, TII->get(LoongArch::AND32), R7r82l_3) ++ .addReg(R70) ++ .addReg(R7r81_3); ++ BuildMI(SevenMBB1, DL, TII->get(LoongArch::OR32), R7r82_3) ++ .addReg(R7r82l_3) ++ .addReg(R7r80l_3); ++ BuildMI(SevenMBB1, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) ++ .addReg(SrcVecReg) ++ .addReg(R7r82_3) ++ .addImm(7); ++ BuildMI(SevenMBB1, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ BuildMI(SevenMBB2, DL, TII->get(LoongArch::SLLI_W), R7r80_3) ++ .addReg(SrcValReg) ++ .addImm(24); ++ BuildMI(SevenMBB2, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) ++ .addReg(R7r80_3) ++ .addImm(24); ++ BuildMI(SevenMBB2, DL, TII->get(LoongArch::LU12I_W), R7r81l_3) ++ .addImm(0xfffff); ++ BuildMI(SevenMBB2, DL, TII->get(LoongArch::ORI32), R7r81_3) ++ .addReg(R7r81l_3) ++ .addImm(0xf00); ++ BuildMI(SevenMBB2, DL, TII->get(LoongArch::AND32), R7r82l_3) ++ .addReg(R70) ++ .addReg(R7r81_3); ++ BuildMI(SevenMBB2, DL, TII->get(LoongArch::OR32), R7r82_3) ++ .addReg(R7r82l_3) ++ .addReg(R7r80l_3); ++ BuildMI(SevenMBB2, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) ++ .addReg(SrcVecReg) ++ .addReg(R7r82_3) ++ .addImm(7); ++ BuildMI(SevenMBB2, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ BuildMI(ZeroMBB, DL, TII->get(LoongArch::XVPICKVE2GR_W), R70) ++ .addReg(SrcVecReg) ++ .addImm(0); ++ BuildMI(ZeroMBB, DL, TII->get(ADDI), R7b1).addReg(ZERO).addImm(1); ++ BuildMI(ZeroMBB, DL, TII->get(BLT)).addReg(Ris).addReg(R7b1).addMBB(ZeroMBB0); ++ BuildMI(ZeroMBB, DL, TII->get(ADDI), R7b2).addReg(ZERO).addImm(2); ++ BuildMI(ZeroMBB, DL, TII->get(BLT)).addReg(Ris).addReg(R7b2).addMBB(ZeroMBB1); ++ BuildMI(ZeroMBB, DL, TII->get(ADDI), R7b3).addReg(ZERO).addImm(3); ++ BuildMI(ZeroMBB, DL, TII->get(BLT)).addReg(Ris).addReg(R7b3).addMBB(ZeroMBB2); ++ BuildMI(ZeroMBB, DL, TII->get(LoongArch::B32)).addMBB(ZeroMBB3); ++ ++ BuildMI(ZeroMBB3, DL, TII->get(LoongArch::SLLI_W), R7r80_3) ++ .addReg(SrcValReg) ++ .addImm(24); ++ BuildMI(ZeroMBB3, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0x00fff); ++ BuildMI(ZeroMBB3, DL, TII->get(LoongArch::ORI32), R7r81_3) ++ .addReg(R7r81l_3) ++ .addImm(0xfff); ++ BuildMI(ZeroMBB3, DL, TII->get(LoongArch::AND32), R7r82l_3) ++ .addReg(R70) ++ .addReg(R7r81_3); ++ BuildMI(ZeroMBB3, DL, TII->get(LoongArch::OR32), R7r82_3) ++ .addReg(R7r82l_3) ++ .addReg(R7r80_3); ++ BuildMI(ZeroMBB3, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) ++ .addReg(SrcVecReg) ++ .addReg(R7r82_3) ++ .addImm(0); ++ BuildMI(ZeroMBB3, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ BuildMI(ZeroMBB0, DL, TII->get(LoongArch::SLLI_W), R7r80_3) ++ .addReg(SrcValReg) ++ .addImm(24); ++ BuildMI(ZeroMBB0, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) ++ .addReg(R7r80_3) ++ .addImm(8); ++ BuildMI(ZeroMBB0, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0xff00f); ++ BuildMI(ZeroMBB0, DL, TII->get(LoongArch::ORI32), R7r81_3) ++ .addReg(R7r81l_3) ++ .addImm(0xfff); ++ BuildMI(ZeroMBB0, DL, TII->get(LoongArch::AND32), R7r82l_3) ++ .addReg(R70) ++ .addReg(R7r81_3); ++ BuildMI(ZeroMBB0, DL, TII->get(LoongArch::OR32), R7r82_3) ++ .addReg(R7r82l_3) ++ .addReg(R7r80l_3); ++ 
BuildMI(ZeroMBB0, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) ++ .addReg(SrcVecReg) ++ .addReg(R7r82_3) ++ .addImm(0); ++ BuildMI(ZeroMBB0, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ BuildMI(ZeroMBB1, DL, TII->get(LoongArch::SLLI_W), R7r80_3) ++ .addReg(SrcValReg) ++ .addImm(24); ++ BuildMI(ZeroMBB1, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) ++ .addReg(R7r80_3) ++ .addImm(16); ++ BuildMI(ZeroMBB1, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0xffff0); ++ BuildMI(ZeroMBB1, DL, TII->get(LoongArch::ORI32), R7r81_3) ++ .addReg(R7r81l_3) ++ .addImm(0x0ff); ++ BuildMI(ZeroMBB1, DL, TII->get(LoongArch::AND32), R7r82l_3) ++ .addReg(R70) ++ .addReg(R7r81_3); ++ BuildMI(ZeroMBB1, DL, TII->get(LoongArch::OR32), R7r82_3) ++ .addReg(R7r82l_3) ++ .addReg(R7r80l_3); ++ BuildMI(ZeroMBB1, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) ++ .addReg(SrcVecReg) ++ .addReg(R7r82_3) ++ .addImm(0); ++ BuildMI(ZeroMBB1, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ BuildMI(ZeroMBB2, DL, TII->get(LoongArch::SLLI_W), R7r80_3) ++ .addReg(SrcValReg) ++ .addImm(24); ++ BuildMI(ZeroMBB2, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) ++ .addReg(R7r80_3) ++ .addImm(24); ++ BuildMI(ZeroMBB2, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0xfffff); ++ BuildMI(ZeroMBB2, DL, TII->get(LoongArch::ORI32), R7r81_3) ++ .addReg(R7r81l_3) ++ .addImm(0xf00); ++ BuildMI(ZeroMBB2, DL, TII->get(LoongArch::AND32), R7r82l_3) ++ .addReg(R70) ++ .addReg(R7r81_3); ++ BuildMI(ZeroMBB2, DL, TII->get(LoongArch::OR32), R7r82_3) ++ .addReg(R7r82l_3) ++ .addReg(R7r80l_3); ++ BuildMI(ZeroMBB2, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) ++ .addReg(SrcVecReg) ++ .addReg(R7r82_3) ++ .addImm(0); ++ BuildMI(ZeroMBB2, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ BuildMI(OneMBB, DL, TII->get(LoongArch::XVPICKVE2GR_W), R70) ++ .addReg(SrcVecReg) ++ .addImm(1); ++ BuildMI(OneMBB, DL, TII->get(ADDI), R7b1).addReg(ZERO).addImm(1); ++ BuildMI(OneMBB, DL, TII->get(BLT)).addReg(Ris).addReg(R7b1).addMBB(OneMBB0); ++ BuildMI(OneMBB, DL, TII->get(ADDI), R7b2).addReg(ZERO).addImm(2); ++ BuildMI(OneMBB, DL, TII->get(BLT)).addReg(Ris).addReg(R7b2).addMBB(OneMBB1); ++ BuildMI(OneMBB, DL, TII->get(ADDI), R7b3).addReg(ZERO).addImm(3); ++ BuildMI(OneMBB, DL, TII->get(BLT)).addReg(Ris).addReg(R7b3).addMBB(OneMBB2); ++ BuildMI(OneMBB, DL, TII->get(LoongArch::B32)).addMBB(OneMBB3); ++ ++ BuildMI(OneMBB3, DL, TII->get(LoongArch::SLLI_W), R7r80_3) ++ .addReg(SrcValReg) ++ .addImm(24); ++ BuildMI(OneMBB3, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0x00fff); ++ BuildMI(OneMBB3, DL, TII->get(LoongArch::ORI32), R7r81_3) ++ .addReg(R7r81l_3) ++ .addImm(0xfff); ++ BuildMI(OneMBB3, DL, TII->get(LoongArch::AND32), R7r82l_3) ++ .addReg(R70) ++ .addReg(R7r81_3); ++ BuildMI(OneMBB3, DL, TII->get(LoongArch::OR32), R7r82_3) ++ .addReg(R7r82l_3) ++ .addReg(R7r80_3); ++ BuildMI(OneMBB3, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) ++ .addReg(SrcVecReg) ++ .addReg(R7r82_3) ++ .addImm(1); ++ BuildMI(OneMBB3, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ BuildMI(OneMBB0, DL, TII->get(LoongArch::SLLI_W), R7r80_3) ++ .addReg(SrcValReg) ++ .addImm(24); ++ BuildMI(OneMBB0, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) ++ .addReg(R7r80_3) ++ .addImm(8); ++ BuildMI(OneMBB0, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0xff00f); ++ BuildMI(OneMBB0, DL, TII->get(LoongArch::ORI32), R7r81_3) ++ .addReg(R7r81l_3) ++ .addImm(0xfff); ++ BuildMI(OneMBB0, DL, TII->get(LoongArch::AND32), R7r82l_3) ++ .addReg(R70) ++ .addReg(R7r81_3); ++ 
BuildMI(OneMBB0, DL, TII->get(LoongArch::OR32), R7r82_3) ++ .addReg(R7r82l_3) ++ .addReg(R7r80l_3); ++ BuildMI(OneMBB0, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) ++ .addReg(SrcVecReg) ++ .addReg(R7r82_3) ++ .addImm(1); ++ BuildMI(OneMBB0, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ BuildMI(OneMBB1, DL, TII->get(LoongArch::SLLI_W), R7r80_3) ++ .addReg(SrcValReg) ++ .addImm(24); ++ BuildMI(OneMBB1, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) ++ .addReg(R7r80_3) ++ .addImm(16); ++ BuildMI(OneMBB1, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0xffff0); ++ BuildMI(OneMBB1, DL, TII->get(LoongArch::ORI32), R7r81_3) ++ .addReg(R7r81l_3) ++ .addImm(0x0ff); ++ BuildMI(OneMBB1, DL, TII->get(LoongArch::AND32), R7r82l_3) ++ .addReg(R70) ++ .addReg(R7r81_3); ++ BuildMI(OneMBB1, DL, TII->get(LoongArch::OR32), R7r82_3) ++ .addReg(R7r82l_3) ++ .addReg(R7r80l_3); ++ BuildMI(OneMBB1, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) ++ .addReg(SrcVecReg) ++ .addReg(R7r82_3) ++ .addImm(1); ++ BuildMI(OneMBB1, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ BuildMI(OneMBB2, DL, TII->get(LoongArch::SLLI_W), R7r80_3) ++ .addReg(SrcValReg) ++ .addImm(24); ++ BuildMI(OneMBB2, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) ++ .addReg(R7r80_3) ++ .addImm(24); ++ BuildMI(OneMBB2, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0xfffff); ++ BuildMI(OneMBB2, DL, TII->get(LoongArch::ORI32), R7r81_3) ++ .addReg(R7r81l_3) ++ .addImm(0xf00); ++ BuildMI(OneMBB2, DL, TII->get(LoongArch::AND32), R7r82l_3) ++ .addReg(R70) ++ .addReg(R7r81_3); ++ BuildMI(OneMBB2, DL, TII->get(LoongArch::OR32), R7r82_3) ++ .addReg(R7r82l_3) ++ .addReg(R7r80l_3); ++ BuildMI(OneMBB2, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) ++ .addReg(SrcVecReg) ++ .addReg(R7r82_3) ++ .addImm(1); ++ BuildMI(OneMBB2, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ BuildMI(TwoMBB, DL, TII->get(LoongArch::XVPICKVE2GR_W), R70) ++ .addReg(SrcVecReg) ++ .addImm(2); ++ BuildMI(TwoMBB, DL, TII->get(ADDI), R7b1).addReg(ZERO).addImm(1); ++ BuildMI(TwoMBB, DL, TII->get(BLT)).addReg(Ris).addReg(R7b1).addMBB(TwoMBB0); ++ BuildMI(TwoMBB, DL, TII->get(ADDI), R7b2).addReg(ZERO).addImm(2); ++ BuildMI(TwoMBB, DL, TII->get(BLT)).addReg(Ris).addReg(R7b2).addMBB(TwoMBB1); ++ BuildMI(TwoMBB, DL, TII->get(ADDI), R7b3).addReg(ZERO).addImm(3); ++ BuildMI(TwoMBB, DL, TII->get(BLT)).addReg(Ris).addReg(R7b3).addMBB(TwoMBB2); ++ BuildMI(TwoMBB, DL, TII->get(LoongArch::B32)).addMBB(TwoMBB3); ++ ++ BuildMI(TwoMBB3, DL, TII->get(LoongArch::SLLI_W), R7r80_3) ++ .addReg(SrcValReg) ++ .addImm(24); ++ BuildMI(TwoMBB3, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0x00fff); ++ BuildMI(TwoMBB3, DL, TII->get(LoongArch::ORI32), R7r81_3) ++ .addReg(R7r81l_3) ++ .addImm(0xfff); ++ BuildMI(TwoMBB3, DL, TII->get(LoongArch::AND32), R7r82l_3) ++ .addReg(R70) ++ .addReg(R7r81_3); ++ BuildMI(TwoMBB3, DL, TII->get(LoongArch::OR32), R7r82_3) ++ .addReg(R7r82l_3) ++ .addReg(R7r80_3); ++ BuildMI(TwoMBB3, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) ++ .addReg(SrcVecReg) ++ .addReg(R7r82_3) ++ .addImm(2); ++ BuildMI(TwoMBB3, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ BuildMI(TwoMBB0, DL, TII->get(LoongArch::SLLI_W), R7r80_3) ++ .addReg(SrcValReg) ++ .addImm(24); ++ BuildMI(TwoMBB0, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) ++ .addReg(R7r80_3) ++ .addImm(8); ++ BuildMI(TwoMBB0, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0xff00f); ++ BuildMI(TwoMBB0, DL, TII->get(LoongArch::ORI32), R7r81_3) ++ .addReg(R7r81l_3) ++ .addImm(0xfff); ++ BuildMI(TwoMBB0, 
DL, TII->get(LoongArch::AND32), R7r82l_3) ++ .addReg(R70) ++ .addReg(R7r81_3); ++ BuildMI(TwoMBB0, DL, TII->get(LoongArch::OR32), R7r82_3) ++ .addReg(R7r82l_3) ++ .addReg(R7r80l_3); ++ BuildMI(TwoMBB0, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) ++ .addReg(SrcVecReg) ++ .addReg(R7r82_3) ++ .addImm(2); ++ BuildMI(TwoMBB0, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ BuildMI(TwoMBB1, DL, TII->get(LoongArch::SLLI_W), R7r80_3) ++ .addReg(SrcValReg) ++ .addImm(24); ++ BuildMI(TwoMBB1, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) ++ .addReg(R7r80_3) ++ .addImm(16); ++ BuildMI(TwoMBB1, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0xffff0); ++ BuildMI(TwoMBB1, DL, TII->get(LoongArch::ORI32), R7r81_3) ++ .addReg(R7r81l_3) ++ .addImm(0x0ff); ++ BuildMI(TwoMBB1, DL, TII->get(LoongArch::AND32), R7r82l_3) ++ .addReg(R70) ++ .addReg(R7r81_3); ++ BuildMI(TwoMBB1, DL, TII->get(LoongArch::OR32), R7r82_3) ++ .addReg(R7r82l_3) ++ .addReg(R7r80l_3); ++ BuildMI(TwoMBB1, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) ++ .addReg(SrcVecReg) ++ .addReg(R7r82_3) ++ .addImm(2); ++ BuildMI(TwoMBB1, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ BuildMI(TwoMBB2, DL, TII->get(LoongArch::SLLI_W), R7r80_3) ++ .addReg(SrcValReg) ++ .addImm(24); ++ BuildMI(TwoMBB2, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) ++ .addReg(R7r80_3) ++ .addImm(24); ++ BuildMI(TwoMBB2, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0xfffff); ++ BuildMI(TwoMBB2, DL, TII->get(LoongArch::ORI32), R7r81_3) ++ .addReg(R7r81l_3) ++ .addImm(0xf00); ++ BuildMI(TwoMBB2, DL, TII->get(LoongArch::AND32), R7r82l_3) ++ .addReg(R70) ++ .addReg(R7r81_3); ++ BuildMI(TwoMBB2, DL, TII->get(LoongArch::OR32), R7r82_3) ++ .addReg(R7r82l_3) ++ .addReg(R7r80l_3); ++ BuildMI(TwoMBB2, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) ++ .addReg(SrcVecReg) ++ .addReg(R7r82_3) ++ .addImm(2); ++ BuildMI(TwoMBB2, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ BuildMI(ThreeMBB, DL, TII->get(LoongArch::XVPICKVE2GR_W), R70) ++ .addReg(SrcVecReg) ++ .addImm(3); ++ BuildMI(ThreeMBB, DL, TII->get(ADDI), R7b1).addReg(ZERO).addImm(1); ++ BuildMI(ThreeMBB, DL, TII->get(BLT)) ++ .addReg(Ris) ++ .addReg(R7b1) ++ .addMBB(ThreeMBB0); ++ BuildMI(ThreeMBB, DL, TII->get(ADDI), R7b2).addReg(ZERO).addImm(2); ++ BuildMI(ThreeMBB, DL, TII->get(BLT)) ++ .addReg(Ris) ++ .addReg(R7b2) ++ .addMBB(ThreeMBB1); ++ BuildMI(ThreeMBB, DL, TII->get(ADDI), R7b3).addReg(ZERO).addImm(3); ++ BuildMI(ThreeMBB, DL, TII->get(BLT)) ++ .addReg(Ris) ++ .addReg(R7b3) ++ .addMBB(ThreeMBB2); ++ BuildMI(ThreeMBB, DL, TII->get(LoongArch::B32)).addMBB(ThreeMBB3); ++ ++ BuildMI(ThreeMBB3, DL, TII->get(LoongArch::SLLI_W), R7r80_3) ++ .addReg(SrcValReg) ++ .addImm(24); ++ BuildMI(ThreeMBB3, DL, TII->get(LoongArch::LU12I_W), R7r81l_3) ++ .addImm(0x00fff); ++ BuildMI(ThreeMBB3, DL, TII->get(LoongArch::ORI32), R7r81_3) ++ .addReg(R7r81l_3) ++ .addImm(0xfff); ++ BuildMI(ThreeMBB3, DL, TII->get(LoongArch::AND32), R7r82l_3) ++ .addReg(R70) ++ .addReg(R7r81_3); ++ BuildMI(ThreeMBB3, DL, TII->get(LoongArch::OR32), R7r82_3) ++ .addReg(R7r82l_3) ++ .addReg(R7r80_3); ++ BuildMI(ThreeMBB3, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) ++ .addReg(SrcVecReg) ++ .addReg(R7r82_3) ++ .addImm(3); ++ BuildMI(ThreeMBB3, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ BuildMI(ThreeMBB0, DL, TII->get(LoongArch::SLLI_W), R7r80_3) ++ .addReg(SrcValReg) ++ .addImm(24); ++ BuildMI(ThreeMBB0, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) ++ .addReg(R7r80_3) ++ .addImm(8); ++ BuildMI(ThreeMBB0, DL, 
TII->get(LoongArch::LU12I_W), R7r81l_3) ++ .addImm(0xff00f); ++ BuildMI(ThreeMBB0, DL, TII->get(LoongArch::ORI32), R7r81_3) ++ .addReg(R7r81l_3) ++ .addImm(0xfff); ++ BuildMI(ThreeMBB0, DL, TII->get(LoongArch::AND32), R7r82l_3) ++ .addReg(R70) ++ .addReg(R7r81_3); ++ BuildMI(ThreeMBB0, DL, TII->get(LoongArch::OR32), R7r82_3) ++ .addReg(R7r82l_3) ++ .addReg(R7r80l_3); ++ BuildMI(ThreeMBB0, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) ++ .addReg(SrcVecReg) ++ .addReg(R7r82_3) ++ .addImm(3); ++ BuildMI(ThreeMBB0, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ BuildMI(ThreeMBB1, DL, TII->get(LoongArch::SLLI_W), R7r80_3) ++ .addReg(SrcValReg) ++ .addImm(24); ++ BuildMI(ThreeMBB1, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) ++ .addReg(R7r80_3) ++ .addImm(16); ++ BuildMI(ThreeMBB1, DL, TII->get(LoongArch::LU12I_W), R7r81l_3) ++ .addImm(0xffff0); ++ BuildMI(ThreeMBB1, DL, TII->get(LoongArch::ORI32), R7r81_3) ++ .addReg(R7r81l_3) ++ .addImm(0x0ff); ++ BuildMI(ThreeMBB1, DL, TII->get(LoongArch::AND32), R7r82l_3) ++ .addReg(R70) ++ .addReg(R7r81_3); ++ BuildMI(ThreeMBB1, DL, TII->get(LoongArch::OR32), R7r82_3) ++ .addReg(R7r82l_3) ++ .addReg(R7r80l_3); ++ BuildMI(ThreeMBB1, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) ++ .addReg(SrcVecReg) ++ .addReg(R7r82_3) ++ .addImm(3); ++ BuildMI(ThreeMBB1, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ BuildMI(ThreeMBB2, DL, TII->get(LoongArch::SLLI_W), R7r80_3) ++ .addReg(SrcValReg) ++ .addImm(24); ++ BuildMI(ThreeMBB2, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) ++ .addReg(R7r80_3) ++ .addImm(24); ++ BuildMI(ThreeMBB2, DL, TII->get(LoongArch::LU12I_W), R7r81l_3) ++ .addImm(0xfffff); ++ BuildMI(ThreeMBB2, DL, TII->get(LoongArch::ORI32), R7r81_3) ++ .addReg(R7r81l_3) ++ .addImm(0xf00); ++ BuildMI(ThreeMBB2, DL, TII->get(LoongArch::AND32), R7r82l_3) ++ .addReg(R70) ++ .addReg(R7r81_3); ++ BuildMI(ThreeMBB2, DL, TII->get(LoongArch::OR32), R7r82_3) ++ .addReg(R7r82l_3) ++ .addReg(R7r80l_3); ++ BuildMI(ThreeMBB2, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) ++ .addReg(SrcVecReg) ++ .addReg(R7r82_3) ++ .addImm(3); ++ BuildMI(ThreeMBB2, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ BuildMI(FourMBB, DL, TII->get(LoongArch::XVPICKVE2GR_W), R70) ++ .addReg(SrcVecReg) ++ .addImm(4); ++ BuildMI(FourMBB, DL, TII->get(ADDI), R7b1).addReg(ZERO).addImm(1); ++ BuildMI(FourMBB, DL, TII->get(BLT)).addReg(Ris).addReg(R7b1).addMBB(FourMBB0); ++ BuildMI(FourMBB, DL, TII->get(ADDI), R7b2).addReg(ZERO).addImm(2); ++ BuildMI(FourMBB, DL, TII->get(BLT)).addReg(Ris).addReg(R7b2).addMBB(FourMBB1); ++ BuildMI(FourMBB, DL, TII->get(ADDI), R7b3).addReg(ZERO).addImm(3); ++ BuildMI(FourMBB, DL, TII->get(BLT)).addReg(Ris).addReg(R7b3).addMBB(FourMBB2); ++ BuildMI(FourMBB, DL, TII->get(LoongArch::B32)).addMBB(FourMBB3); ++ ++ BuildMI(FourMBB3, DL, TII->get(LoongArch::SLLI_W), R7r80_3) ++ .addReg(SrcValReg) ++ .addImm(24); ++ BuildMI(FourMBB3, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0x00fff); ++ BuildMI(FourMBB3, DL, TII->get(LoongArch::ORI32), R7r81_3) ++ .addReg(R7r81l_3) ++ .addImm(0xfff); ++ BuildMI(FourMBB3, DL, TII->get(LoongArch::AND32), R7r82l_3) ++ .addReg(R70) ++ .addReg(R7r81_3); ++ BuildMI(FourMBB3, DL, TII->get(LoongArch::OR32), R7r82_3) ++ .addReg(R7r82l_3) ++ .addReg(R7r80_3); ++ BuildMI(FourMBB3, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) ++ .addReg(SrcVecReg) ++ .addReg(R7r82_3) ++ .addImm(4); ++ BuildMI(FourMBB3, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ BuildMI(FourMBB0, DL, TII->get(LoongArch::SLLI_W), R7r80_3) ++ 
.addReg(SrcValReg) ++ .addImm(24); ++ BuildMI(FourMBB0, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) ++ .addReg(R7r80_3) ++ .addImm(8); ++ BuildMI(FourMBB0, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0xff00f); ++ BuildMI(FourMBB0, DL, TII->get(LoongArch::ORI32), R7r81_3) ++ .addReg(R7r81l_3) ++ .addImm(0xfff); ++ BuildMI(FourMBB0, DL, TII->get(LoongArch::AND32), R7r82l_3) ++ .addReg(R70) ++ .addReg(R7r81_3); ++ BuildMI(FourMBB0, DL, TII->get(LoongArch::OR32), R7r82_3) ++ .addReg(R7r82l_3) ++ .addReg(R7r80l_3); ++ BuildMI(FourMBB0, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) ++ .addReg(SrcVecReg) ++ .addReg(R7r82_3) ++ .addImm(4); ++ BuildMI(FourMBB0, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ BuildMI(FourMBB1, DL, TII->get(LoongArch::SLLI_W), R7r80_3) ++ .addReg(SrcValReg) ++ .addImm(24); ++ BuildMI(FourMBB1, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) ++ .addReg(R7r80_3) ++ .addImm(16); ++ BuildMI(FourMBB1, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0xffff0); ++ BuildMI(FourMBB1, DL, TII->get(LoongArch::ORI32), R7r81_3) ++ .addReg(R7r81l_3) ++ .addImm(0x0ff); ++ BuildMI(FourMBB1, DL, TII->get(LoongArch::AND32), R7r82l_3) ++ .addReg(R70) ++ .addReg(R7r81_3); ++ BuildMI(FourMBB1, DL, TII->get(LoongArch::OR32), R7r82_3) ++ .addReg(R7r82l_3) ++ .addReg(R7r80l_3); ++ BuildMI(FourMBB1, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) ++ .addReg(SrcVecReg) ++ .addReg(R7r82_3) ++ .addImm(4); ++ BuildMI(FourMBB1, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ BuildMI(FourMBB2, DL, TII->get(LoongArch::SLLI_W), R7r80_3) ++ .addReg(SrcValReg) ++ .addImm(24); ++ BuildMI(FourMBB2, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) ++ .addReg(R7r80_3) ++ .addImm(24); ++ BuildMI(FourMBB2, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0xfffff); ++ BuildMI(FourMBB2, DL, TII->get(LoongArch::ORI32), R7r81_3) ++ .addReg(R7r81l_3) ++ .addImm(0xf00); ++ BuildMI(FourMBB2, DL, TII->get(LoongArch::AND32), R7r82l_3) ++ .addReg(R70) ++ .addReg(R7r81_3); ++ BuildMI(FourMBB2, DL, TII->get(LoongArch::OR32), R7r82_3) ++ .addReg(R7r82l_3) ++ .addReg(R7r80l_3); ++ BuildMI(FourMBB2, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) ++ .addReg(SrcVecReg) ++ .addReg(R7r82_3) ++ .addImm(4); ++ BuildMI(FourMBB2, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ BuildMI(FiveMBB, DL, TII->get(LoongArch::XVPICKVE2GR_W), R70) ++ .addReg(SrcVecReg) ++ .addImm(5); ++ BuildMI(FiveMBB, DL, TII->get(ADDI), R7b1).addReg(ZERO).addImm(1); ++ BuildMI(FiveMBB, DL, TII->get(BLT)).addReg(Ris).addReg(R7b1).addMBB(FiveMBB0); ++ BuildMI(FiveMBB, DL, TII->get(ADDI), R7b2).addReg(ZERO).addImm(2); ++ BuildMI(FiveMBB, DL, TII->get(BLT)).addReg(Ris).addReg(R7b2).addMBB(FiveMBB1); ++ BuildMI(FiveMBB, DL, TII->get(ADDI), R7b3).addReg(ZERO).addImm(3); ++ BuildMI(FiveMBB, DL, TII->get(BLT)).addReg(Ris).addReg(R7b3).addMBB(FiveMBB2); ++ BuildMI(FiveMBB, DL, TII->get(LoongArch::B32)).addMBB(FiveMBB3); ++ ++ BuildMI(FiveMBB3, DL, TII->get(LoongArch::SLLI_W), R7r80_3) ++ .addReg(SrcValReg) ++ .addImm(24); ++ BuildMI(FiveMBB3, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0x00fff); ++ BuildMI(FiveMBB3, DL, TII->get(LoongArch::ORI32), R7r81_3) ++ .addReg(R7r81l_3) ++ .addImm(0xfff); ++ BuildMI(FiveMBB3, DL, TII->get(LoongArch::AND32), R7r82l_3) ++ .addReg(R70) ++ .addReg(R7r81_3); ++ BuildMI(FiveMBB3, DL, TII->get(LoongArch::OR32), R7r82_3) ++ .addReg(R7r82l_3) ++ .addReg(R7r80_3); ++ BuildMI(FiveMBB3, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) ++ .addReg(SrcVecReg) ++ .addReg(R7r82_3) ++ .addImm(5); ++ 
BuildMI(FiveMBB3, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ BuildMI(FiveMBB0, DL, TII->get(LoongArch::SLLI_W), R7r80_3) ++ .addReg(SrcValReg) ++ .addImm(24); ++ BuildMI(FiveMBB0, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) ++ .addReg(R7r80_3) ++ .addImm(8); ++ BuildMI(FiveMBB0, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0xff00f); ++ BuildMI(FiveMBB0, DL, TII->get(LoongArch::ORI32), R7r81_3) ++ .addReg(R7r81l_3) ++ .addImm(0xfff); ++ BuildMI(FiveMBB0, DL, TII->get(LoongArch::AND32), R7r82l_3) ++ .addReg(R70) ++ .addReg(R7r81_3); ++ BuildMI(FiveMBB0, DL, TII->get(LoongArch::OR32), R7r82_3) ++ .addReg(R7r82l_3) ++ .addReg(R7r80l_3); ++ BuildMI(FiveMBB0, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) ++ .addReg(SrcVecReg) ++ .addReg(R7r82_3) ++ .addImm(5); ++ BuildMI(FiveMBB0, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ BuildMI(FiveMBB1, DL, TII->get(LoongArch::SLLI_W), R7r80_3) ++ .addReg(SrcValReg) ++ .addImm(24); ++ BuildMI(FiveMBB1, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) ++ .addReg(R7r80_3) ++ .addImm(16); ++ BuildMI(FiveMBB1, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0xffff0); ++ BuildMI(FiveMBB1, DL, TII->get(LoongArch::ORI32), R7r81_3) ++ .addReg(R7r81l_3) ++ .addImm(0x0ff); ++ BuildMI(FiveMBB1, DL, TII->get(LoongArch::AND32), R7r82l_3) ++ .addReg(R70) ++ .addReg(R7r81_3); ++ BuildMI(FiveMBB1, DL, TII->get(LoongArch::OR32), R7r82_3) ++ .addReg(R7r82l_3) ++ .addReg(R7r80l_3); ++ BuildMI(FiveMBB1, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) ++ .addReg(SrcVecReg) ++ .addReg(R7r82_3) ++ .addImm(5); ++ BuildMI(FiveMBB1, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ BuildMI(FiveMBB2, DL, TII->get(LoongArch::SLLI_W), R7r80_3) ++ .addReg(SrcValReg) ++ .addImm(24); ++ BuildMI(FiveMBB2, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) ++ .addReg(R7r80_3) ++ .addImm(24); ++ BuildMI(FiveMBB2, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0xfffff); ++ BuildMI(FiveMBB2, DL, TII->get(LoongArch::ORI32), R7r81_3) ++ .addReg(R7r81l_3) ++ .addImm(0xf00); ++ BuildMI(FiveMBB2, DL, TII->get(LoongArch::AND32), R7r82l_3) ++ .addReg(R70) ++ .addReg(R7r81_3); ++ BuildMI(FiveMBB2, DL, TII->get(LoongArch::OR32), R7r82_3) ++ .addReg(R7r82l_3) ++ .addReg(R7r80l_3); ++ BuildMI(FiveMBB2, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) ++ .addReg(SrcVecReg) ++ .addReg(R7r82_3) ++ .addImm(5); ++ BuildMI(FiveMBB2, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ BuildMI(SixMBB, DL, TII->get(LoongArch::XVPICKVE2GR_W), R70) ++ .addReg(SrcVecReg) ++ .addImm(6); ++ BuildMI(SixMBB, DL, TII->get(ADDI), R7b1).addReg(ZERO).addImm(1); ++ BuildMI(SixMBB, DL, TII->get(BLT)).addReg(Ris).addReg(R7b1).addMBB(SixMBB0); ++ BuildMI(SixMBB, DL, TII->get(ADDI), R7b2).addReg(ZERO).addImm(2); ++ BuildMI(SixMBB, DL, TII->get(BLT)).addReg(Ris).addReg(R7b2).addMBB(SixMBB1); ++ BuildMI(SixMBB, DL, TII->get(ADDI), R7b3).addReg(ZERO).addImm(3); ++ BuildMI(SixMBB, DL, TII->get(BLT)).addReg(Ris).addReg(R7b3).addMBB(SixMBB2); ++ BuildMI(SixMBB, DL, TII->get(LoongArch::B32)).addMBB(SixMBB3); ++ ++ BuildMI(SixMBB3, DL, TII->get(LoongArch::SLLI_W), R7r80_3) ++ .addReg(SrcValReg) ++ .addImm(24); ++ BuildMI(SixMBB3, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0x00fff); ++ BuildMI(SixMBB3, DL, TII->get(LoongArch::ORI32), R7r81_3) ++ .addReg(R7r81l_3) ++ .addImm(0xfff); ++ BuildMI(SixMBB3, DL, TII->get(LoongArch::AND32), R7r82l_3) ++ .addReg(R70) ++ .addReg(R7r81_3); ++ BuildMI(SixMBB3, DL, TII->get(LoongArch::OR32), R7r82_3) ++ .addReg(R7r82l_3) ++ .addReg(R7r80_3); ++ BuildMI(SixMBB3, DL, 
TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) ++ .addReg(SrcVecReg) ++ .addReg(R7r82_3) ++ .addImm(6); ++ BuildMI(SixMBB3, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ BuildMI(SixMBB0, DL, TII->get(LoongArch::SLLI_W), R7r80_3) ++ .addReg(SrcValReg) ++ .addImm(24); ++ BuildMI(SixMBB0, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) ++ .addReg(R7r80_3) ++ .addImm(8); ++ BuildMI(SixMBB0, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0xff00f); ++ BuildMI(SixMBB0, DL, TII->get(LoongArch::ORI32), R7r81_3) ++ .addReg(R7r81l_3) ++ .addImm(0xfff); ++ BuildMI(SixMBB0, DL, TII->get(LoongArch::AND32), R7r82l_3) ++ .addReg(R70) ++ .addReg(R7r81_3); ++ BuildMI(SixMBB0, DL, TII->get(LoongArch::OR32), R7r82_3) ++ .addReg(R7r82l_3) ++ .addReg(R7r80l_3); ++ BuildMI(SixMBB0, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) ++ .addReg(SrcVecReg) ++ .addReg(R7r82_3) ++ .addImm(6); ++ BuildMI(SixMBB0, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ BuildMI(SixMBB1, DL, TII->get(LoongArch::SLLI_W), R7r80_3) ++ .addReg(SrcValReg) ++ .addImm(24); ++ BuildMI(SixMBB1, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) ++ .addReg(R7r80_3) ++ .addImm(16); ++ BuildMI(SixMBB1, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0xffff0); ++ BuildMI(SixMBB1, DL, TII->get(LoongArch::ORI32), R7r81_3) ++ .addReg(R7r81l_3) ++ .addImm(0x0ff); ++ BuildMI(SixMBB1, DL, TII->get(LoongArch::AND32), R7r82l_3) ++ .addReg(R70) ++ .addReg(R7r81_3); ++ BuildMI(SixMBB1, DL, TII->get(LoongArch::OR32), R7r82_3) ++ .addReg(R7r82l_3) ++ .addReg(R7r80l_3); ++ BuildMI(SixMBB1, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) ++ .addReg(SrcVecReg) ++ .addReg(R7r82_3) ++ .addImm(6); ++ BuildMI(SixMBB1, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ BuildMI(SixMBB2, DL, TII->get(LoongArch::SLLI_W), R7r80_3) ++ .addReg(SrcValReg) ++ .addImm(24); ++ BuildMI(SixMBB2, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) ++ .addReg(R7r80_3) ++ .addImm(24); ++ BuildMI(SixMBB2, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0xfffff); ++ BuildMI(SixMBB2, DL, TII->get(LoongArch::ORI32), R7r81_3) ++ .addReg(R7r81l_3) ++ .addImm(0xf00); ++ BuildMI(SixMBB2, DL, TII->get(LoongArch::AND32), R7r82l_3) ++ .addReg(R70) ++ .addReg(R7r81_3); ++ BuildMI(SixMBB2, DL, TII->get(LoongArch::OR32), R7r82_3) ++ .addReg(R7r82l_3) ++ .addReg(R7r80l_3); ++ BuildMI(SixMBB2, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) ++ .addReg(SrcVecReg) ++ .addReg(R7r82_3) ++ .addImm(6); ++ BuildMI(SixMBB2, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ sinkMBB->addSuccessor(exitMBB, BranchProbability::getOne()); ++ ++ BuildMI(sinkMBB, DL, TII->get(LoongArch::XVORI_B), Dest) ++ .addReg(tmp_Dst73) ++ .addImm(0); ++ ++ LivePhysRegs LiveRegs; ++ computeAndAddLiveIns(LiveRegs, *mainMBB); ++ computeAndAddLiveIns(LiveRegs, *SevenMBB); ++ computeAndAddLiveIns(LiveRegs, *SevenMBB0); ++ computeAndAddLiveIns(LiveRegs, *SevenMBB1); ++ computeAndAddLiveIns(LiveRegs, *SevenMBB2); ++ computeAndAddLiveIns(LiveRegs, *SevenMBB3); ++ computeAndAddLiveIns(LiveRegs, *ZeroMBB); ++ computeAndAddLiveIns(LiveRegs, *ZeroMBB0); ++ computeAndAddLiveIns(LiveRegs, *ZeroMBB1); ++ computeAndAddLiveIns(LiveRegs, *ZeroMBB2); ++ computeAndAddLiveIns(LiveRegs, *ZeroMBB3); ++ computeAndAddLiveIns(LiveRegs, *OneMBB); ++ computeAndAddLiveIns(LiveRegs, *OneMBB0); ++ computeAndAddLiveIns(LiveRegs, *OneMBB1); ++ computeAndAddLiveIns(LiveRegs, *OneMBB2); ++ computeAndAddLiveIns(LiveRegs, *OneMBB3); ++ computeAndAddLiveIns(LiveRegs, *TwoMBB); ++ computeAndAddLiveIns(LiveRegs, *TwoMBB0); ++ 
computeAndAddLiveIns(LiveRegs, *TwoMBB1); ++ computeAndAddLiveIns(LiveRegs, *TwoMBB2); ++ computeAndAddLiveIns(LiveRegs, *TwoMBB3); ++ computeAndAddLiveIns(LiveRegs, *ThreeMBB); ++ computeAndAddLiveIns(LiveRegs, *ThreeMBB0); ++ computeAndAddLiveIns(LiveRegs, *ThreeMBB1); ++ computeAndAddLiveIns(LiveRegs, *ThreeMBB2); ++ computeAndAddLiveIns(LiveRegs, *ThreeMBB3); ++ computeAndAddLiveIns(LiveRegs, *FourMBB); ++ computeAndAddLiveIns(LiveRegs, *FourMBB0); ++ computeAndAddLiveIns(LiveRegs, *FourMBB1); ++ computeAndAddLiveIns(LiveRegs, *FourMBB2); ++ computeAndAddLiveIns(LiveRegs, *FourMBB3); ++ computeAndAddLiveIns(LiveRegs, *FiveMBB); ++ computeAndAddLiveIns(LiveRegs, *FiveMBB0); ++ computeAndAddLiveIns(LiveRegs, *FiveMBB1); ++ computeAndAddLiveIns(LiveRegs, *FiveMBB2); ++ computeAndAddLiveIns(LiveRegs, *FiveMBB3); ++ computeAndAddLiveIns(LiveRegs, *SixMBB); ++ computeAndAddLiveIns(LiveRegs, *SixMBB0); ++ computeAndAddLiveIns(LiveRegs, *SixMBB1); ++ computeAndAddLiveIns(LiveRegs, *SixMBB2); ++ computeAndAddLiveIns(LiveRegs, *SixMBB3); ++ computeAndAddLiveIns(LiveRegs, *sinkMBB); ++ computeAndAddLiveIns(LiveRegs, *exitMBB); ++ ++ NMBBI = BB.end(); ++ I->eraseFromParent(); ++ ++ return true; ++} ++ ++bool LoongArchExpandPseudo::expandAtomicBinOpSubword( ++ MachineBasicBlock &BB, MachineBasicBlock::iterator I, ++ MachineBasicBlock::iterator &NMBBI) { ++ ++ MachineFunction *MF = BB.getParent(); ++ ++ DebugLoc DL = I->getDebugLoc(); ++ unsigned LL, SC; ++ unsigned BEQ = LoongArch::BEQ32; ++ unsigned SEOp = LoongArch::EXT_W_H32; ++ ++ LL = LoongArch::LL_W; ++ SC = LoongArch::SC_W; ++ ++ bool IsSwap = false; ++ bool IsNand = false; ++ bool IsMAX = false; ++ bool IsMIN = false; ++ bool IsUnsigned = false; ++ ++ unsigned Opcode = 0; ++ switch (I->getOpcode()) { ++ case LoongArch::ATOMIC_LOAD_NAND_I8_POSTRA: ++ SEOp = LoongArch::EXT_W_B32; ++ LLVM_FALLTHROUGH; ++ case LoongArch::ATOMIC_LOAD_NAND_I16_POSTRA: ++ IsNand = true; ++ break; ++ case LoongArch::ATOMIC_SWAP_I8_POSTRA: ++ SEOp = LoongArch::EXT_W_B32; ++ LLVM_FALLTHROUGH; ++ case LoongArch::ATOMIC_SWAP_I16_POSTRA: ++ IsSwap = true; ++ break; ++ case LoongArch::ATOMIC_LOAD_ADD_I8_POSTRA: ++ SEOp = LoongArch::EXT_W_B32; ++ LLVM_FALLTHROUGH; ++ case LoongArch::ATOMIC_LOAD_ADD_I16_POSTRA: ++ Opcode = LoongArch::ADD_W; ++ break; ++ case LoongArch::ATOMIC_LOAD_MAX_I8_POSTRA: ++ SEOp = LoongArch::EXT_W_B32; ++ LLVM_FALLTHROUGH; ++ case LoongArch::ATOMIC_LOAD_MAX_I16_POSTRA: ++ Opcode = LoongArch::AMMAX_DB_W; ++ IsMAX = true; ++ break; ++ case LoongArch::ATOMIC_LOAD_MIN_I8_POSTRA: ++ SEOp = LoongArch::EXT_W_B32; ++ LLVM_FALLTHROUGH; ++ case LoongArch::ATOMIC_LOAD_MIN_I16_POSTRA: ++ Opcode = LoongArch::AMMIN_DB_W; ++ IsMIN = true; ++ break; ++ case LoongArch::ATOMIC_LOAD_UMAX_I8_POSTRA: ++ SEOp = LoongArch::EXT_W_B32; ++ LLVM_FALLTHROUGH; ++ case LoongArch::ATOMIC_LOAD_UMAX_I16_POSTRA: ++ Opcode = LoongArch::AMMAX_DB_WU; ++ IsMAX = true; ++ IsUnsigned = true; ++ break; ++ case LoongArch::ATOMIC_LOAD_UMIN_I8_POSTRA: ++ SEOp = LoongArch::EXT_W_B32; ++ LLVM_FALLTHROUGH; ++ case LoongArch::ATOMIC_LOAD_UMIN_I16_POSTRA: ++ Opcode = LoongArch::AMMIN_DB_WU; ++ IsMIN = true; ++ IsUnsigned = true; ++ break; ++ case LoongArch::ATOMIC_LOAD_SUB_I8_POSTRA: ++ SEOp = LoongArch::EXT_W_B32; ++ LLVM_FALLTHROUGH; ++ case LoongArch::ATOMIC_LOAD_SUB_I16_POSTRA: ++ Opcode = LoongArch::SUB_W; ++ break; ++ case LoongArch::ATOMIC_LOAD_AND_I8_POSTRA: ++ SEOp = LoongArch::EXT_W_B32; ++ LLVM_FALLTHROUGH; ++ case LoongArch::ATOMIC_LOAD_AND_I16_POSTRA: ++ Opcode = LoongArch::AND32; ++ 
break; ++ case LoongArch::ATOMIC_LOAD_OR_I8_POSTRA: ++ SEOp = LoongArch::EXT_W_B32; ++ LLVM_FALLTHROUGH; ++ case LoongArch::ATOMIC_LOAD_OR_I16_POSTRA: ++ Opcode = LoongArch::OR32; ++ break; ++ case LoongArch::ATOMIC_LOAD_XOR_I8_POSTRA: ++ SEOp = LoongArch::EXT_W_B32; ++ LLVM_FALLTHROUGH; ++ case LoongArch::ATOMIC_LOAD_XOR_I16_POSTRA: ++ Opcode = LoongArch::XOR32; ++ break; ++ default: ++ llvm_unreachable("Unknown subword atomic pseudo for expansion!"); ++ } ++ ++ unsigned Dest = I->getOperand(0).getReg(); ++ unsigned Ptr = I->getOperand(1).getReg(); ++ unsigned Incr = I->getOperand(2).getReg(); ++ unsigned Mask = I->getOperand(3).getReg(); ++ unsigned Mask2 = I->getOperand(4).getReg(); ++ unsigned ShiftAmnt = I->getOperand(5).getReg(); ++ unsigned OldVal = I->getOperand(6).getReg(); ++ unsigned BinOpRes = I->getOperand(7).getReg(); ++ unsigned StoreVal = I->getOperand(8).getReg(); ++ ++ const BasicBlock *LLVM_BB = BB.getBasicBlock(); ++ MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineFunction::iterator It = ++BB.getIterator(); ++ MF->insert(It, loopMBB); ++ MF->insert(It, sinkMBB); ++ MF->insert(It, exitMBB); ++ ++ exitMBB->splice(exitMBB->begin(), &BB, std::next(I), BB.end()); ++ exitMBB->transferSuccessorsAndUpdatePHIs(&BB); ++ ++ BB.addSuccessor(loopMBB, BranchProbability::getOne()); ++ loopMBB->addSuccessor(sinkMBB); ++ loopMBB->addSuccessor(loopMBB); ++ loopMBB->normalizeSuccProbs(); ++ ++ BuildMI(loopMBB, DL, TII->get(LL), OldVal).addReg(Ptr).addImm(0); ++ if (IsNand) { ++ // and andres, oldval, incr2 ++ // nor binopres, $0, andres ++ // and newval, binopres, mask ++ BuildMI(loopMBB, DL, TII->get(LoongArch::AND32), BinOpRes) ++ .addReg(OldVal) ++ .addReg(Incr); ++ BuildMI(loopMBB, DL, TII->get(LoongArch::NOR32), BinOpRes) ++ .addReg(LoongArch::ZERO) ++ .addReg(BinOpRes); ++ BuildMI(loopMBB, DL, TII->get(LoongArch::AND32), BinOpRes) ++ .addReg(BinOpRes) ++ .addReg(Mask); ++ } else if (IsMAX || IsMIN) { ++ ++ unsigned SLTScratch4 = IsUnsigned ? LoongArch::SLTU32 : LoongArch::SLT32; ++ unsigned CMPIncr = IsMAX ? LoongArch::MASKEQZ32 : LoongArch::MASKNEZ32; ++ unsigned CMPOldVal = IsMAX ? 
LoongArch::MASKNEZ32 : LoongArch::MASKEQZ32; ++ ++ unsigned Scratch4 = I->getOperand(9).getReg(); ++ unsigned Scratch5 = I->getOperand(10).getReg(); ++ ++ BuildMI(loopMBB, DL, TII->get(LoongArch::AND32), Scratch5) ++ .addReg(OldVal) ++ .addReg(Mask); ++ BuildMI(loopMBB, DL, TII->get(LoongArch::AND32), Incr) ++ .addReg(Incr) ++ .addReg(Mask); ++ BuildMI(loopMBB, DL, TII->get(SLTScratch4), Scratch4) ++ .addReg(Scratch5) ++ .addReg(Incr); ++ BuildMI(loopMBB, DL, TII->get(CMPOldVal), BinOpRes) ++ .addReg(Scratch5) ++ .addReg(Scratch4); ++ BuildMI(loopMBB, DL, TII->get(CMPIncr), Scratch4) ++ .addReg(Incr) ++ .addReg(Scratch4); ++ BuildMI(loopMBB, DL, TII->get(LoongArch::OR32), BinOpRes) ++ .addReg(BinOpRes) ++ .addReg(Scratch4); ++ ++ } else if (!IsSwap) { ++ // binopres, oldval, incr2 ++ // and newval, binopres, mask ++ BuildMI(loopMBB, DL, TII->get(Opcode), BinOpRes) ++ .addReg(OldVal) ++ .addReg(Incr); ++ BuildMI(loopMBB, DL, TII->get(LoongArch::AND32), BinOpRes) ++ .addReg(BinOpRes) ++ .addReg(Mask); ++ } else { // atomic.swap ++ // and newval, incr2, mask ++ BuildMI(loopMBB, DL, TII->get(LoongArch::AND32), BinOpRes) ++ .addReg(Incr) ++ .addReg(Mask); ++ } ++ ++ // and StoreVal, OlddVal, Mask2 ++ // or StoreVal, StoreVal, BinOpRes ++ // StoreVal = sc StoreVal, 0(Ptr) ++ // beq StoreVal, zero, loopMBB ++ BuildMI(loopMBB, DL, TII->get(LoongArch::AND32), StoreVal) ++ .addReg(OldVal) ++ .addReg(Mask2); ++ BuildMI(loopMBB, DL, TII->get(LoongArch::OR32), StoreVal) ++ .addReg(StoreVal) ++ .addReg(BinOpRes); ++ BuildMI(loopMBB, DL, TII->get(SC), StoreVal) ++ .addReg(StoreVal) ++ .addReg(Ptr) ++ .addImm(0); ++ BuildMI(loopMBB, DL, TII->get(BEQ)) ++ .addReg(StoreVal) ++ .addReg(LoongArch::ZERO) ++ .addMBB(loopMBB); ++ ++ // sinkMBB: ++ // and maskedoldval1,oldval,mask ++ // srl srlres,maskedoldval1,shiftamt ++ // sign_extend dest,srlres ++ ++ sinkMBB->addSuccessor(exitMBB, BranchProbability::getOne()); ++ ++ BuildMI(sinkMBB, DL, TII->get(LoongArch::AND32), Dest) ++ .addReg(OldVal) ++ .addReg(Mask); ++ BuildMI(sinkMBB, DL, TII->get(LoongArch::SRL_W), Dest) ++ .addReg(Dest) ++ .addReg(ShiftAmnt); ++ ++ BuildMI(sinkMBB, DL, TII->get(SEOp), Dest).addReg(Dest); ++ ++ LivePhysRegs LiveRegs; ++ computeAndAddLiveIns(LiveRegs, *loopMBB); ++ computeAndAddLiveIns(LiveRegs, *sinkMBB); ++ computeAndAddLiveIns(LiveRegs, *exitMBB); ++ ++ NMBBI = BB.end(); ++ I->eraseFromParent(); ++ ++ return true; ++} ++ ++bool LoongArchExpandPseudo::expandAtomicBinOp(MachineBasicBlock &BB, ++ MachineBasicBlock::iterator I, ++ MachineBasicBlock::iterator &NMBBI, ++ unsigned Size) { ++ MachineFunction *MF = BB.getParent(); ++ ++ DebugLoc DL = I->getDebugLoc(); ++ ++ unsigned LL, SC, ZERO, BEQ, SUB; ++ if (Size == 4) { ++ LL = LoongArch::LL_W; ++ SC = LoongArch::SC_W; ++ BEQ = LoongArch::BEQ32; ++ ZERO = LoongArch::ZERO; ++ SUB = LoongArch::SUB_W; ++ } else { ++ LL = LoongArch::LL_D; ++ SC = LoongArch::SC_D; ++ ZERO = LoongArch::ZERO_64; ++ BEQ = LoongArch::BEQ; ++ SUB = LoongArch::SUB_D; ++ } ++ ++ unsigned OldVal = I->getOperand(0).getReg(); ++ unsigned Ptr = I->getOperand(1).getReg(); ++ unsigned Incr = I->getOperand(2).getReg(); ++ unsigned Scratch = I->getOperand(3).getReg(); ++ ++ unsigned Opcode = 0; ++ unsigned OR = 0; ++ unsigned AND = 0; ++ unsigned NOR = 0; ++ bool IsNand = false; ++ bool IsSub = false; ++ switch (I->getOpcode()) { ++ case LoongArch::ATOMIC_LOAD_ADD_I32_POSTRA: ++ Opcode = LoongArch::AMADD_DB_W; ++ break; ++ case LoongArch::ATOMIC_LOAD_SUB_I32_POSTRA: ++ IsSub = true; ++ Opcode = LoongArch::AMADD_DB_W; ++ 
break; ++ case LoongArch::ATOMIC_LOAD_AND_I32_POSTRA: ++ Opcode = LoongArch::AMAND_DB_W; ++ break; ++ case LoongArch::ATOMIC_LOAD_OR_I32_POSTRA: ++ Opcode = LoongArch::AMOR_DB_W; ++ break; ++ case LoongArch::ATOMIC_LOAD_XOR_I32_POSTRA: ++ Opcode = LoongArch::AMXOR_DB_W; ++ break; ++ case LoongArch::ATOMIC_LOAD_NAND_I32_POSTRA: ++ IsNand = true; ++ AND = LoongArch::AND32; ++ NOR = LoongArch::NOR32; ++ break; ++ case LoongArch::ATOMIC_SWAP_I32_POSTRA: ++ OR = LoongArch::AMSWAP_DB_W; ++ break; ++ case LoongArch::ATOMIC_LOAD_MAX_I32_POSTRA: ++ Opcode = LoongArch::AMMAX_DB_W; ++ break; ++ case LoongArch::ATOMIC_LOAD_MIN_I32_POSTRA: ++ Opcode = LoongArch::AMMIN_DB_W; ++ break; ++ case LoongArch::ATOMIC_LOAD_UMAX_I32_POSTRA: ++ Opcode = LoongArch::AMMAX_DB_WU; ++ break; ++ case LoongArch::ATOMIC_LOAD_UMIN_I32_POSTRA: ++ Opcode = LoongArch::AMMIN_DB_WU; ++ break; ++ case LoongArch::ATOMIC_LOAD_ADD_I64_POSTRA: ++ Opcode = LoongArch::AMADD_DB_D; ++ break; ++ case LoongArch::ATOMIC_LOAD_SUB_I64_POSTRA: ++ IsSub = true; ++ Opcode = LoongArch::AMADD_DB_D; ++ break; ++ case LoongArch::ATOMIC_LOAD_AND_I64_POSTRA: ++ Opcode = LoongArch::AMAND_DB_D; ++ break; ++ case LoongArch::ATOMIC_LOAD_OR_I64_POSTRA: ++ Opcode = LoongArch::AMOR_DB_D; ++ break; ++ case LoongArch::ATOMIC_LOAD_XOR_I64_POSTRA: ++ Opcode = LoongArch::AMXOR_DB_D; ++ break; ++ case LoongArch::ATOMIC_LOAD_NAND_I64_POSTRA: ++ IsNand = true; ++ AND = LoongArch::AND; ++ NOR = LoongArch::NOR; ++ break; ++ case LoongArch::ATOMIC_SWAP_I64_POSTRA: ++ OR = LoongArch::AMSWAP_DB_D; ++ break; ++ case LoongArch::ATOMIC_LOAD_MAX_I64_POSTRA: ++ Opcode = LoongArch::AMMAX_DB_D; ++ break; ++ case LoongArch::ATOMIC_LOAD_MIN_I64_POSTRA: ++ Opcode = LoongArch::AMMIN_DB_D; ++ break; ++ case LoongArch::ATOMIC_LOAD_UMAX_I64_POSTRA: ++ Opcode = LoongArch::AMMAX_DB_DU; ++ break; ++ case LoongArch::ATOMIC_LOAD_UMIN_I64_POSTRA: ++ Opcode = LoongArch::AMMIN_DB_DU; ++ break; ++ default: ++ llvm_unreachable("Unknown pseudo atomic!"); ++ } ++ ++ const BasicBlock *LLVM_BB = BB.getBasicBlock(); ++ MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineFunction::iterator It = ++BB.getIterator(); ++ MF->insert(It, loopMBB); ++ MF->insert(It, exitMBB); ++ ++ exitMBB->splice(exitMBB->begin(), &BB, std::next(I), BB.end()); ++ exitMBB->transferSuccessorsAndUpdatePHIs(&BB); ++ ++ BB.addSuccessor(loopMBB, BranchProbability::getOne()); ++ loopMBB->addSuccessor(exitMBB); ++ loopMBB->addSuccessor(loopMBB); ++ loopMBB->normalizeSuccProbs(); ++ ++ assert((OldVal != Ptr) && "Clobbered the wrong ptr reg!"); ++ assert((OldVal != Incr) && "Clobbered the wrong reg!"); ++ if (Opcode) { ++ if(IsSub){ ++ BuildMI(loopMBB, DL, TII->get(SUB), Scratch).addReg(ZERO).addReg(Incr); ++ BuildMI(loopMBB, DL, TII->get(Opcode), OldVal).addReg(Scratch).addReg(Ptr).addImm(0); ++ } ++ else{ ++ BuildMI(loopMBB, DL, TII->get(Opcode), OldVal).addReg(Incr).addReg(Ptr).addImm(0); ++ } ++ } else if (IsNand) { ++ assert(AND && NOR && ++ "Unknown nand instruction for atomic pseudo expansion"); ++ BuildMI(loopMBB, DL, TII->get(LL), OldVal).addReg(Ptr).addImm(0); ++ BuildMI(loopMBB, DL, TII->get(AND), Scratch).addReg(OldVal).addReg(Incr); ++ BuildMI(loopMBB, DL, TII->get(NOR), Scratch).addReg(ZERO).addReg(Scratch); ++ BuildMI(loopMBB, DL, TII->get(SC), Scratch).addReg(Scratch).addReg(Ptr).addImm(0); ++ BuildMI(loopMBB, DL, TII->get(BEQ)).addReg(Scratch).addReg(ZERO).addMBB(loopMBB); ++ } else { ++ assert(OR && "Unknown instruction 
for atomic pseudo expansion!"); ++ BuildMI(loopMBB, DL, TII->get(OR), OldVal).addReg(Incr).addReg(Ptr).addImm(0); ++ } ++ ++ ++ NMBBI = BB.end(); ++ I->eraseFromParent(); ++ ++ LivePhysRegs LiveRegs; ++ computeAndAddLiveIns(LiveRegs, *loopMBB); ++ computeAndAddLiveIns(LiveRegs, *exitMBB); ++ ++ return true; ++} ++ ++bool LoongArchExpandPseudo::expandLoadAddr(MachineBasicBlock &BB, ++ MachineBasicBlock::iterator I, ++ MachineBasicBlock::iterator &NMBBI) { ++ MachineFunction *MF = BB.getParent(); ++ MachineInstr &MI = *I; ++ DebugLoc DL = MI.getDebugLoc(); ++ ++ unsigned Op = MI.getOpcode(); ++ unsigned DestReg = MI.getOperand(0).getReg(); ++ unsigned TmpReg; ++ const MachineOperand &MO = MI.getOperand(1); ++ Reloc::Model RM = MF->getTarget().getRelocationModel(); ++ ++ MachineInstrBuilder MIB1, MIB2, MIB3, MIB4, MIB5; ++ unsigned HiFlag, LoFlag, HigherFlag, HighestFlag; ++ unsigned HiOp, LoOp, HigherOp, HighestOp, LastOp; ++ bool UseGot = false; ++ ++ HiOp = LoongArch::PCADDU12I_ri; ++ LoOp = LoongArch::ORI_rri; ++ HigherOp = LoongArch::LU32I_D_ri; ++ HighestOp = LoongArch::LU52I_D_rri; ++ ++ switch (Op) { ++ case LoongArch::LoadAddrLocal: ++ if (RM == Reloc::Static) { // for jit ++ HiFlag = LoongArchII::MO_ABS_HI; ++ LoFlag = LoongArchII::MO_ABS_LO; ++ HigherFlag = LoongArchII::MO_ABS_HIGHER; ++ HighestFlag = LoongArchII::MO_ABS_HIGHEST; ++ // lu12i.w + ori + lu32i.d + lu52i.d ++ HiOp = LoongArch::LU12I_W; ++ LoOp = LoongArch::ORI; ++ HigherOp = LoongArch::LU32I_D; ++ HighestOp = LoongArch::LU52I_D; ++ } else { ++ // pcaddu12i + addi.d ++ LoFlag = LoongArchII::MO_PCREL_LO; ++ HiFlag = LoongArchII::MO_PCREL_HI; ++ LoOp = LoongArch::ADDI_D_rri; ++ } ++ break; ++ case LoongArch::LoadAddrLocalRR: ++ // pcaddu12i + ori + lu32i.d + lu52i.d + add.d ++ LoFlag = LoongArchII::MO_PCREL_RRLO; ++ HiFlag = LoongArchII::MO_PCREL_RRHI; ++ HigherFlag = LoongArchII::MO_PCREL_RRHIGHER; ++ HighestFlag = LoongArchII::MO_PCREL_RRHIGHEST; ++ LastOp = LoongArch::ADD_D_rrr; ++ break; ++ case LoongArch::LoadAddrGlobal: ++ case LoongArch::LoadAddrGlobal_Alias: ++ // pcaddu12i + ld.d ++ LoFlag = LoongArchII::MO_GOT_LO; ++ HiFlag = LoongArchII::MO_GOT_HI; ++ HiOp = LoongArch::PCADDU12I_rii; ++ LoOp = LoongArch::LD_D_rrii; ++ UseGot = true; ++ break; ++ case LoongArch::LoadAddrGlobalRR: ++ // pcaddu12i + ori + lu32i.d + lu52i.d +ldx.d ++ LoFlag = LoongArchII::MO_GOT_RRLO; ++ HiFlag = LoongArchII::MO_GOT_RRHI; ++ HigherFlag = LoongArchII::MO_GOT_RRHIGHER; ++ HighestFlag = LoongArchII::MO_GOT_RRHIGHEST; ++ HiOp = LoongArch::PCADDU12I_rii; ++ LoOp = LoongArch::ORI_rrii; ++ HigherOp = LoongArch::LU32I_D_rii; ++ HighestOp = LoongArch::LU52I_D_rrii; ++ LastOp = LoongArch::LDX_D_rrr; ++ UseGot = true; ++ break; ++ case LoongArch::LoadAddrTLS_LE: ++ // lu12i.w + ori + lu32i.d + lu52i.d ++ LoFlag = LoongArchII::MO_TLSLE_LO; ++ HiFlag = LoongArchII::MO_TLSLE_HI; ++ HigherFlag = LoongArchII::MO_TLSLE_HIGHER; ++ HighestFlag = LoongArchII::MO_TLSLE_HIGHEST; ++ HiOp = LoongArch::LU12I_W_ri; ++ break; ++ case LoongArch::LoadAddrTLS_IE: ++ // pcaddu12i + ld.d ++ LoFlag = LoongArchII::MO_TLSIE_LO; ++ HiFlag = LoongArchII::MO_TLSIE_HI; ++ HiOp = LoongArch::PCADDU12I_rii; ++ LoOp = LoongArch::LD_D_rrii; ++ UseGot = true; ++ break; ++ case LoongArch::LoadAddrTLS_IE_RR: ++ // pcaddu12i + ori + lu32i.d + lu52i.d +ldx.d ++ LoFlag = LoongArchII::MO_TLSIE_RRLO; ++ HiFlag = LoongArchII::MO_TLSIE_RRHI; ++ HigherFlag = LoongArchII::MO_TLSIE_RRHIGHER; ++ HighestFlag = LoongArchII::MO_TLSIE_RRHIGHEST; ++ HiOp = LoongArch::PCADDU12I_rii; ++ LoOp = 
LoongArch::ORI_rrii; ++ HigherOp = LoongArch::LU32I_D_rii; ++ HighestOp = LoongArch::LU52I_D_rrii; ++ LastOp = LoongArch::LDX_D_rrr; ++ UseGot = true; ++ break; ++ case LoongArch::LoadAddrTLS_LD: ++ case LoongArch::LoadAddrTLS_GD: ++ // pcaddu12i + addi.d ++ LoFlag = LoongArchII::MO_TLSGD_LO; ++ HiFlag = LoongArchII::MO_TLSGD_HI; ++ HiOp = LoongArch::PCADDU12I_rii; ++ LoOp = LoongArch::ADDI_D_rrii; ++ UseGot = true; ++ break; ++ case LoongArch::LoadAddrTLS_LD_RR: ++ case LoongArch::LoadAddrTLS_GD_RR: ++ // pcaddu12i + ori + lu32i.d + lu52i.d + add.d ++ LoFlag = LoongArchII::MO_TLSGD_RRLO; ++ HiFlag = LoongArchII::MO_TLSGD_RRHI; ++ HigherFlag = LoongArchII::MO_TLSGD_RRHIGHER; ++ HighestFlag = LoongArchII::MO_TLSGD_RRHIGHEST; ++ HiOp = LoongArch::PCADDU12I_rii; ++ LoOp = LoongArch::ORI_rrii; ++ HigherOp = LoongArch::LU32I_D_rii; ++ HighestOp = LoongArch::LU52I_D_rrii; ++ LastOp = LoongArch::ADD_D_rrr; ++ UseGot = true; ++ break; ++ default: ++ break; ++ } ++ ++ MIB1 = BuildMI(BB, I, DL, TII->get(HiOp), DestReg); ++ ++ switch (Op) { ++ case LoongArch::LoadAddrLocal: ++ if (RM == Reloc::Static) { // for jit ++ // la.abs rd, symbol ++ MIB2 = BuildMI(BB, I, DL, TII->get(LoOp), DestReg).addReg(DestReg); ++ MIB3 = BuildMI(BB, I, DL, TII->get(HigherOp), DestReg); ++ MIB4 = BuildMI(BB, I, DL, TII->get(HighestOp), DestReg).addReg(DestReg); ++ if (MO.isJTI()) { ++ MIB1.addJumpTableIndex(MO.getIndex(), HiFlag); ++ MIB2.addJumpTableIndex(MO.getIndex(), LoFlag); ++ MIB3.addJumpTableIndex(MO.getIndex(), HigherFlag); ++ MIB4.addJumpTableIndex(MO.getIndex(), HighestFlag); ++ } else if (MO.isBlockAddress()) { ++ MIB1.addBlockAddress(MO.getBlockAddress(), 0, HiFlag); ++ MIB2.addBlockAddress(MO.getBlockAddress(), 0, LoFlag); ++ MIB3.addBlockAddress(MO.getBlockAddress(), 0, HigherFlag); ++ MIB4.addBlockAddress(MO.getBlockAddress(), 0, HighestFlag); ++ } else { ++ MIB1.addDisp(MO, 0, HiFlag); ++ MIB2.addDisp(MO, 0, LoFlag); ++ MIB3.addDisp(MO, 0, HigherFlag); ++ MIB4.addDisp(MO, 0, HighestFlag); ++ } ++ break; ++ } ++ LLVM_FALLTHROUGH; ++ case LoongArch::LoadAddrGlobal: // la.global rd, symbol ++ case LoongArch::LoadAddrGlobal_Alias: // la rd, symbol ++ case LoongArch::LoadAddrTLS_IE: // la.tls.ie rd, symbol ++ case LoongArch::LoadAddrTLS_LD: // la.tls.ld rd, symbol ++ case LoongArch::LoadAddrTLS_GD: // la.tls.gd rd, symbol ++ MIB2 = BuildMI(BB, I, DL, TII->get(LoOp), DestReg) ++ .addReg(DestReg); ++ if (MO.isJTI()) { ++ MIB1.addJumpTableIndex(MO.getIndex(), HiFlag); ++ MIB2.addJumpTableIndex(MO.getIndex(), LoFlag); ++ } else if (MO.isBlockAddress()) { ++ MIB1.addBlockAddress(MO.getBlockAddress(), 0, HiFlag); ++ MIB2.addBlockAddress(MO.getBlockAddress(), 0, LoFlag); ++ } else { ++ MIB1.addDisp(MO, 0, HiFlag); ++ MIB2.addDisp(MO, 0, LoFlag); ++ } ++ if (UseGot == true) { ++ MIB1.addExternalSymbol("_GLOBAL_OFFSET_TABLE_"); ++ MIB2.addExternalSymbol("_GLOBAL_OFFSET_TABLE_"); ++ } ++ break; ++ ++ case LoongArch::LoadAddrLocalRR: //la.local rd, rs, symbol ++ case LoongArch::LoadAddrGlobalRR: // la.global rd, rs, symbol ++ case LoongArch::LoadAddrTLS_IE_RR: // la.tls.ie rd, rs, symbol ++ case LoongArch::LoadAddrTLS_LD_RR: // la.tls.ld rd, rs, symbol ++ case LoongArch::LoadAddrTLS_GD_RR: // la.tls.gd rd, rs, symbol ++ TmpReg = MI.getOperand(MI.getNumOperands()-1).getReg(); ++ MIB2 = BuildMI(BB, I, DL, TII->get(LoOp), TmpReg) ++ .addReg(TmpReg); ++ MIB3 = BuildMI(BB, I, DL, TII->get(HigherOp), TmpReg); ++ MIB4 = BuildMI(BB, I, DL, TII->get(HighestOp), TmpReg) ++ .addReg(TmpReg); ++ MIB5 = BuildMI(BB, I, DL, 
TII->get(LastOp), DestReg) ++ .addReg(DestReg) ++ .addReg(TmpReg); ++ if (MO.isJTI()) { ++ MIB1.addJumpTableIndex(MO.getIndex(), HiFlag); ++ MIB2.addJumpTableIndex(MO.getIndex(), LoFlag); ++ MIB3.addJumpTableIndex(MO.getIndex(), HigherFlag); ++ MIB4.addJumpTableIndex(MO.getIndex(), HighestFlag); ++ } else if (MO.isBlockAddress()) { ++ MIB1.addBlockAddress(MO.getBlockAddress(), 0, HiFlag); ++ MIB2.addBlockAddress(MO.getBlockAddress(), 0, LoFlag); ++ MIB3.addBlockAddress(MO.getBlockAddress(), 0, HigherFlag); ++ MIB4.addBlockAddress(MO.getBlockAddress(), 0, HighestFlag); ++ } else { ++ MIB1.addDisp(MO, 0, HiFlag); ++ MIB2.addDisp(MO, 0, LoFlag); ++ MIB3.addDisp(MO, 0, HigherFlag); ++ MIB4.addDisp(MO, 0, HighestFlag); ++ } ++ if (UseGot == true) { ++ MIB1.addExternalSymbol("_GLOBAL_OFFSET_TABLE_"); ++ MIB2.addExternalSymbol("_GLOBAL_OFFSET_TABLE_"); ++ MIB3.addExternalSymbol("_GLOBAL_OFFSET_TABLE_"); ++ MIB4.addExternalSymbol("_GLOBAL_OFFSET_TABLE_"); ++ } ++ break; ++ case LoongArch::LoadAddrTLS_LE: // la.tls.le rd, symbol ++ MIB2 = BuildMI(BB, I, DL, TII->get(LoOp), DestReg) ++ .addReg(DestReg); ++ MIB3 = BuildMI(BB, I, DL, TII->get(HigherOp), DestReg); ++ MIB4 = BuildMI(BB, I, DL, TII->get(HighestOp), DestReg) ++ .addReg(DestReg); ++ if (MO.isJTI()) { ++ MIB1.addJumpTableIndex(MO.getIndex(), HiFlag); ++ MIB2.addJumpTableIndex(MO.getIndex(), LoFlag); ++ MIB3.addJumpTableIndex(MO.getIndex(), HigherFlag); ++ MIB4.addJumpTableIndex(MO.getIndex(), HighestFlag); ++ } else if (MO.isBlockAddress()) { ++ MIB1.addBlockAddress(MO.getBlockAddress(), 0, HiFlag); ++ MIB2.addBlockAddress(MO.getBlockAddress(), 0, LoFlag); ++ MIB3.addBlockAddress(MO.getBlockAddress(), 0, HigherFlag); ++ MIB4.addBlockAddress(MO.getBlockAddress(), 0, HighestFlag); ++ } else { ++ MIB1.addDisp(MO, 0, HiFlag); ++ MIB2.addDisp(MO, 0, LoFlag); ++ MIB3.addDisp(MO, 0, HigherFlag); ++ MIB4.addDisp(MO, 0, HighestFlag); ++ } ++ break; ++ default: ++ break; ++ } ++ ++ MI.eraseFromParent(); ++ ++ return true; ++} ++ ++bool LoongArchExpandPseudo::expandPseudoTailCall( ++ MachineBasicBlock &BB, MachineBasicBlock::iterator I) { ++ ++ MachineInstr &MI = *I; ++ DebugLoc DL = MI.getDebugLoc(); ++ ++ const MachineOperand &MO = MI.getOperand(0); ++ ++ unsigned NoFlag = LoongArchII::MO_NO_FLAG; ++ ++ MachineInstrBuilder MIB = ++ BuildMI(BB, I, DL, TII->get(LoongArch::PseudoTailReturn)); ++ ++ if (MO.isSymbol()) { ++ MIB.addExternalSymbol(MO.getSymbolName(), NoFlag); ++ } else { ++ MIB.addDisp(MO, 0, NoFlag); ++ } ++ ++ MI.eraseFromParent(); ++ ++ return true; ++} ++ ++bool LoongArchExpandPseudo::expandPseudoCall(MachineBasicBlock &BB, ++ MachineBasicBlock::iterator I, ++ MachineBasicBlock::iterator &NMBBI) { ++ MachineFunction *MF = BB.getParent(); ++ MachineInstr &MI = *I; ++ DebugLoc DL = MI.getDebugLoc(); ++ CodeModel::Model M = MF->getTarget().getCodeModel(); ++ Reloc::Model RM = MF->getTarget().getRelocationModel(); ++ ++ unsigned Ra = LoongArch::RA_64; ++ const MachineOperand &MO = MI.getOperand(0); ++ unsigned HiFlag, LoFlag, HigherFlag, HighestFlag, NoFlag; ++ ++ HiFlag = LoongArchII::MO_CALL_HI; ++ LoFlag = LoongArchII::MO_CALL_LO; ++ NoFlag = LoongArchII::MO_NO_FLAG; ++ ++ if (RM == Reloc::Static) { // for jit ++ MachineInstrBuilder MIB1, MIB2, MIB3, MIB4, MIB5; ++ ++ HiFlag = LoongArchII::MO_ABS_HI; ++ LoFlag = LoongArchII::MO_ABS_LO; ++ HigherFlag = LoongArchII::MO_ABS_HIGHER; ++ HighestFlag = LoongArchII::MO_ABS_HIGHEST; ++ // lu12i.w + ori + lu32i.d + lu52i.d + jirl ++ ++ MIB1 = BuildMI(BB, I, DL, TII->get(LoongArch::LU12I_W), Ra); 
++ MIB2 = BuildMI(BB, I, DL, TII->get(LoongArch::ORI), Ra) ++ .addReg(Ra); ++ MIB3 = BuildMI(BB, I, DL, TII->get(LoongArch::LU32I_D), Ra); ++ MIB4 = BuildMI(BB, I, DL, TII->get(LoongArch::LU52I_D), Ra) ++ .addReg(Ra); ++ MIB5 = ++ BuildMI(BB, I, DL, TII->get(LoongArch::JIRL), Ra).addReg(Ra).addImm(0); ++ if (MO.isSymbol()) { ++ MIB1.addExternalSymbol(MO.getSymbolName(), HiFlag); ++ MIB2.addExternalSymbol(MO.getSymbolName(), LoFlag); ++ MIB3.addExternalSymbol(MO.getSymbolName(), HigherFlag); ++ MIB4.addExternalSymbol(MO.getSymbolName(), HighestFlag); ++ } else { ++ MIB1.addDisp(MO, 0, HiFlag); ++ MIB2.addDisp(MO, 0, LoFlag); ++ MIB3.addDisp(MO, 0, HigherFlag); ++ MIB4.addDisp(MO, 0, HighestFlag); ++ } ++ } else if (M == CodeModel::Large) { ++ // pcaddu18i + jirl ++ MachineInstrBuilder MIB1; ++ MachineInstrBuilder MIB2; ++ ++ MIB1 = BuildMI(BB, I, DL, TII->get(LoongArch::PCADDU18I), Ra); ++ MIB2 = BuildMI(BB, I, DL, TII->get(LoongArch::JIRL_CALL), Ra).addReg(Ra); ++ if (MO.isSymbol()) { ++ MIB1.addExternalSymbol(MO.getSymbolName(), HiFlag); ++ MIB2.addExternalSymbol(MO.getSymbolName(), LoFlag); ++ } else { ++ MIB1.addDisp(MO, 0, HiFlag); ++ MIB2.addDisp(MO, 0, LoFlag); ++ } ++ } else { ++ // bl ++ MachineInstrBuilder MIB1; ++ MIB1 = BuildMI(BB, I, DL, TII->get(LoongArch::BL)); ++ if (MO.isSymbol()) { ++ MIB1.addExternalSymbol(MO.getSymbolName(), NoFlag); ++ } else { ++ MIB1.addDisp(MO, 0, NoFlag); ++ } ++ } ++ ++ MI.eraseFromParent(); ++ ++ return true; ++} ++ ++bool LoongArchExpandPseudo::expandPseudoTEQ(MachineBasicBlock &BB, ++ MachineBasicBlock::iterator I, ++ MachineBasicBlock::iterator &NMBBI) { ++ MachineInstr &MI = *I; ++ DebugLoc DL = MI.getDebugLoc(); ++ ++ unsigned Divisor = MI.getOperand(0).getReg(); ++ unsigned BneOp = LoongArch::BNE; ++ unsigned Zero = LoongArch::ZERO_64; ++ ++ // beq $Divisor, $zero, 8 ++ BuildMI(BB, I, DL, TII->get(BneOp), Divisor) ++ .addReg(Zero) ++ .addImm(8); ++ // break 7 ++ BuildMI(BB, I, DL, TII->get(LoongArch::BREAK)) ++ .addImm(7);; ++ ++ MI.eraseFromParent(); ++ ++ return true; ++} ++bool LoongArchExpandPseudo::expandMI(MachineBasicBlock &MBB, ++ MachineBasicBlock::iterator MBBI, ++ MachineBasicBlock::iterator &NMBB) { ++ ++ bool Modified = false; ++ ++ switch (MBBI->getOpcode()) { ++ case LoongArch::PseudoTEQ: ++ return expandPseudoTEQ(MBB, MBBI, NMBB); ++ case LoongArch::PseudoCall: ++ return expandPseudoCall(MBB, MBBI, NMBB); ++ case LoongArch::PseudoTailCall: ++ return expandPseudoTailCall(MBB, MBBI); ++ case LoongArch::LoadAddrLocal: ++ case LoongArch::LoadAddrLocalRR: ++ case LoongArch::LoadAddrGlobal: ++ case LoongArch::LoadAddrGlobalRR: ++ case LoongArch::LoadAddrGlobal_Alias: ++ case LoongArch::LoadAddrTLS_LD: ++ case LoongArch::LoadAddrTLS_LD_RR: ++ case LoongArch::LoadAddrTLS_GD: ++ case LoongArch::LoadAddrTLS_GD_RR: ++ case LoongArch::LoadAddrTLS_IE: ++ case LoongArch::LoadAddrTLS_IE_RR: ++ case LoongArch::LoadAddrTLS_LE: ++ return expandLoadAddr(MBB, MBBI, NMBB); ++ case LoongArch::ATOMIC_CMP_SWAP_I32_POSTRA: ++ case LoongArch::ATOMIC_CMP_SWAP_I64_POSTRA: ++ return expandAtomicCmpSwap(MBB, MBBI, NMBB); ++ case LoongArch::ATOMIC_CMP_SWAP_I8_POSTRA: ++ case LoongArch::ATOMIC_CMP_SWAP_I16_POSTRA: ++ return expandAtomicCmpSwapSubword(MBB, MBBI, NMBB); ++ case LoongArch::ATOMIC_SWAP_I8_POSTRA: ++ case LoongArch::ATOMIC_SWAP_I16_POSTRA: ++ case LoongArch::ATOMIC_LOAD_NAND_I8_POSTRA: ++ case LoongArch::ATOMIC_LOAD_NAND_I16_POSTRA: ++ case LoongArch::ATOMIC_LOAD_ADD_I8_POSTRA: ++ case LoongArch::ATOMIC_LOAD_ADD_I16_POSTRA: ++ case 
LoongArch::ATOMIC_LOAD_SUB_I8_POSTRA: ++ case LoongArch::ATOMIC_LOAD_SUB_I16_POSTRA: ++ case LoongArch::ATOMIC_LOAD_AND_I8_POSTRA: ++ case LoongArch::ATOMIC_LOAD_AND_I16_POSTRA: ++ case LoongArch::ATOMIC_LOAD_OR_I8_POSTRA: ++ case LoongArch::ATOMIC_LOAD_OR_I16_POSTRA: ++ case LoongArch::ATOMIC_LOAD_XOR_I8_POSTRA: ++ case LoongArch::ATOMIC_LOAD_XOR_I16_POSTRA: ++ case LoongArch::ATOMIC_LOAD_MAX_I8_POSTRA: ++ case LoongArch::ATOMIC_LOAD_MAX_I16_POSTRA: ++ case LoongArch::ATOMIC_LOAD_MIN_I8_POSTRA: ++ case LoongArch::ATOMIC_LOAD_MIN_I16_POSTRA: ++ case LoongArch::ATOMIC_LOAD_UMAX_I8_POSTRA: ++ case LoongArch::ATOMIC_LOAD_UMAX_I16_POSTRA: ++ case LoongArch::ATOMIC_LOAD_UMIN_I8_POSTRA: ++ case LoongArch::ATOMIC_LOAD_UMIN_I16_POSTRA: ++ return expandAtomicBinOpSubword(MBB, MBBI, NMBB); ++ case LoongArch::XINSERT_B_VIDX_PSEUDO_POSTRA: ++ case LoongArch::XINSERT_B_VIDX64_PSEUDO_POSTRA: ++ return expandXINSERT_BOp(MBB, MBBI, NMBB); ++ case LoongArch::INSERT_H_VIDX64_PSEUDO_POSTRA: ++ return expandINSERT_HOp(MBB, MBBI, NMBB); ++ case LoongArch::XINSERT_FW_VIDX_PSEUDO_POSTRA: ++ case LoongArch::XINSERT_FW_VIDX64_PSEUDO_POSTRA: ++ return expandXINSERT_FWOp(MBB, MBBI, NMBB); ++ case LoongArch::ATOMIC_LOAD_ADD_I32_POSTRA: ++ case LoongArch::ATOMIC_LOAD_SUB_I32_POSTRA: ++ case LoongArch::ATOMIC_LOAD_AND_I32_POSTRA: ++ case LoongArch::ATOMIC_LOAD_OR_I32_POSTRA: ++ case LoongArch::ATOMIC_LOAD_XOR_I32_POSTRA: ++ case LoongArch::ATOMIC_LOAD_NAND_I32_POSTRA: ++ case LoongArch::ATOMIC_SWAP_I32_POSTRA: ++ case LoongArch::ATOMIC_LOAD_MAX_I32_POSTRA: ++ case LoongArch::ATOMIC_LOAD_MIN_I32_POSTRA: ++ case LoongArch::ATOMIC_LOAD_UMAX_I32_POSTRA: ++ case LoongArch::ATOMIC_LOAD_UMIN_I32_POSTRA: ++ return expandAtomicBinOp(MBB, MBBI, NMBB, 4); ++ case LoongArch::ATOMIC_LOAD_ADD_I64_POSTRA: ++ case LoongArch::ATOMIC_LOAD_SUB_I64_POSTRA: ++ case LoongArch::ATOMIC_LOAD_AND_I64_POSTRA: ++ case LoongArch::ATOMIC_LOAD_OR_I64_POSTRA: ++ case LoongArch::ATOMIC_LOAD_XOR_I64_POSTRA: ++ case LoongArch::ATOMIC_LOAD_NAND_I64_POSTRA: ++ case LoongArch::ATOMIC_SWAP_I64_POSTRA: ++ case LoongArch::ATOMIC_LOAD_MAX_I64_POSTRA: ++ case LoongArch::ATOMIC_LOAD_MIN_I64_POSTRA: ++ case LoongArch::ATOMIC_LOAD_UMAX_I64_POSTRA: ++ case LoongArch::ATOMIC_LOAD_UMIN_I64_POSTRA: ++ return expandAtomicBinOp(MBB, MBBI, NMBB, 8); ++ default: ++ return Modified; ++ } ++} ++ ++bool LoongArchExpandPseudo::expandMBB(MachineBasicBlock &MBB) { ++ bool Modified = false; ++ ++ MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); ++ while (MBBI != E) { ++ MachineBasicBlock::iterator NMBBI = std::next(MBBI); ++ Modified |= expandMI(MBB, MBBI, NMBBI); ++ MBBI = NMBBI; ++ } ++ ++ return Modified; ++} ++ ++bool LoongArchExpandPseudo::runOnMachineFunction(MachineFunction &MF) { ++ STI = &static_cast(MF.getSubtarget()); ++ TII = STI->getInstrInfo(); ++ ++ bool Modified = false; ++ for (MachineFunction::iterator MFI = MF.begin(), E = MF.end(); MFI != E; ++ ++MFI) ++ Modified |= expandMBB(*MFI); ++ ++ if (Modified) ++ MF.RenumberBlocks(); ++ ++ return Modified; ++} ++ ++/// createLoongArchExpandPseudoPass - returns an instance of the pseudo instruction ++/// expansion pass. 
++FunctionPass *llvm::createLoongArchExpandPseudoPass() { ++ return new LoongArchExpandPseudo(); ++} +diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td +deleted file mode 100644 +index 20448492a..000000000 +--- a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td ++++ /dev/null +@@ -1,229 +0,0 @@ +-//=-- LoongArchInstrInfoF.td - Single-Precision Float instr --*- tablegen -*-=// +-// +-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +-// See https://llvm.org/LICENSE.txt for license information. +-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +-// +-//===----------------------------------------------------------------------===// +-// +-// This file describes the baisc single-precision floating-point instructions. +-// +-//===----------------------------------------------------------------------===// +- +-//===----------------------------------------------------------------------===// +-// LoongArch specific DAG Nodes. +-//===----------------------------------------------------------------------===// +- +-def SDT_LoongArchMOVGR2FR_W_LA64 +- : SDTypeProfile<1, 1, [SDTCisVT<0, f32>, SDTCisVT<1, i64>]>; +-def SDT_LoongArchMOVFR2GR_S_LA64 +- : SDTypeProfile<1, 1, [SDTCisVT<0, i64>, SDTCisVT<1, f32>]>; +-def SDT_LoongArchFTINT : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisFP<1>]>; +- +-def loongarch_movgr2fr_w_la64 +- : SDNode<"LoongArchISD::MOVGR2FR_W_LA64", SDT_LoongArchMOVGR2FR_W_LA64>; +-def loongarch_movfr2gr_s_la64 +- : SDNode<"LoongArchISD::MOVFR2GR_S_LA64", SDT_LoongArchMOVFR2GR_S_LA64>; +-def loongarch_ftint : SDNode<"LoongArchISD::FTINT", SDT_LoongArchFTINT>; +- +-//===----------------------------------------------------------------------===// +-// Instructions +-//===----------------------------------------------------------------------===// +- +-let Predicates = [HasBasicF] in { +- +-// Arithmetic Operation Instructions +-def FADD_S : FP_ALU_3R<0b00000001000000001, "fadd.s", FPR32>; +-def FSUB_S : FP_ALU_3R<0b00000001000000101, "fsub.s", FPR32>; +-def FMUL_S : FP_ALU_3R<0b00000001000001001, "fmul.s", FPR32>; +-def FDIV_S : FP_ALU_3R<0b00000001000001101, "fdiv.s", FPR32>; +-def FMADD_S : FP_ALU_4R<0b000010000001, "fmadd.s", FPR32>; +-def FMSUB_S : FP_ALU_4R<0b000010000101, "fmsub.s", FPR32>; +-def FNMADD_S : FP_ALU_4R<0b000010001001, "fnmadd.s", FPR32>; +-def FNMSUB_S : FP_ALU_4R<0b000010001101, "fnmsub.s", FPR32>; +-def FMAX_S : FP_ALU_3R<0b00000001000010001, "fmax.s", FPR32>; +-def FMIN_S : FP_ALU_3R<0b00000001000010101, "fmin.s", FPR32>; +-def FMAXA_S : FP_ALU_3R<0b00000001000011001, "fmaxa.s", FPR32>; +-def FMINA_S : FP_ALU_3R<0b00000001000011101, "fmina.s", FPR32>; +-def FABS_S : FP_ALU_2R<0b0000000100010100000001, "fabs.s", FPR32>; +-def FNEG_S : FP_ALU_2R<0b0000000100010100000101, "fneg.s", FPR32>; +-def FSQRT_S : FP_ALU_2R<0b0000000100010100010001, "fsqrt.s", FPR32>; +-def FRECIP_S : FP_ALU_2R<0b0000000100010100010101, "frecip.s", FPR32>; +-def FRSQRT_S : FP_ALU_2R<0b0000000100010100011001, "frsqrt.s", FPR32>; +-def FSCALEB_S : FP_ALU_3R<0b00000001000100001, "fscaleb.s", FPR32>; +-def FLOGB_S : FP_ALU_2R<0b0000000100010100001001, "flogb.s", FPR32>; +-def FCOPYSIGN_S : FP_ALU_3R<0b00000001000100101, "fcopysign.s", FPR32>; +-def FCLASS_S : FP_ALU_2R<0b0000000100010100001101, "fclass.s", FPR32>; +- +- +-// Comparison Instructions +-def FCMP_CAF_S : FP_CMP; +-def FCMP_CUN_S : FP_CMP; +-def FCMP_CEQ_S : FP_CMP; +-def FCMP_CUEQ_S : FP_CMP; +-def FCMP_CLT_S : FP_CMP; 
+-def FCMP_CULT_S : FP_CMP; +-def FCMP_CLE_S : FP_CMP; +-def FCMP_CULE_S : FP_CMP; +-def FCMP_CNE_S : FP_CMP; +-def FCMP_COR_S : FP_CMP; +-def FCMP_CUNE_S : FP_CMP; +-def FCMP_SAF_S : FP_CMP; +-def FCMP_SUN_S : FP_CMP; +-def FCMP_SEQ_S : FP_CMP; +-def FCMP_SUEQ_S : FP_CMP; +-def FCMP_SLT_S : FP_CMP; +-def FCMP_SULT_S : FP_CMP; +-def FCMP_SLE_S : FP_CMP; +-def FCMP_SULE_S : FP_CMP; +-def FCMP_SNE_S : FP_CMP; +-def FCMP_SOR_S : FP_CMP; +-def FCMP_SUNE_S : FP_CMP; +- +-// Conversion Instructions +-def FFINT_S_W : FP_CONV<0b0000000100011101000100, "ffint.s.w", FPR32, FPR32>; +-def FTINT_W_S : FP_CONV<0b0000000100011011000001, "ftint.w.s", FPR32, FPR32>; +-def FTINTRM_W_S : FP_CONV<0b0000000100011010000001, "ftintrm.w.s", FPR32, +- FPR32>; +-def FTINTRP_W_S : FP_CONV<0b0000000100011010010001, "ftintrp.w.s", FPR32, +- FPR32>; +-def FTINTRZ_W_S : FP_CONV<0b0000000100011010100001, "ftintrz.w.s", FPR32, +- FPR32>; +-def FTINTRNE_W_S : FP_CONV<0b0000000100011010110001, "ftintrne.w.s", FPR32, +- FPR32>; +-def FRINT_S : FP_CONV<0b0000000100011110010001, "frint.s", FPR32, FPR32>; +- +-// Move Instructions +-def FSEL_S : FP_SEL<0b00001101000000, "fsel", FPR32>; +-def FMOV_S : FP_MOV<0b0000000100010100100101, "fmov.s", FPR32, FPR32>; +-def MOVGR2FR_W : FP_MOV<0b0000000100010100101001, "movgr2fr.w", FPR32, GPR>; +-def MOVFR2GR_S : FP_MOV<0b0000000100010100101101, "movfr2gr.s", GPR, FPR32>; +-def MOVGR2FCSR : FP_MOV<0b0000000100010100110000, "movgr2fcsr", FCSR, GPR>; +-def MOVFCSR2GR : FP_MOV<0b0000000100010100110010, "movfcsr2gr", GPR, FCSR>; +-def MOVFR2CF_S : FP_MOV<0b0000000100010100110100, "movfr2cf", CFR, FPR32>; +-def MOVCF2FR_S : FP_MOV<0b0000000100010100110101, "movcf2fr", FPR32, CFR>; +-def MOVGR2CF : FP_MOV<0b0000000100010100110110, "movgr2cf", CFR, GPR>; +-def MOVCF2GR : FP_MOV<0b0000000100010100110111, "movcf2gr", GPR, CFR>; +- +-// Branch Instructions +-def BCEQZ : FP_BRANCH<0b01001000, "bceqz">; +-def BCNEZ : FP_BRANCH<0b01001001, "bcnez">; +- +-// Common Memory Access Instructions +-def FLD_S : FP_LOAD_2RI12<0b0010101100, "fld.s", FPR32>; +-def FST_S : FP_STORE_2RI12<0b0010101101, "fst.s", FPR32>; +-def FLDX_S : FP_LOAD_3R<0b00111000001100000, "fldx.s", FPR32>; +-def FSTX_S : FP_STORE_3R<0b00111000001110000, "fstx.s", FPR32>; +- +-// Bound Check Memory Access Instructions +-def FLDGT_S : FP_LOAD_3R<0b00111000011101000, "fldgt.s", FPR32>; +-def FLDLE_S : FP_LOAD_3R<0b00111000011101010, "fldle.s", FPR32>; +-def FSTGT_S : FP_STORE_3R<0b00111000011101100, "fstgt.s", FPR32>; +-def FSTLE_S : FP_STORE_3R<0b00111000011101110, "fstle.s", FPR32>; +- +-} // Predicates = [HasBasicF] +- +-//===----------------------------------------------------------------------===// +-// Pseudo-instructions and codegen patterns +-//===----------------------------------------------------------------------===// +- +-/// Generic pattern classes +- +-class PatFpr +- : Pat<(OpNode RegTy:$fj), (Inst $fj)>; +-class PatFprFpr +- : Pat<(OpNode RegTy:$fj, RegTy:$fk), (Inst $fj, $fk)>; +- +-let Predicates = [HasBasicF] in { +- +-/// Float arithmetic operations +- +-def : PatFprFpr; +-def : PatFprFpr; +-def : PatFprFpr; +-def : PatFprFpr; +-def : PatFpr; +- +-/// Setcc +- +-// Match non-signaling comparison +- +-// TODO: change setcc to any_fsetcc after call is supported because +-// we need to call llvm.experimental.constrained.fcmp.f32 in testcase. +-// See RISCV float-fcmp-strict.ll for reference. 
+-class PatFPSetcc +- : Pat<(setcc RegTy:$fj, RegTy:$fk, cc), +- (MOVCF2GR (CmpInst RegTy:$fj, RegTy:$fk))>; +-// SETOGT/SETOGE/SETUGT/SETUGE will expand into SETOLT/SETOLE/SETULT/SETULE. +-def : PatFPSetcc; +-def : PatFPSetcc; +-def : PatFPSetcc; +-def : PatFPSetcc; +-def : PatFPSetcc; +-def : PatFPSetcc; +-def : PatFPSetcc; +-def : PatFPSetcc; +-def : PatFPSetcc; +-def : PatFPSetcc; +-def : PatFPSetcc; +- +-// TODO: Match signaling comparison strict_fsetccs with FCMP_S*_S instructions. +- +-/// Select +- +-def : Pat<(select GPR:$cc, FPR32:$fk, FPR32:$fj), +- (FSEL_S FPR32:$fj, FPR32:$fk, (MOVGR2CF GPR:$cc))>; +- +-/// Selectcc +- +-class PatFPSelectcc +- : Pat<(select (GRLenVT (setcc RegTy:$a, RegTy:$b, cc)), RegTy:$t, RegTy:$f), +- (SelInst RegTy:$f, RegTy:$t, (CmpInst RegTy:$a, RegTy:$b))>; +-def : PatFPSelectcc; +-def : PatFPSelectcc; +-def : PatFPSelectcc; +-def : PatFPSelectcc; +-def : PatFPSelectcc; +-def : PatFPSelectcc; +-def : PatFPSelectcc; +-def : PatFPSelectcc; +-def : PatFPSelectcc; +-def : PatFPSelectcc; +- +-/// Loads +- +-defm : LdPat; +- +-/// Stores +- +-defm : StPat; +- +-/// Floating point constants +- +-def : Pat<(f32 fpimm0), (MOVGR2FR_W R0)>; +-def : Pat<(f32 fpimm0neg), (FNEG_S (MOVGR2FR_W R0))>; +-def : Pat<(f32 fpimm1), (FFINT_S_W (MOVGR2FR_W (ADDI_W R0, 1)))>; +- +-// FP Conversion +-def : Pat<(loongarch_ftint FPR32:$src), (FTINTRZ_W_S FPR32:$src)>; +-} // Predicates = [HasBasicF] +- +-let Predicates = [HasBasicF, IsLA64] in { +-// GPR -> FPR +-def : Pat<(loongarch_movgr2fr_w_la64 GPR:$src), (MOVGR2FR_W GPR:$src)>; +-// FPR -> GPR +-def : Pat<(loongarch_movfr2gr_s_la64 FPR32:$src), +- (MOVFR2GR_S FPR32:$src)>; +-// int -> f32 +-def : Pat<(f32 (sint_to_fp GPR:$src)), (FFINT_S_W (MOVGR2FR_W GPR:$src))>; +-} // Predicates = [HasBasicF, IsLA64] +- +-let Predicates = [HasBasicF, IsLA32] in { +-// GPR -> FPR +-def : Pat<(bitconvert (i32 GPR:$src)), (MOVGR2FR_W GPR:$src)>; +-// FPR -> GPR +-def : Pat<(i32 (bitconvert FPR32:$src)), (MOVFR2GR_S FPR32:$src)>; +-// int -> f32 +-def : Pat<(f32 (sint_to_fp (i32 GPR:$src))), (FFINT_S_W (MOVGR2FR_W GPR:$src))>; +-} // Predicates = [HasBasicF, IsLA64] +diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td +deleted file mode 100644 +index bb50cec9f..000000000 +--- a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td ++++ /dev/null +@@ -1,242 +0,0 @@ +-//=-- LoongArchInstrInfoD.td - Double-Precision Float instr -*- tablegen -*-==// +-// +-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +-// See https://llvm.org/LICENSE.txt for license information. +-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +-// +-//===----------------------------------------------------------------------===// +-// +-// This file describes the basic double-precision floating-point instructions. 
+-// +-//===----------------------------------------------------------------------===// +- +-//===----------------------------------------------------------------------===// +-// Instructions +-//===----------------------------------------------------------------------===// +- +-let Predicates = [HasBasicD] in { +- +-// Arithmetic Operation Instructions +-def FADD_D : FP_ALU_3R<0b00000001000000010, "fadd.d", FPR64>; +-def FSUB_D : FP_ALU_3R<0b00000001000000110, "fsub.d", FPR64>; +-def FMUL_D : FP_ALU_3R<0b00000001000001010, "fmul.d", FPR64>; +-def FDIV_D : FP_ALU_3R<0b00000001000001110, "fdiv.d", FPR64>; +-def FMADD_D : FP_ALU_4R<0b000010000010, "fmadd.d", FPR64>; +-def FMSUB_D : FP_ALU_4R<0b000010000110, "fmsub.d", FPR64>; +-def FNMADD_D : FP_ALU_4R<0b000010001010, "fnmadd.d", FPR64>; +-def FNMSUB_D : FP_ALU_4R<0b000010001110, "fnmsub.d", FPR64>; +-def FMAX_D : FP_ALU_3R<0b00000001000010010, "fmax.d", FPR64>; +-def FMIN_D : FP_ALU_3R<0b00000001000010110, "fmin.d", FPR64>; +-def FMAXA_D : FP_ALU_3R<0b00000001000011010, "fmaxa.d", FPR64>; +-def FMINA_D : FP_ALU_3R<0b00000001000011110, "fmina.d", FPR64>; +-def FABS_D : FP_ALU_2R<0b0000000100010100000010, "fabs.d", FPR64>; +-def FNEG_D : FP_ALU_2R<0b0000000100010100000110, "fneg.d", FPR64>; +-def FSQRT_D : FP_ALU_2R<0b0000000100010100010010, "fsqrt.d", FPR64>; +-def FRECIP_D : FP_ALU_2R<0b0000000100010100010110, "frecip.d", FPR64>; +-def FRSQRT_D : FP_ALU_2R<0b0000000100010100011010, "frsqrt.d", FPR64>; +-def FSCALEB_D : FP_ALU_3R<0b00000001000100010, "fscaleb.d", FPR64>; +-def FLOGB_D : FP_ALU_2R<0b0000000100010100001010, "flogb.d", FPR64>; +-def FCOPYSIGN_D : FP_ALU_3R<0b00000001000100110, "fcopysign.d", FPR64>; +-def FCLASS_D : FP_ALU_2R<0b0000000100010100001110, "fclass.d", FPR64>; +- +-// Comparison Instructions +-def FCMP_CAF_D : FP_CMP; +-def FCMP_CUN_D : FP_CMP; +-def FCMP_CEQ_D : FP_CMP; +-def FCMP_CUEQ_D : FP_CMP; +-def FCMP_CLT_D : FP_CMP; +-def FCMP_CULT_D : FP_CMP; +-def FCMP_CLE_D : FP_CMP; +-def FCMP_CULE_D : FP_CMP; +-def FCMP_CNE_D : FP_CMP; +-def FCMP_COR_D : FP_CMP; +-def FCMP_CUNE_D : FP_CMP; +-def FCMP_SAF_D : FP_CMP; +-def FCMP_SUN_D : FP_CMP; +-def FCMP_SEQ_D : FP_CMP; +-def FCMP_SUEQ_D : FP_CMP; +-def FCMP_SLT_D : FP_CMP; +-def FCMP_SULT_D : FP_CMP; +-def FCMP_SLE_D : FP_CMP; +-def FCMP_SULE_D : FP_CMP; +-def FCMP_SNE_D : FP_CMP; +-def FCMP_SOR_D : FP_CMP; +-def FCMP_SUNE_D : FP_CMP; +- +-// Conversion Instructions +-def FFINT_S_L : FP_CONV<0b0000000100011101000110, "ffint.s.l", FPR32, FPR64>; +-def FTINT_L_S : FP_CONV<0b0000000100011011001001, "ftint.l.s", FPR64, FPR32>; +-def FTINTRM_L_S : FP_CONV<0b0000000100011010001001, "ftintrm.l.s", FPR64, +- FPR32>; +-def FTINTRP_L_S : FP_CONV<0b0000000100011010011001, "ftintrp.l.s", FPR64, +- FPR32>; +-def FTINTRZ_L_S : FP_CONV<0b0000000100011010101001, "ftintrz.l.s", FPR64, +- FPR32>; +-def FTINTRNE_L_S : FP_CONV<0b0000000100011010111001, "ftintrne.l.s", FPR64, +- FPR32>; +-def FCVT_S_D : FP_CONV<0b0000000100011001000110, "fcvt.s.d", FPR32, FPR64>; +-def FCVT_D_S : FP_CONV<0b0000000100011001001001, "fcvt.d.s", FPR64, FPR32>; +-def FFINT_D_W : FP_CONV<0b0000000100011101001000, "ffint.d.w", FPR64, FPR32>; +-def FFINT_D_L : FP_CONV<0b0000000100011101001010, "ffint.d.l", FPR64, FPR64>; +-def FTINT_W_D : FP_CONV<0b0000000100011011000010, "ftint.w.d", FPR32, FPR64>; +-def FTINT_L_D : FP_CONV<0b0000000100011011001010, "ftint.l.d", FPR64, FPR64>; +-def FTINTRM_W_D : FP_CONV<0b0000000100011010000010, "ftintrm.w.d", FPR32, +- FPR64>; +-def FTINTRM_L_D : 
FP_CONV<0b0000000100011010001010, "ftintrm.l.d", FPR64, +- FPR64>; +-def FTINTRP_W_D : FP_CONV<0b0000000100011010010010, "ftintrp.w.d", FPR32, +- FPR64>; +-def FTINTRP_L_D : FP_CONV<0b0000000100011010011010, "ftintrp.l.d", FPR64, +- FPR64>; +-def FTINTRZ_W_D : FP_CONV<0b0000000100011010100010, "ftintrz.w.d", FPR32, +- FPR64>; +-def FTINTRZ_L_D : FP_CONV<0b0000000100011010101010, "ftintrz.l.d", FPR64, +- FPR64>; +-def FTINTRNE_W_D : FP_CONV<0b0000000100011010110010, "ftintrne.w.d", FPR32, +- FPR64>; +-def FTINTRNE_L_D : FP_CONV<0b0000000100011010111010, "ftintrne.l.d", FPR64, +- FPR64>; +-def FRINT_D : FP_CONV<0b0000000100011110010010, "frint.d", FPR64, FPR64>; +- +-// Move Instructions +-def FMOV_D : FP_MOV<0b0000000100010100100110, "fmov.d", FPR64, FPR64>; +-def MOVFRH2GR_S : FP_MOV<0b0000000100010100101111, "movfrh2gr.s", GPR, FPR64>; +-let isCodeGenOnly = 1 in { +-def MOVFR2GR_S_64 : FP_MOV<0b0000000100010100101101, "movfr2gr.s", GPR, FPR64>; +-def FSEL_D : FP_SEL<0b00001101000000, "fsel", FPR64>; +-} // isCodeGenOnly = 1 +-let Constraints = "$dst = $out" in { +-def MOVGR2FRH_W : FPFmtMOV<0b0000000100010100101011, (outs FPR64:$out), +- (ins FPR64:$dst, GPR:$src), "movgr2frh.w", +- "$dst, $src">; +-} // Constraints = "$dst = $out" +- +-// Common Memory Access Instructions +-def FLD_D : FP_LOAD_2RI12<0b0010101110, "fld.d", FPR64>; +-def FST_D : FP_STORE_2RI12<0b0010101111, "fst.d", FPR64>; +-def FLDX_D : FP_LOAD_3R<0b00111000001101000, "fldx.d", FPR64>; +-def FSTX_D : FP_STORE_3R<0b00111000001111000, "fstx.d", FPR64>; +- +-// Bound Check Memory Access Instructions +-def FLDGT_D : FP_LOAD_3R<0b00111000011101001, "fldgt.d", FPR64>; +-def FLDLE_D : FP_LOAD_3R<0b00111000011101011, "fldle.d", FPR64>; +-def FSTGT_D : FP_STORE_3R<0b00111000011101101, "fstgt.d", FPR64>; +-def FSTLE_D : FP_STORE_3R<0b00111000011101111, "fstle.d", FPR64>; +- +-} // Predicates = [HasBasicD] +- +-// Instructions only available on LA64 +-let Predicates = [HasBasicD, IsLA64] in { +-def MOVGR2FR_D : FP_MOV<0b0000000100010100101010, "movgr2fr.d", FPR64, GPR>; +-def MOVFR2GR_D : FP_MOV<0b0000000100010100101110, "movfr2gr.d", GPR, FPR64>; +-} // Predicates = [HasBasicD, IsLA64] +- +-// Instructions only available on LA32 +-let Predicates = [HasBasicD, IsLA32], isCodeGenOnly = 1 in { +-def MOVGR2FR_W_64 : FP_MOV<0b0000000100010100101001, "movgr2fr.w", FPR64, GPR>; +-} // Predicates = [HasBasicD, IsLA32], isCodeGenOnly = 1 +- +-//===----------------------------------------------------------------------===// +-// Pseudo-instructions and codegen patterns +-//===----------------------------------------------------------------------===// +- +-let Predicates = [HasBasicD] in { +- +-/// Float arithmetic operations +- +-def : PatFprFpr; +-def : PatFprFpr; +-def : PatFprFpr; +-def : PatFprFpr; +-def : PatFpr; +- +-/// Setcc +- +-// Match non-signaling comparison +- +-// TODO: Change setcc to any_fsetcc after call is supported because +-// we need to call llvm.experimental.constrained.fcmp.f64 in testcase. +-// See RISCV float-fcmp-strict.ll for reference. +- +-// SETOGT/SETOGE/SETUGT/SETUGE will expand into SETOLT/SETOLE/SETULT/SETULE. +-def : PatFPSetcc; +-def : PatFPSetcc; +-def : PatFPSetcc; +-def : PatFPSetcc; +-def : PatFPSetcc; +-def : PatFPSetcc; +-def : PatFPSetcc; +-def : PatFPSetcc; +-def : PatFPSetcc; +-def : PatFPSetcc; +-def : PatFPSetcc; +- +-// TODO: Match signaling comparison strict_fsetccs with FCMP_S*_D instructions. 
+- +-/// Select +- +-def : Pat<(select GPR:$cc, FPR64:$fk, FPR64:$fj), +- (FSEL_D FPR64:$fj, FPR64:$fk, (MOVGR2CF GPR:$cc))>; +- +-/// Selectcc +- +-def : PatFPSelectcc; +-def : PatFPSelectcc; +-def : PatFPSelectcc; +-def : PatFPSelectcc; +-def : PatFPSelectcc; +-def : PatFPSelectcc; +-def : PatFPSelectcc; +-def : PatFPSelectcc; +-def : PatFPSelectcc; +-def : PatFPSelectcc; +- +-/// Loads +- +-defm : LdPat; +- +-/// Stores +- +-defm : StPat; +- +-/// FP conversion operations +- +-def : Pat<(loongarch_ftint FPR64:$src), (FTINTRZ_W_D FPR64:$src)>; +-def : Pat<(f64 (loongarch_ftint FPR64:$src)), (FTINTRZ_L_D FPR64:$src)>; +-def : Pat<(loongarch_ftint FPR32:$src), (FTINTRZ_L_S FPR32:$src)>; +- +-// f64 -> f32 +-def : Pat<(f32 (fpround FPR64:$src)), (FCVT_S_D FPR64:$src)>; +-// f32 -> f64 +-def : Pat<(f64 (fpextend FPR32:$src)), (FCVT_D_S FPR32:$src)>; +-} // Predicates = [HasBasicD] +- +-/// Floating point constants +- +-let Predicates = [HasBasicD, IsLA64] in { +-def : Pat<(f64 fpimm0), (MOVGR2FR_D R0)>; +-def : Pat<(f64 fpimm0neg), (FNEG_D (MOVGR2FR_D R0))>; +-def : Pat<(f64 fpimm1), (FFINT_D_L (MOVGR2FR_D (ADDI_D R0, 1)))>; +- +-// Convert int to FP +-def : Pat<(f64 (sint_to_fp (i64 (sexti32 (i64 GPR:$src))))), +- (FFINT_D_W (MOVGR2FR_W GPR:$src))>; +-def : Pat<(f64 (sint_to_fp GPR:$src)), (FFINT_D_L (MOVGR2FR_D GPR:$src))>; +- +-def : Pat<(f64 (uint_to_fp (i64 (zexti32 (i64 GPR:$src))))), +- (FFINT_D_W (MOVGR2FR_W GPR:$src))>; +- +-def : Pat<(bitconvert GPR:$src), (MOVGR2FR_D GPR:$src)>; +- +-// Convert FP to int +-def : Pat<(bitconvert FPR64:$src), (MOVFR2GR_D FPR64:$src)>; +-} // Predicates = [HasBasicD, IsLA64] +- +-let Predicates = [HasBasicD, IsLA32] in { +-def : Pat<(f64 fpimm0), (MOVGR2FRH_W (MOVGR2FR_W_64 R0), R0)>; +-def : Pat<(f64 fpimm0neg), (FNEG_D (MOVGR2FRH_W (MOVGR2FR_W_64 R0), R0))>; +-def : Pat<(f64 fpimm1), (FCVT_D_S (FFINT_S_W (MOVGR2FR_W (ADDI_W R0, 1))))>; +- +-// Convert int to FP +-def : Pat<(f64 (sint_to_fp (i32 GPR:$src))), (FFINT_D_W (MOVGR2FR_W GPR:$src))>; +-} // Predicates = [HasBasicD, IsLA32] +diff --git a/llvm/lib/Target/LoongArch/LoongArchFloatInstrFormats.td b/llvm/lib/Target/LoongArch/LoongArchFloatInstrFormats.td +deleted file mode 100644 +index d2ba1fdff..000000000 +--- a/llvm/lib/Target/LoongArch/LoongArchFloatInstrFormats.td ++++ /dev/null +@@ -1,241 +0,0 @@ +-//==- LoongArchInstrFormatsF.td - LoongArch FP Instr Formats -*- tablegen -*-=// +-// +-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +-// See https://llvm.org/LICENSE.txt for license information. +-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +-// +-//===----------------------------------------------------------------------===// +- +-//===----------------------------------------------------------------------===// +-// Describe LoongArch floating-point instructions format +-// +-// opcode - operation code. +-// fd - destination register operand. +-// {c/f}{j/k/a} - source register operand. +-// immN - immediate data operand. 
+-// +-//===----------------------------------------------------------------------===// +- +-// 2R-type +-// +-class FPFmt2R op, dag outs, dag ins, string opcstr, string opnstr, +- list pattern = []> +- : LAInst { +- bits<5> fj; +- bits<5> fd; +- +- let Inst{31-10} = op; +- let Inst{9-5} = fj; +- let Inst{4-0} = fd; +-} +- +-// 3R-type +-// +-class FPFmt3R op, dag outs, dag ins, string opcstr, string opnstr, +- list pattern = []> +- : LAInst { +- bits<5> fk; +- bits<5> fj; +- bits<5> fd; +- +- let Inst{31-15} = op; +- let Inst{14-10} = fk; +- let Inst{9-5} = fj; +- let Inst{4-0} = fd; +-} +- +-// 4R-type +-// +-class FPFmt4R op, dag outs, dag ins, string opcstr, string opnstr, +- list pattern = []> +- : LAInst { +- bits<5> fa; +- bits<5> fk; +- bits<5> fj; +- bits<5> fd; +- +- let Inst{31-20} = op; +- let Inst{19-15} = fa; +- let Inst{14-10} = fk; +- let Inst{9-5} = fj; +- let Inst{4-0} = fd; +-} +- +-// 2RI12-type +-// +-class FPFmt2RI12 op, dag outs, dag ins, string opcstr, string opnstr, +- list pattern = []> +- : LAInst { +- bits<12> imm12; +- bits<5> rj; +- bits<5> fd; +- +- let Inst{31-22} = op; +- let Inst{21-10} = imm12; +- let Inst{9-5} = rj; +- let Inst{4-0} = fd; +-} +- +-// FmtFCMP +-// +-class FPFmtFCMP op, bits<5> cond, dag outs, dag ins, string opcstr, +- string opnstr, list pattern = []> +- : LAInst { +- bits<5> fk; +- bits<5> fj; +- bits<3> cd; +- +- let Inst{31-20} = op; +- let Inst{19-15} = cond; +- let Inst{14-10} = fk; +- let Inst{9-5} = fj; +- let Inst{4-3} = 0b00; +- let Inst{2-0} = cd; +-} +- +-// FPFmtBR +-// +-class FPFmtBR opcode, dag outs, dag ins, string opcstr, +- string opnstr, list pattern = []> +- : LAInst { +- bits<21> imm21; +- bits<3> cj; +- +- let Inst{31-26} = opcode{7-2}; +- let Inst{25-10} = imm21{15-0}; +- let Inst{9-8} = opcode{1-0}; +- let Inst{7-5} = cj; +- let Inst{4-0} = imm21{20-16}; +-} +- +-// FmtFSEL +-// +-class FPFmtFSEL op, dag outs, dag ins, string opcstr, string opnstr, +- list pattern = []> +- : LAInst { +- bits<3> ca; +- bits<5> fk; +- bits<5> fj; +- bits<5> fd; +- +- let Inst{31-18} = op; +- let Inst{17-15} = ca; +- let Inst{14-10} = fk; +- let Inst{9-5} = fj; +- let Inst{4-0} = fd; +-} +- +-// FPFmtMOV +-// +-class FPFmtMOV op, dag outs, dag ins, string opcstr, string opnstr, +- list pattern = []> +- : LAInst { +- bits<5> src; +- bits<5> dst; +- +- let Inst{31-10} = op; +- let Inst{9-5} = src; +- let Inst{4-0} = dst; +-} +- +-// FPFmtMEM +-// +-class FPFmtMEM op, dag outs, dag ins, string opcstr, string opnstr, +- list pattern = []> +- : LAInst { +- bits<5> rk; +- bits<5> rj; +- bits<5> fd; +- +- let Inst{31-15} = op; +- let Inst{14-10} = rk; +- let Inst{9-5} = rj; +- let Inst{4-0} = fd; +-} +- +-//===----------------------------------------------------------------------===// +-// Instruction class templates +-//===----------------------------------------------------------------------===// +- +-class FP_ALU_2R op, string opstr, RegisterClass rc> +- : FPFmt2R; +- +-class FP_ALU_3R op, string opstr, RegisterClass rc> +- : FPFmt3R; +- +-class FP_ALU_4R op, string opstr, RegisterClass rc> +- : FPFmt4R; +- +-class FPCMPOpc value> { +- bits<12> val = value; +-} +- +-class FPCMPCond value> { +- bits<5> val = value; +-} +- +-class FP_CMP +- : FPFmtFCMP; +- +-class FP_CONV op, string opstr, RegisterClass rcd, RegisterClass rcs> +- : FPFmt2R; +- +-class FP_MOV op, string opstr, RegisterClass rcd, RegisterClass rcs> +- : FPFmtMOV; +- +-class FP_SEL op, string opstr, RegisterClass rc> +- : FPFmtFSEL; +- +-class FP_BRANCH opcode, string opstr> 
+- : FPFmtBR { +- let isBranch = 1; +- let isTerminator = 1; +-} +- +-let mayLoad = 1 in { +-class FP_LOAD_3R op, string opstr, RegisterClass rc> +- : FPFmtMEM; +-class FP_LOAD_2RI12 op, string opstr, RegisterClass rc> +- : FPFmt2RI12; +-} // mayLoad = 1 +- +-let mayStore = 1 in { +-class FP_STORE_3R op, string opstr, RegisterClass rc> +- : FPFmtMEM; +-class FP_STORE_2RI12 op, string opstr, RegisterClass rc> +- : FPFmt2RI12; +-} // mayStore = 1 +- +-def FPCMP_OPC_S : FPCMPOpc<0b000011000001>; +-def FPCMP_OPC_D : FPCMPOpc<0b000011000010>; +- +-def FPCMP_COND_CAF : FPCMPCond<0x0>; +-def FPCMP_COND_CUN : FPCMPCond<0x8>; +-def FPCMP_COND_CEQ : FPCMPCond<0x4>; +-def FPCMP_COND_CUEQ : FPCMPCond<0xC>; +-def FPCMP_COND_CLT : FPCMPCond<0x2>; +-def FPCMP_COND_CULT : FPCMPCond<0xA>; +-def FPCMP_COND_CLE : FPCMPCond<0x6>; +-def FPCMP_COND_CULE : FPCMPCond<0xE>; +-def FPCMP_COND_CNE : FPCMPCond<0x10>; +-def FPCMP_COND_COR : FPCMPCond<0x14>; +-def FPCMP_COND_CUNE : FPCMPCond<0x18>; +-def FPCMP_COND_SAF : FPCMPCond<0x1>; +-def FPCMP_COND_SUN : FPCMPCond<0x9>; +-def FPCMP_COND_SEQ : FPCMPCond<0x5>; +-def FPCMP_COND_SUEQ : FPCMPCond<0xD>; +-def FPCMP_COND_SLT : FPCMPCond<0x3>; +-def FPCMP_COND_SULT : FPCMPCond<0xB>; +-def FPCMP_COND_SLE : FPCMPCond<0x7>; +-def FPCMP_COND_SULE : FPCMPCond<0xF>; +-def FPCMP_COND_SNE : FPCMPCond<0x11>; +-def FPCMP_COND_SOR : FPCMPCond<0x15>; +-def FPCMP_COND_SUNE : FPCMPCond<0x19>; +diff --git a/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp +index 0d9ec9e2e..7c4c141e1 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp ++++ b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp +@@ -1,4 +1,4 @@ +-//===-- LoongArchFrameLowering.cpp - LoongArch Frame Information -*- C++ -*-==// ++//===-- LoongArchFrameLowering.cpp - LoongArch Frame Information --------------------===// + // + // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + // See https://llvm.org/LICENSE.txt for license information. 
+@@ -11,192 +11,365 @@ + //===----------------------------------------------------------------------===// + + #include "LoongArchFrameLowering.h" +-#include "LoongArchMachineFunctionInfo.h" +-#include "LoongArchSubtarget.h" + #include "MCTargetDesc/LoongArchBaseInfo.h" ++#include "MCTargetDesc/LoongArchABIInfo.h" ++#include "LoongArchInstrInfo.h" ++#include "LoongArchMachineFunction.h" ++#include "LoongArchTargetMachine.h" ++#include "LoongArchRegisterInfo.h" ++#include "LoongArchSubtarget.h" ++#include "llvm/ADT/BitVector.h" ++#include "llvm/ADT/StringRef.h" ++#include "llvm/ADT/StringSwitch.h" ++#include "llvm/CodeGen/MachineBasicBlock.h" + #include "llvm/CodeGen/MachineFrameInfo.h" + #include "llvm/CodeGen/MachineFunction.h" ++#include "llvm/CodeGen/MachineInstr.h" + #include "llvm/CodeGen/MachineInstrBuilder.h" ++#include "llvm/CodeGen/MachineModuleInfo.h" ++#include "llvm/CodeGen/MachineOperand.h" + #include "llvm/CodeGen/MachineRegisterInfo.h" + #include "llvm/CodeGen/RegisterScavenging.h" +-#include "llvm/IR/DiagnosticInfo.h" ++#include "llvm/CodeGen/TargetInstrInfo.h" ++#include "llvm/CodeGen/TargetRegisterInfo.h" ++#include "llvm/CodeGen/TargetSubtargetInfo.h" ++#include "llvm/IR/DataLayout.h" ++#include "llvm/IR/DebugLoc.h" ++#include "llvm/IR/Function.h" + #include "llvm/MC/MCDwarf.h" ++#include "llvm/MC/MCRegisterInfo.h" ++#include "llvm/MC/MachineLocation.h" ++#include "llvm/Support/CodeGen.h" ++#include "llvm/Support/ErrorHandling.h" ++#include "llvm/Support/MathExtras.h" ++#include "llvm/Target/TargetOptions.h" ++#include ++#include ++#include ++#include + + using namespace llvm; + +-#define DEBUG_TYPE "loongarch-frame-lowering" +- +-// Return true if the specified function should have a dedicated frame +-// pointer register. This is true if frame pointer elimination is +-// disabled, if it needs dynamic stack realignment, if the function has +-// variable sized allocas, or if the frame address is taken. +-bool LoongArchFrameLowering::hasFP(const MachineFunction &MF) const { +- const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); +- +- const MachineFrameInfo &MFI = MF.getFrameInfo(); +- return MF.getTarget().Options.DisableFramePointerElim(MF) || +- RegInfo->hasStackRealignment(MF) || MFI.hasVarSizedObjects() || +- MFI.isFrameAddressTaken(); +-} +- +-bool LoongArchFrameLowering::hasBP(const MachineFunction &MF) const { ++// We would like to split the SP adjustment to reduce prologue/epilogue ++// as following instructions. In this way, the offset of the callee saved ++// register could fit in a single store. ++uint64_t ++LoongArchFrameLowering::getFirstSPAdjustAmount(const MachineFunction &MF, ++ bool IsPrologue) const { + const MachineFrameInfo &MFI = MF.getFrameInfo(); +- const TargetRegisterInfo *TRI = STI.getRegisterInfo(); +- +- return MFI.hasVarSizedObjects() && TRI->hasStackRealignment(MF); +-} +- +-void LoongArchFrameLowering::adjustReg(MachineBasicBlock &MBB, +- MachineBasicBlock::iterator MBBI, +- const DebugLoc &DL, Register DestReg, +- Register SrcReg, int64_t Val, +- MachineInstr::MIFlag Flag) const { +- const LoongArchInstrInfo *TII = STI.getInstrInfo(); +- bool IsLA64 = STI.is64Bit(); +- +- if (DestReg == SrcReg && Val == 0) +- return; ++ const std::vector &CSI = MFI.getCalleeSavedInfo(); ++ uint64_t StackSize = MFI.getStackSize(); + +- if (isInt<12>(Val)) { +- // addi.w/d $DstReg, $SrcReg, Val +- BuildMI(MBB, MBBI, DL, +- TII->get(IsLA64 ? 
LoongArch::ADDI_D : LoongArch::ADDI_W), DestReg) +- .addReg(SrcReg) +- .addImm(Val) +- .setMIFlag(Flag); +- return; ++ // Return the FirstSPAdjustAmount if the StackSize can not fit in signed ++ // 12-bit and there exists a callee saved register need to be pushed. ++ if (!isInt<12>(StackSize)) { ++ // FirstSPAdjustAmount is choosed as (2048 - StackAlign) ++ // because 2048 will cause sp = sp + 2048 in epilogue split into ++ // multi-instructions. The offset smaller than 2048 can fit in signle ++ // load/store instruction and we have to stick with the stack alignment. ++ return CSI.size() > 0 ? 2048 - getStackAlign().value() ++ : (IsPrologue ? 2048 : 0); + } +- +- report_fatal_error("adjustReg cannot yet handle adjustments >12 bits"); ++ return 0; + } + +-// Determine the size of the frame and maximum call frame size. +-void LoongArchFrameLowering::determineFrameLayout(MachineFunction &MF) const { +- MachineFrameInfo &MFI = MF.getFrameInfo(); +- +- // Get the number of bytes to allocate from the FrameInfo. +- uint64_t FrameSize = MFI.getStackSize(); +- +- // Make sure the frame is aligned. +- FrameSize = alignTo(FrameSize, getStackAlign()); +- +- // Update frame info. +- MFI.setStackSize(FrameSize); +-} ++//===----------------------------------------------------------------------===// ++// ++// Stack Frame Processing methods ++// +----------------------------+ ++// ++// The stack is allocated decrementing the stack pointer on ++// the first instruction of a function prologue. Once decremented, ++// all stack references are done thought a positive offset ++// from the stack/frame pointer, so the stack is considering ++// to grow up! Otherwise terrible hacks would have to be made ++// to get this stack ABI compliant :) ++// ++// The stack frame required by the ABI (after call): ++// Offset ++// ++// 0 ---------- ++// 4 Args to pass ++// . Alloca allocations ++// . Local Area ++// . CPU "Callee Saved" Registers ++// . saved FP ++// . saved RA ++// . FPU "Callee Saved" Registers ++// StackSize ----------- ++// ++// Offset - offset from sp after stack allocation on function prologue ++// ++// The sp is the stack pointer subtracted/added from the stack size ++// at the Prologue/Epilogue ++// ++// References to the previous stack (to obtain arguments) are done ++// with offsets that exceeds the stack size: (stacksize+(4*(num_arg-1)) ++// ++// Examples: ++// - reference to the actual stack frame ++// for any local area var there is smt like : FI >= 0, StackOffset: 4 ++// st.w REGX, SP, 4 ++// ++// - reference to previous stack frame ++// suppose there's a load to the 5th arguments : FI < 0, StackOffset: 16. ++// The emitted instruction will be something like: ++// ld.w REGX, SP, 16+StackSize ++// ++// Since the total stack size is unknown on LowerFormalArguments, all ++// stack references (ObjectOffset) created to reference the function ++// arguments, are negative numbers. This way, on eliminateFrameIndex it's ++// possible to detect those references and the offsets are adjusted to ++// their real location. 
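The split-adjustment policy implemented by getFirstSPAdjustAmount above is easy to state in isolation. Below is a minimal standalone sketch of it, assuming the same signed 12-bit addi.w/d immediate range; the free-function name and parameters are hypothetical, since the real code reads these values from MachineFrameInfo and getStackAlign().

  #include <cstdint>

  // Sketch of the first-SP-adjustment policy: split only when the whole
  // frame cannot be allocated with a single addi.w/d (signed 12-bit imm).
  uint64_t firstSPAdjustAmount(uint64_t StackSize, bool HasCalleeSaves,
                               uint64_t StackAlignment, bool IsPrologue) {
    if (StackSize <= 2047)   // fits isInt<12>; one adjustment is enough
      return 0;
    // With callee-saved registers to spill, stop just short of 2048 so the
    // epilogue's "sp = sp + amount" still fits one signed 12-bit addi;
    // backing off by the stack alignment keeps sp correctly aligned.
    if (HasCalleeSaves)
      return 2048 - StackAlignment;
    // No callee saves: only the prologue bothers to split.
    return IsPrologue ? 2048 : 0;
  }

With a 16-byte stack alignment this gives a first step of 2032 bytes, which keeps every callee-saved spill offset within reach of a single st.w/st.d.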
++// ++//===----------------------------------------------------------------------===// ++// ++LoongArchFrameLowering::LoongArchFrameLowering(const LoongArchSubtarget &STI) ++ : TargetFrameLowering(StackGrowsDown, STI.getStackAlignment(), 0, ++ STI.getStackAlignment()), STI(STI) {} + + void LoongArchFrameLowering::emitPrologue(MachineFunction &MF, + MachineBasicBlock &MBB) const { + MachineFrameInfo &MFI = MF.getFrameInfo(); +- const LoongArchRegisterInfo *RI = STI.getRegisterInfo(); +- const LoongArchInstrInfo *TII = STI.getInstrInfo(); +- MachineBasicBlock::iterator MBBI = MBB.begin(); ++ LoongArchFunctionInfo *LoongArchFI = MF.getInfo(); + +- Register SPReg = LoongArch::R3; +- Register FPReg = LoongArch::R22; +- +- // Debug location must be unknown since the first debug location is used +- // to determine the end of the prologue. +- DebugLoc DL; +- +- // Determine the correct frame layout +- determineFrameLayout(MF); ++ const LoongArchInstrInfo &TII = ++ *static_cast(STI.getInstrInfo()); ++ const LoongArchRegisterInfo &RegInfo = ++ *static_cast(STI.getRegisterInfo()); ++ MachineBasicBlock::iterator MBBI = MBB.begin(); ++ DebugLoc dl; ++ LoongArchABIInfo ABI = STI.getABI(); ++ unsigned SP = ABI.GetStackPtr(); ++ unsigned FP = ABI.GetFramePtr(); ++ unsigned ZERO = ABI.GetNullPtr(); ++ unsigned MOVE = ABI.GetGPRMoveOp(); ++ unsigned ADDI = ABI.GetPtrAddiOp(); ++ unsigned AND = ABI.IsLP64() ? LoongArch::AND : LoongArch::AND32; ++ unsigned SLLI = ABI.IsLP64() ? LoongArch::SLLI_D : LoongArch::SLLI_W; ++ ++ const TargetRegisterClass *RC = ABI.ArePtrs64bit() ? ++ &LoongArch::GPR64RegClass : &LoongArch::GPR32RegClass; + + // First, compute final stack size. + uint64_t StackSize = MFI.getStackSize(); ++ uint64_t RealStackSize = StackSize; + +- // Early exit if there is no need to allocate space in the stack. ++ // No need to allocate space on the stack. + if (StackSize == 0 && !MFI.adjustsStack()) + return; + ++ uint64_t FirstSPAdjustAmount = getFirstSPAdjustAmount(MF, true); ++ uint64_t SecondSPAdjustAmount = RealStackSize - FirstSPAdjustAmount; ++ // Split the SP adjustment to reduce the offsets of callee saved spill. ++ if (FirstSPAdjustAmount) ++ StackSize = FirstSPAdjustAmount; ++ + // Adjust stack. +- adjustReg(MBB, MBBI, DL, SPReg, SPReg, -StackSize, MachineInstr::FrameSetup); +- // Emit ".cfi_def_cfa_offset StackSize". +- unsigned CFIIndex = +- MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, StackSize)); +- BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) +- .addCFIIndex(CFIIndex) +- .setMIFlag(MachineInstr::FrameSetup); ++ TII.adjustReg(SP, SP, -StackSize, MBB, MBBI, MachineInstr::FrameSetup); ++ if (FirstSPAdjustAmount != 2048 || SecondSPAdjustAmount == 0) { ++ // Emit ".cfi_def_cfa_offset StackSize". ++ unsigned CFIIndex = ++ MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, StackSize)); ++ BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) ++ .addCFIIndex(CFIIndex); ++ } + +- const auto &CSI = MFI.getCalleeSavedInfo(); ++ MachineModuleInfo &MMI = MF.getMMI(); ++ const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); ++ ++ const std::vector &CSI = MFI.getCalleeSavedInfo(); ++ ++ if (!CSI.empty()) { ++ // Find the instruction past the last instruction that saves a callee-saved ++ // register to the stack. ++ for (unsigned i = 0; i < CSI.size(); ++i) ++ ++MBBI; ++ ++ // Iterate over list of callee-saved registers and emit .cfi_offset ++ // directives. 
++ for (std::vector::const_iterator I = CSI.begin(), ++ E = CSI.end(); I != E; ++I) { ++ int64_t Offset = MFI.getObjectOffset(I->getFrameIdx()); ++ unsigned Reg = I->getReg(); ++ unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( ++ nullptr, MRI->getDwarfRegNum(Reg, true), Offset)); ++ BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) ++ .addCFIIndex(CFIIndex); ++ } ++ } + +- // The frame pointer is callee-saved, and code has been generated for us to +- // save it to the stack. We need to skip over the storing of callee-saved +- // registers as the frame pointer must be modified after it has been saved +- // to the stack, not before. +- std::advance(MBBI, CSI.size()); +- +- // Iterate over list of callee-saved registers and emit .cfi_offset +- // directives. +- for (const auto &Entry : CSI) { +- int64_t Offset = MFI.getObjectOffset(Entry.getFrameIdx()); +- unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( +- nullptr, RI->getDwarfRegNum(Entry.getReg(), true), Offset)); +- BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) +- .addCFIIndex(CFIIndex) +- .setMIFlag(MachineInstr::FrameSetup); ++ if (LoongArchFI->callsEhReturn()) { ++ // Insert instructions that spill eh data registers. ++ for (int I = 0; I < 4; ++I) { ++ if (!MBB.isLiveIn(ABI.GetEhDataReg(I))) ++ MBB.addLiveIn(ABI.GetEhDataReg(I)); ++ TII.storeRegToStackSlot(MBB, MBBI, ABI.GetEhDataReg(I), false, ++ LoongArchFI->getEhDataRegFI(I), RC, &RegInfo); ++ } ++ ++ // Emit .cfi_offset directives for eh data registers. ++ for (int I = 0; I < 4; ++I) { ++ int64_t Offset = MFI.getObjectOffset(LoongArchFI->getEhDataRegFI(I)); ++ unsigned Reg = MRI->getDwarfRegNum(ABI.GetEhDataReg(I), true); ++ unsigned CFIIndex = MF.addFrameInst( ++ MCCFIInstruction::createOffset(nullptr, Reg, Offset)); ++ BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) ++ .addCFIIndex(CFIIndex); ++ } + } + +- // Generate new FP. ++ // If framepointer enabled, set it to point to the stack pointer on entry. + if (hasFP(MF)) { +- adjustReg(MBB, MBBI, DL, FPReg, SPReg, StackSize, MachineInstr::FrameSetup); +- +- // Emit ".cfi_def_cfa $fp, 0" +- unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa( +- nullptr, RI->getDwarfRegNum(FPReg, true), 0)); +- BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) ++ // Insert instruction "addi.w/d $fp, $sp, StackSize" at this location. ++ TII.adjustReg(FP, SP, StackSize - LoongArchFI->getVarArgsSaveSize(), MBB, ++ MBBI, MachineInstr::FrameSetup); ++ // Emit ".cfi_def_cfa $fp, $varargs_size". ++ unsigned CFIIndex = MF.addFrameInst( ++ MCCFIInstruction::cfiDefCfa(nullptr, MRI->getDwarfRegNum(FP, true), ++ LoongArchFI->getVarArgsSaveSize())); ++ BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex) + .setMIFlag(MachineInstr::FrameSetup); + } ++ ++ // Emit the second SP adjustment after saving callee saved registers. ++ if (FirstSPAdjustAmount && SecondSPAdjustAmount) { ++ if (hasFP(MF)) { ++ assert(SecondSPAdjustAmount > 0 && ++ "SecondSPAdjustAmount should be greater than zero"); ++ TII.adjustReg(SP, SP, -SecondSPAdjustAmount, MBB, MBBI, ++ MachineInstr::FrameSetup); ++ } else { ++ // FIXME: RegScavenger will place the spill instruction before the ++ // prologue if a VReg is created in the prologue. This will pollute the ++ // caller's stack data. Therefore, until there is better way, we just use ++ // the `addi.w/d` instruction for stack adjustment to ensure that VReg ++ // will not be created. 
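When no frame pointer is available, the FIXME above explains why the second adjustment is emitted as a chain of addi.w/d instructions rather than by materialising the amount in a virtual register (the register scavenger could otherwise spill into the caller's frame). The loop that follows steps the amount down in chunks of at most 2048 bytes; a rough standalone model of that decomposition, with an illustrative function name only:

  #include <cstdint>
  #include <vector>

  // Break a large downward SP adjustment into addi immediates no smaller
  // than -2048, mirroring the chunking loop in emitPrologue.
  std::vector<int64_t> spDecrementImmediates(int64_t Amount) {
    std::vector<int64_t> Imms;
    for (int64_t Val = Amount; Val > 0; Val -= 2048)
      Imms.push_back(Val < 2048 ? -Val : -2048);
    return Imms; // e.g. 5000 -> {-2048, -2048, -904}, which sums to -5000
  }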
++ for (int Val = SecondSPAdjustAmount; Val > 0; Val -= 2048) ++ BuildMI(MBB, MBBI, dl, TII.get(ADDI), SP) ++ .addReg(SP) ++ .addImm(Val < 2048 ? -Val : -2048) ++ .setMIFlag(MachineInstr::FrameSetup); ++ // If we are using a frame-pointer, and thus emitted ".cfi_def_cfa fp, 0", ++ // don't emit an sp-based .cfi_def_cfa_offset. ++ // Emit ".cfi_def_cfa_offset StackSize" ++ unsigned CFIIndex = MF.addFrameInst( ++ MCCFIInstruction::cfiDefCfaOffset(nullptr, MFI.getStackSize())); ++ BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) ++ .addCFIIndex(CFIIndex) ++ .setMIFlag(MachineInstr::FrameSetup); ++ } ++ } ++ ++ // Realign stack. ++ if (hasFP(MF)) { ++ if (RegInfo.hasStackRealignment(MF)) { ++ // addiu $Reg, $zero, -MaxAlignment ++ // andi $sp, $sp, $Reg ++ unsigned VR = MF.getRegInfo().createVirtualRegister(RC); ++ assert((Log2(MFI.getMaxAlign()) < 16) && ++ "Function's alignment size requirement is not supported."); ++ int MaxAlign = -(int)MFI.getMaxAlign().value(); ++ int Alignment = (int)MFI.getMaxAlign().value(); ++ ++ if (Alignment <= 2048) { ++ BuildMI(MBB, MBBI, dl, TII.get(ADDI), VR).addReg(ZERO).addImm(MaxAlign); ++ BuildMI(MBB, MBBI, dl, TII.get(AND), SP).addReg(SP).addReg(VR); ++ } else { ++ const unsigned NrBitsToZero = countTrailingZeros((unsigned)Alignment); ++ BuildMI(MBB, MBBI, dl, TII.get(ADDI), VR).addReg(ZERO).addImm(-1); ++ BuildMI(MBB, MBBI, dl, TII.get(SLLI), VR) ++ .addReg(VR) ++ .addImm(NrBitsToZero); ++ BuildMI(MBB, MBBI, dl, TII.get(AND), SP).addReg(SP).addReg(VR); ++ } ++ ++ if (hasBP(MF)) { ++ // move $s7, $sp ++ unsigned BP = STI.isABI_LP64() ? LoongArch::S7_64 : LoongArch::S7; ++ BuildMI(MBB, MBBI, dl, TII.get(MOVE), BP).addReg(SP).addReg(ZERO); ++ } ++ } ++ } + } + + void LoongArchFrameLowering::emitEpilogue(MachineFunction &MF, + MachineBasicBlock &MBB) const { +- const LoongArchRegisterInfo *RI = STI.getRegisterInfo(); +- MachineFrameInfo &MFI = MF.getFrameInfo(); +- Register SPReg = LoongArch::R3; +- + MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); +- DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); ++ MachineFrameInfo &MFI = MF.getFrameInfo(); ++ LoongArchFunctionInfo *LoongArchFI = MF.getInfo(); + +- const auto &CSI = MFI.getCalleeSavedInfo(); +- // Skip to before the restores of callee-saved registers. +- auto LastFrameDestroy = MBBI; +- if (!CSI.empty()) +- LastFrameDestroy = std::prev(MBBI, CSI.size()); ++ const LoongArchInstrInfo &TII = ++ *static_cast(STI.getInstrInfo()); ++ const LoongArchRegisterInfo &RegInfo = ++ *static_cast(STI.getRegisterInfo()); ++ ++ DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); ++ LoongArchABIInfo ABI = STI.getABI(); ++ unsigned SP = ABI.GetStackPtr(); ++ unsigned FP = ABI.GetFramePtr(); + + // Get the number of bytes from FrameInfo. + uint64_t StackSize = MFI.getStackSize(); + + // Restore the stack pointer. +- if (RI->hasStackRealignment(MF) || MFI.hasVarSizedObjects()) { +- assert(hasFP(MF) && "frame pointer should not have been eliminated"); +- adjustReg(MBB, LastFrameDestroy, DL, SPReg, LoongArch::R22, -StackSize, +- MachineInstr::FrameDestroy); ++ if (hasFP(MF) && ++ (RegInfo.hasStackRealignment(MF) || MFI.hasVarSizedObjects())) { ++ // Find the first instruction that restores a callee-saved register. 
++ MachineBasicBlock::iterator I = MBBI; ++ for (unsigned i = 0; i < MFI.getCalleeSavedInfo().size(); ++i) ++ --I; ++ TII.adjustReg(SP, FP, -(StackSize - LoongArchFI->getVarArgsSaveSize()), MBB, ++ I); + } + +- // Deallocate stack +- adjustReg(MBB, MBBI, DL, SPReg, SPReg, StackSize, MachineInstr::FrameDestroy); +-} ++ uint64_t FirstSPAdjustAmount = getFirstSPAdjustAmount(MF); ++ if (FirstSPAdjustAmount) { ++ uint64_t SecondSPAdjustAmount = MFI.getStackSize() - FirstSPAdjustAmount; ++ assert(SecondSPAdjustAmount > 0 && ++ "SecondSPAdjustAmount should be greater than zero"); ++ // Find the first instruction that restores a callee-saved register. ++ MachineBasicBlock::iterator I = MBBI; ++ for (unsigned i = 0; i < MFI.getCalleeSavedInfo().size(); ++i) ++ --I; ++ ++ TII.adjustReg(SP, SP, SecondSPAdjustAmount, MBB, I); ++ } + +-void LoongArchFrameLowering::determineCalleeSaves(MachineFunction &MF, +- BitVector &SavedRegs, +- RegScavenger *RS) const { +- TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); +- // Unconditionally spill RA and FP only if the function uses a frame +- // pointer. +- if (hasFP(MF)) { +- SavedRegs.set(LoongArch::R1); +- SavedRegs.set(LoongArch::R22); ++ if (LoongArchFI->callsEhReturn()) { ++ const TargetRegisterClass *RC = ++ ABI.ArePtrs64bit() ? &LoongArch::GPR64RegClass : &LoongArch::GPR32RegClass; ++ ++ // Find first instruction that restores a callee-saved register. ++ MachineBasicBlock::iterator I = MBBI; ++ for (unsigned i = 0; i < MFI.getCalleeSavedInfo().size(); ++i) ++ --I; ++ ++ // Insert instructions that restore eh data registers. ++ for (int J = 0; J < 4; ++J) ++ TII.loadRegFromStackSlot(MBB, I, ABI.GetEhDataReg(J), ++ LoongArchFI->getEhDataRegFI(J), RC, &RegInfo); + } +- // Mark BP as used if function has dedicated base pointer. +- if (hasBP(MF)) +- SavedRegs.set(LoongArchABI::getBPReg()); ++ ++ if (FirstSPAdjustAmount) ++ StackSize = FirstSPAdjustAmount; ++ ++ if (!StackSize) ++ return; ++ ++ // Final adjust stack. 
++ TII.adjustReg(SP, SP, StackSize, MBB, MBBI); + } + +-StackOffset LoongArchFrameLowering::getFrameIndexReference( +- const MachineFunction &MF, int FI, Register &FrameReg) const { ++StackOffset ++LoongArchFrameLowering::getFrameIndexReference(const MachineFunction &MF, ++ int FI, ++ Register &FrameReg) const { + const MachineFrameInfo &MFI = MF.getFrameInfo(); + const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo(); ++ LoongArchABIInfo ABI = STI.getABI(); ++ const auto *LoongArchFI = MF.getInfo(); + + // Callee-saved registers should be referenced relative to the stack + // pointer (positive offset), otherwise use the frame pointer (negative +@@ -207,17 +380,182 @@ StackOffset LoongArchFrameLowering::getFrameIndexReference( + StackOffset Offset = + StackOffset::getFixed(MFI.getObjectOffset(FI) - getOffsetOfLocalArea() + + MFI.getOffsetAdjustment()); ++ uint64_t FirstSPAdjustAmount = getFirstSPAdjustAmount(MF); + + if (CSI.size()) { + MinCSFI = CSI[0].getFrameIdx(); + MaxCSFI = CSI[CSI.size() - 1].getFrameIdx(); + } + +- FrameReg = RI->getFrameRegister(MF); +- if ((FI >= MinCSFI && FI <= MaxCSFI) || !hasFP(MF)) { +- FrameReg = LoongArch::R3; ++ bool EhDataRegFI = LoongArchFI->isEhDataRegFI(FI); ++ if ((FI >= MinCSFI && FI <= MaxCSFI) || EhDataRegFI) { ++ FrameReg = ABI.GetStackPtr(); ++ ++ if (FirstSPAdjustAmount) ++ Offset += StackOffset::getFixed(FirstSPAdjustAmount); ++ else ++ Offset += StackOffset::getFixed(MFI.getStackSize()); ++ } else if (RI->hasStackRealignment(MF) && !MFI.isFixedObjectIndex(FI)) { ++ // If the stack was realigned, the frame pointer is set in order to allow ++ // SP to be restored, so we need another base register to record the stack ++ // after realignment. ++ FrameReg = hasBP(MF) ? ABI.GetBasePtr() : ABI.GetStackPtr(); + Offset += StackOffset::getFixed(MFI.getStackSize()); ++ } else { ++ FrameReg = RI->getFrameRegister(MF); ++ if (hasFP(MF)) ++ Offset += StackOffset::getFixed(LoongArchFI->getVarArgsSaveSize()); ++ else ++ Offset += StackOffset::getFixed(MFI.getStackSize()); + } +- + return Offset; + } ++ ++bool LoongArchFrameLowering::spillCalleeSavedRegisters( ++ MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, ++ ArrayRef CSI, const TargetRegisterInfo *TRI) const { ++ MachineFunction *MF = MBB.getParent(); ++ const TargetInstrInfo &TII = *STI.getInstrInfo(); ++ ++ for (unsigned i = 0, e = CSI.size(); i != e; ++i) { ++ // Add the callee-saved register as live-in. Do not add if the register is ++ // RA and return address is taken, because it has already been added in ++ // method LoongArchTargetLowering::lowerRETURNADDR. ++ // It's killed at the spill, unless the register is RA and return address ++ // is taken. ++ unsigned Reg = CSI[i].getReg(); ++ bool IsRAAndRetAddrIsTaken = (Reg == LoongArch::RA || Reg == LoongArch::RA_64) ++ && MF->getFrameInfo().isReturnAddressTaken(); ++ if (!IsRAAndRetAddrIsTaken) ++ MBB.addLiveIn(Reg); ++ ++ // Insert the spill to the stack frame. ++ bool IsKill = !IsRAAndRetAddrIsTaken; ++ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); ++ TII.storeRegToStackSlot(MBB, MI, Reg, IsKill, ++ CSI[i].getFrameIdx(), RC, TRI); ++ } ++ ++ return true; ++} ++ ++bool ++LoongArchFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { ++ const MachineFrameInfo &MFI = MF.getFrameInfo(); ++ // Reserve call frame if the size of the maximum call frame fits into 12-bit ++ // immediate field and there are no variable sized objects on the stack. 
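For reference while reading getFrameIndexReference above: the base-register choice reduces to a small decision table. The helper below only restates those branches with hypothetical names; the real code additionally folds in the offsets shown (the first-adjust amount or the full stack size for SP-relative accesses, and the varargs save area when addressing off FP).

  enum class FrameBase { SP, FP, BP };

  // Which register a frame index is addressed from, following the branches
  // of getFrameIndexReference (sketch only).
  FrameBase frameIndexBase(bool IsCalleeSaveOrEhDataSlot, bool StackRealigned,
                           bool IsFixedObject, bool HasBP, bool HasFP) {
    if (IsCalleeSaveOrEhDataSlot)
      return FrameBase::SP;          // spill slots stay SP-relative
    if (StackRealigned && !IsFixedObject)
      return HasBP ? FrameBase::BP   // realigned frames address locals off
                   : FrameBase::SP;  // the base pointer when one is reserved
    return HasFP ? FrameBase::FP : FrameBase::SP;
  }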
++ // Make sure the second register scavenger spill slot can be accessed with one ++ // instruction. ++ return isInt<12>(MFI.getMaxCallFrameSize() + getStackAlignment()) && ++ !MFI.hasVarSizedObjects(); ++} ++ ++/// Mark \p Reg and all registers aliasing it in the bitset. ++static void setAliasRegs(MachineFunction &MF, BitVector &SavedRegs, ++ unsigned Reg) { ++ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); ++ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) ++ SavedRegs.set(*AI); ++} ++ ++void LoongArchFrameLowering::determineCalleeSaves(MachineFunction &MF, ++ BitVector &SavedRegs, ++ RegScavenger *RS) const { ++ TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); ++ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); ++ LoongArchFunctionInfo *LoongArchFI = MF.getInfo(); ++ LoongArchABIInfo ABI = STI.getABI(); ++ unsigned FP = ABI.GetFramePtr(); ++ unsigned BP = ABI.IsLP64() ? LoongArch::S7_64 : LoongArch::S7; ++ ++ // Mark $fp as used if function has dedicated frame pointer. ++ if (hasFP(MF)) ++ setAliasRegs(MF, SavedRegs, FP); ++ // Mark $s7 as used if function has dedicated base pointer. ++ if (hasBP(MF)) ++ setAliasRegs(MF, SavedRegs, BP); ++ ++ // Create spill slots for eh data registers if function calls eh_return. ++ if (LoongArchFI->callsEhReturn()) ++ LoongArchFI->createEhDataRegsFI(); ++ ++ // Set scavenging frame index if necessary. ++ uint64_t MaxSPOffset = estimateStackSize(MF); ++ ++ // If there is a variable ++ // sized object on the stack, the estimation cannot account for it. ++ if (isIntN(12, MaxSPOffset) && ++ !MF.getFrameInfo().hasVarSizedObjects()) ++ return; ++ ++ const TargetRegisterClass &RC = ++ ABI.ArePtrs64bit() ? LoongArch::GPR64RegClass : LoongArch::GPR32RegClass; ++ int FI = MF.getFrameInfo().CreateStackObject(TRI->getSpillSize(RC), ++ TRI->getSpillAlign(RC), false); ++ RS->addScavengingFrameIndex(FI); ++} ++ ++// hasFP - Return true if the specified function should have a dedicated frame ++// pointer register. This is true if the function has variable sized allocas, ++// if it needs dynamic stack realignment, if frame pointer elimination is ++// disabled, or if the frame address is taken. ++bool LoongArchFrameLowering::hasFP(const MachineFunction &MF) const { ++ const MachineFrameInfo &MFI = MF.getFrameInfo(); ++ const TargetRegisterInfo *TRI = STI.getRegisterInfo(); ++ ++ return MF.getTarget().Options.DisableFramePointerElim(MF) || ++ MFI.hasVarSizedObjects() || MFI.isFrameAddressTaken() || ++ TRI->hasStackRealignment(MF); ++} ++ ++bool LoongArchFrameLowering::hasBP(const MachineFunction &MF) const { ++ const MachineFrameInfo &MFI = MF.getFrameInfo(); ++ const TargetRegisterInfo *TRI = STI.getRegisterInfo(); ++ ++ return MFI.hasVarSizedObjects() && TRI->hasStackRealignment(MF); ++} ++ ++// Estimate the size of the stack, including the incoming arguments. We need to ++// account for register spills, local objects, reserved call frame and incoming ++// arguments. This is required to determine the largest possible positive offset ++// from $sp so that it can be determined if an emergency spill slot for stack ++// addresses is required. ++uint64_t LoongArchFrameLowering:: ++estimateStackSize(const MachineFunction &MF) const { ++ const MachineFrameInfo &MFI = MF.getFrameInfo(); ++ const TargetRegisterInfo &TRI = *STI.getRegisterInfo(); ++ ++ int64_t Size = 0; ++ ++ // Iterate over fixed sized objects which are incoming arguments. 
++ for (int I = MFI.getObjectIndexBegin(); I != 0; ++I) ++ if (MFI.getObjectOffset(I) > 0) ++ Size += MFI.getObjectSize(I); ++ ++ // Conservatively assume all callee-saved registers will be saved. ++ for (const MCPhysReg *R = TRI.getCalleeSavedRegs(&MF); *R; ++R) { ++ unsigned RegSize = TRI.getSpillSize(*TRI.getMinimalPhysRegClass(*R)); ++ Size = alignTo(Size + RegSize, RegSize); ++ } ++ ++ // Get the size of the rest of the frame objects and any possible reserved ++ // call frame, accounting for alignment. ++ return Size + MFI.estimateStackSize(MF); ++} ++ ++// Eliminate ADJCALLSTACKDOWN, ADJCALLSTACKUP pseudo instructions ++MachineBasicBlock::iterator LoongArchFrameLowering:: ++eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, ++ MachineBasicBlock::iterator I) const { ++ unsigned SP = STI.getABI().IsLP64() ? LoongArch::SP_64 : LoongArch::SP; ++ ++ if (!hasReservedCallFrame(MF)) { ++ int64_t Amount = I->getOperand(0).getImm(); ++ if (I->getOpcode() == LoongArch::ADJCALLSTACKDOWN) ++ Amount = -Amount; ++ ++ STI.getInstrInfo()->adjustReg(SP, SP, Amount, MBB, I); ++ } ++ ++ return MBB.erase(I); ++} +diff --git a/llvm/lib/Target/LoongArch/LoongArchFrameLowering.h b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.h +index 72d8e006a..74aabaeb4 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchFrameLowering.h ++++ b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.h +@@ -1,4 +1,4 @@ +-//=- LoongArchFrameLowering.h - TargetFrameLowering for LoongArch -*- C++ -*--// ++//===-- LoongArchFrameLowering.h - Define frame lowering for LoongArch ----*- C++ -*-===// + // + // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + // See https://llvm.org/LICENSE.txt for license information. +@@ -6,51 +6,66 @@ + // + //===----------------------------------------------------------------------===// + // +-// This class implements LoongArch-specific bits of TargetFrameLowering class. ++// + // + //===----------------------------------------------------------------------===// + + #ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCHFRAMELOWERING_H + #define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHFRAMELOWERING_H + ++#include "LoongArch.h" + #include "llvm/CodeGen/TargetFrameLowering.h" + + namespace llvm { +-class LoongArchSubtarget; ++ class LoongArchSubtarget; + + class LoongArchFrameLowering : public TargetFrameLowering { + const LoongArchSubtarget &STI; + + public: +- explicit LoongArchFrameLowering(const LoongArchSubtarget &STI) +- : TargetFrameLowering(StackGrowsDown, +- /*StackAlignment=*/Align(16), +- /*LocalAreaOffset=*/0), +- STI(STI) {} ++ explicit LoongArchFrameLowering(const LoongArchSubtarget &STI); + ++ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into ++ /// the function. 
+ void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override; + void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; + ++ StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, ++ Register &FrameReg) const override; ++ ++ bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, ++ MachineBasicBlock::iterator MI, ++ ArrayRef CSI, ++ const TargetRegisterInfo *TRI) const override; ++ ++ bool hasReservedCallFrame(const MachineFunction &MF) const override; ++ + void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, + RegScavenger *RS) const override; + +- MachineBasicBlock::iterator +- eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, +- MachineBasicBlock::iterator MI) const override { +- return MBB.erase(MI); ++ bool hasFP(const MachineFunction &MF) const override; ++ ++ bool hasBP(const MachineFunction &MF) const; ++ ++ bool enableShrinkWrapping(const MachineFunction &MF) const override { ++ return true; + } + +- StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, +- Register &FrameReg) const override; ++ MachineBasicBlock::iterator ++ eliminateCallFramePseudoInstr(MachineFunction &MF, ++ MachineBasicBlock &MBB, ++ MachineBasicBlock::iterator I) const override; + +- bool hasFP(const MachineFunction &MF) const override; +- bool hasBP(const MachineFunction &MF) const; ++ // Get the first stack adjustment amount for split the SP adjustment. ++ // Return 0 if we don't want to to split the SP adjustment in prologue and ++ // epilogue. ++ uint64_t getFirstSPAdjustAmount(const MachineFunction &MF, ++ bool IsPrologue = false) const; + +-private: +- void determineFrameLayout(MachineFunction &MF) const; +- void adjustReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, +- const DebugLoc &DL, Register DestReg, Register SrcReg, +- int64_t Val, MachineInstr::MIFlag Flag) const; ++protected: ++ uint64_t estimateStackSize(const MachineFunction &MF) const; + }; +-} // end namespace llvm +-#endif // LLVM_LIB_TARGET_LOONGARCH_LOONGARCHFRAMELOWERING_H ++ ++} // End llvm namespace ++ ++#endif +diff --git a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp +index bb40ff817..0efb739e0 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp ++++ b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp +@@ -1,4 +1,4 @@ +-//=- LoongArchISelDAGToDAG.cpp - A dag to dag inst selector for LoongArch -===// ++//===-- LoongArchISelDAGToDAG.cpp - A Dag to Dag Inst Selector for LoongArch --------===// + // + // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + // See https://llvm.org/LICENSE.txt for license information. 
+@@ -11,176 +11,868 @@ + //===----------------------------------------------------------------------===// + + #include "LoongArchISelDAGToDAG.h" +-#include "LoongArchISelLowering.h" ++#include "LoongArch.h" ++#include "LoongArchMachineFunction.h" ++#include "LoongArchRegisterInfo.h" ++#include "MCTargetDesc/LoongArchAnalyzeImmediate.h" ++#include "MCTargetDesc/LoongArchBaseInfo.h" + #include "MCTargetDesc/LoongArchMCTargetDesc.h" +-#include "MCTargetDesc/LoongArchMatInt.h" +-#include "llvm/Support/KnownBits.h" +- ++#include "llvm/CodeGen/MachineConstantPool.h" ++#include "llvm/CodeGen/MachineFrameInfo.h" ++#include "llvm/CodeGen/MachineFunction.h" ++#include "llvm/CodeGen/MachineInstrBuilder.h" ++#include "llvm/CodeGen/MachineRegisterInfo.h" ++#include "llvm/CodeGen/SelectionDAGNodes.h" ++#include "llvm/IR/CFG.h" ++#include "llvm/IR/Dominators.h" ++#include "llvm/IR/GlobalValue.h" ++#include "llvm/IR/Instructions.h" ++#include "llvm/IR/Intrinsics.h" ++#include "llvm/IR/IntrinsicsLoongArch.h" ++#include "llvm/IR/Type.h" ++#include "llvm/Support/Debug.h" ++#include "llvm/Support/ErrorHandling.h" ++#include "llvm/Support/raw_ostream.h" ++#include "llvm/Target/TargetMachine.h" + using namespace llvm; + + #define DEBUG_TYPE "loongarch-isel" + +-void LoongArchDAGToDAGISel::Select(SDNode *Node) { +- // If we have a custom node, we have already selected. +- if (Node->isMachineOpcode()) { +- LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n"); +- Node->setNodeId(-1); +- return; +- } ++//===----------------------------------------------------------------------===// ++// Instruction Selector Implementation ++//===----------------------------------------------------------------------===// + +- // Instruction Selection not handled by the auto-generated tablegen selection +- // should be handled here. +- unsigned Opcode = Node->getOpcode(); +- MVT GRLenVT = Subtarget->getGRLenVT(); +- SDLoc DL(Node); +- MVT VT = Node->getSimpleValueType(0); ++//===----------------------------------------------------------------------===// ++// LoongArchDAGToDAGISel - LoongArch specific code to select LoongArch machine ++// instructions for SelectionDAG operations. ++//===----------------------------------------------------------------------===// + +- switch (Opcode) { +- default: +- break; +- case ISD::Constant: { +- int64_t Imm = cast(Node)->getSExtValue(); +- if (Imm == 0 && VT == GRLenVT) { +- SDValue New = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, +- LoongArch::R0, GRLenVT); +- ReplaceNode(Node, New.getNode()); +- return; +- } +- SDNode *Result = nullptr; +- SDValue SrcReg = CurDAG->getRegister(LoongArch::R0, GRLenVT); +- // The instructions in the sequence are handled here. 
+- for (LoongArchMatInt::Inst &Inst : LoongArchMatInt::generateInstSeq(Imm)) { +- SDValue SDImm = CurDAG->getTargetConstant(Inst.Imm, DL, GRLenVT); +- if (Inst.Opc == LoongArch::LU12I_W) +- Result = CurDAG->getMachineNode(LoongArch::LU12I_W, DL, GRLenVT, SDImm); +- else +- Result = CurDAG->getMachineNode(Inst.Opc, DL, GRLenVT, SrcReg, SDImm); +- SrcReg = SDValue(Result, 0); +- } ++void LoongArchDAGToDAGISel::PostprocessISelDAG() { doPeepholeLoadStoreADDI(); } + +- ReplaceNode(Node, Result); +- return; ++void LoongArchDAGToDAGISel::getAnalysisUsage(AnalysisUsage &AU) const { ++ AU.addRequired(); ++ SelectionDAGISel::getAnalysisUsage(AU); ++} ++ ++bool LoongArchDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) { ++ Subtarget = &static_cast(MF.getSubtarget()); ++ bool Ret = SelectionDAGISel::runOnMachineFunction(MF); ++ ++ return Ret; ++} ++ ++/// Match frameindex ++bool LoongArchDAGToDAGISel::selectAddrFrameIndex(SDValue Addr, SDValue &Base, ++ SDValue &Offset) const { ++ if (FrameIndexSDNode *FIN = dyn_cast(Addr)) { ++ EVT ValTy = Addr.getValueType(); ++ ++ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy); ++ Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), ValTy); ++ return true; + } +- case ISD::FrameIndex: { +- SDValue Imm = CurDAG->getTargetConstant(0, DL, GRLenVT); +- int FI = cast(Node)->getIndex(); +- SDValue TFI = CurDAG->getTargetFrameIndex(FI, VT); +- unsigned ADDIOp = +- Subtarget->is64Bit() ? LoongArch::ADDI_D : LoongArch::ADDI_W; +- ReplaceNode(Node, CurDAG->getMachineNode(ADDIOp, DL, VT, TFI, Imm)); +- return; ++ return false; ++} ++ ++/// Match frameindex+offset and frameindex|offset ++bool LoongArchDAGToDAGISel::selectAddrFrameIndexOffset( ++ SDValue Addr, SDValue &Base, SDValue &Offset, unsigned OffsetBits, ++ unsigned ShiftAmount = 0) const { ++ if (CurDAG->isBaseWithConstantOffset(Addr)) { ++ ConstantSDNode *CN = dyn_cast(Addr.getOperand(1)); ++ if (isIntN(OffsetBits + ShiftAmount, CN->getSExtValue())) { ++ EVT ValTy = Addr.getValueType(); ++ ++ // If the first operand is a FI, get the TargetFI Node ++ if (FrameIndexSDNode *FIN = ++ dyn_cast(Addr.getOperand(0))) ++ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy); ++ else { ++ Base = Addr.getOperand(0); ++ // If base is a FI, additional offset calculation is done in ++ // eliminateFrameIndex, otherwise we need to check the alignment ++ const Align Alignment(1ULL << ShiftAmount); ++ if (!isAligned(Alignment, CN->getZExtValue())) ++ return false; ++ } ++ ++ Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(Addr), ++ ValTy); ++ return true; ++ } + } +- // TODO: Add selection nodes needed later. ++ return false; ++} ++ ++/// ComplexPattern used on LoongArchInstrInfo ++/// Used on LoongArch Load/Store instructions ++bool LoongArchDAGToDAGISel::selectAddrRegImm(SDValue Addr, SDValue &Base, ++ SDValue &Offset) const { ++ // if Address is FI, get the TargetFrameIndex. ++ if (selectAddrFrameIndex(Addr, Base, Offset)) ++ return true; ++ ++ if (!TM.isPositionIndependent()) { ++ if ((Addr.getOpcode() == ISD::TargetExternalSymbol || ++ Addr.getOpcode() == ISD::TargetGlobalAddress)) ++ return false; + } + +- // Select the default instruction. +- SelectCode(Node); ++ // Addresses of the form FI+const or FI|const ++ if (selectAddrFrameIndexOffset(Addr, Base, Offset, 12)) ++ return true; ++ ++ return false; + } + +-bool LoongArchDAGToDAGISel::SelectBaseAddr(SDValue Addr, SDValue &Base) { +- // If this is FrameIndex, select it directly. 
Otherwise just let it get +- // selected to a register independently. +- if (auto *FIN = dyn_cast(Addr)) +- Base = +- CurDAG->getTargetFrameIndex(FIN->getIndex(), Subtarget->getGRLenVT()); +- else +- Base = Addr; ++/// ComplexPattern used on LoongArchInstrInfo ++/// Used on LoongArch Load/Store instructions ++bool LoongArchDAGToDAGISel::selectAddrDefault(SDValue Addr, SDValue &Base, ++ SDValue &Offset) const { ++ Base = Addr; ++ Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), Addr.getValueType()); + return true; + } + +-bool LoongArchDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth, +- SDValue &ShAmt) { +- // Shift instructions on LoongArch only read the lower 5 or 6 bits of the +- // shift amount. If there is an AND on the shift amount, we can bypass it if +- // it doesn't affect any of those bits. +- if (N.getOpcode() == ISD::AND && isa(N.getOperand(1))) { +- const APInt &AndMask = N->getConstantOperandAPInt(1); ++bool LoongArchDAGToDAGISel::selectIntAddr(SDValue Addr, SDValue &Base, ++ SDValue &Offset) const { ++ return selectAddrRegImm(Addr, Base, Offset) || ++ selectAddrDefault(Addr, Base, Offset); ++} ++ ++bool LoongArchDAGToDAGISel::selectAddrRegImm12(SDValue Addr, SDValue &Base, ++ SDValue &Offset) const { ++ if (selectAddrFrameIndex(Addr, Base, Offset)) ++ return true; ++ ++ if (selectAddrFrameIndexOffset(Addr, Base, Offset, 12)) ++ return true; ++ ++ return false; ++} ++ ++bool LoongArchDAGToDAGISel::selectIntAddrSImm12(SDValue Addr, SDValue &Base, ++ SDValue &Offset) const { ++ if (selectAddrFrameIndex(Addr, Base, Offset)) ++ return true; ++ ++ if (selectAddrFrameIndexOffset(Addr, Base, Offset, 12)) ++ return true; ++ ++ return selectAddrDefault(Addr, Base, Offset); ++} ++ ++bool LoongArchDAGToDAGISel::selectIntAddrSImm10Lsl1(SDValue Addr, SDValue &Base, ++ SDValue &Offset) const { ++ if (selectAddrFrameIndex(Addr, Base, Offset)) ++ return true; + +- // Since the max shift amount is a power of 2 we can subtract 1 to make a +- // mask that covers the bits needed to represent all shift amounts. 
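Back in the added code, selectAddrFrameIndexOffset folds a base-plus-constant address only when the constant passes two checks: it must fit a signed immediate widened by the scale (OffsetBits + ShiftAmount bits), and, unless the base is a frame index left to eliminateFrameIndex, it must be a multiple of the scale. A standalone restatement of that test, with hypothetical names:

  #include <cstdint>

  // Does Offset fit a signed (OffsetBits + ShiftAmount)-bit immediate and
  // sit on a (1 << ShiftAmount)-byte boundary? This mirrors the isIntN and
  // isAligned checks used when matching the scaled reg+imm address forms.
  bool offsetFitsScaledImm(int64_t Offset, unsigned OffsetBits,
                           unsigned ShiftAmount) {
    const int64_t Bound = int64_t(1) << (OffsetBits + ShiftAmount - 1);
    const bool InRange = Offset >= -Bound && Offset < Bound;
    const bool Aligned = (Offset & ((int64_t(1) << ShiftAmount) - 1)) == 0;
    return InRange && Aligned;
  }

For example, the simm10-scaled-by-4 form used by selectIntAddrSImm10Lsl2 accepts offsets in [-2048, 2044] that are multiples of 4.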
+- assert(isPowerOf2_32(ShiftWidth) && "Unexpected max shift amount!"); +- APInt ShMask(AndMask.getBitWidth(), ShiftWidth - 1); ++ if (selectAddrFrameIndexOffset(Addr, Base, Offset, 10, 1)) ++ return true; + +- if (ShMask.isSubsetOf(AndMask)) { +- ShAmt = N.getOperand(0); ++ return selectAddrDefault(Addr, Base, Offset); ++} ++ ++bool LoongArchDAGToDAGISel::selectIntAddrSImm10(SDValue Addr, SDValue &Base, ++ SDValue &Offset) const { ++ if (selectAddrFrameIndex(Addr, Base, Offset)) ++ return true; ++ ++ if (selectAddrFrameIndexOffset(Addr, Base, Offset, 10)) ++ return true; ++ ++ return selectAddrDefault(Addr, Base, Offset); ++} ++ ++bool LoongArchDAGToDAGISel::selectIntAddrSImm10Lsl2(SDValue Addr, SDValue &Base, ++ SDValue &Offset) const { ++ if (selectAddrFrameIndex(Addr, Base, Offset)) ++ return true; ++ ++ if (selectAddrFrameIndexOffset(Addr, Base, Offset, 10, 2)) ++ return true; ++ ++ return selectAddrDefault(Addr, Base, Offset); ++} ++ ++bool LoongArchDAGToDAGISel::selectIntAddrSImm11Lsl1(SDValue Addr, SDValue &Base, ++ SDValue &Offset) const { ++ if (selectAddrFrameIndex(Addr, Base, Offset)) ++ return true; ++ ++ if (selectAddrFrameIndexOffset(Addr, Base, Offset, 11, 1)) ++ return true; ++ ++ return selectAddrDefault(Addr, Base, Offset); ++} ++ ++bool LoongArchDAGToDAGISel::selectIntAddrSImm9Lsl3(SDValue Addr, SDValue &Base, ++ SDValue &Offset) const { ++ if (selectAddrFrameIndex(Addr, Base, Offset)) ++ return true; ++ ++ if (selectAddrFrameIndexOffset(Addr, Base, Offset, 9, 3)) ++ return true; ++ ++ return selectAddrDefault(Addr, Base, Offset); ++} ++ ++bool LoongArchDAGToDAGISel::selectIntAddrSImm14Lsl2(SDValue Addr, SDValue &Base, ++ SDValue &Offset) const { ++ if (selectAddrFrameIndex(Addr, Base, Offset)) ++ return true; ++ ++ if (selectAddrFrameIndexOffset(Addr, Base, Offset, 14, 2)) ++ return true; ++ ++ return false; ++} ++ ++bool LoongArchDAGToDAGISel::selectIntAddrSImm10Lsl3(SDValue Addr, SDValue &Base, ++ SDValue &Offset) const { ++ if (selectAddrFrameIndex(Addr, Base, Offset)) ++ return true; ++ ++ if (selectAddrFrameIndexOffset(Addr, Base, Offset, 10, 3)) ++ return true; ++ ++ return selectAddrDefault(Addr, Base, Offset); ++} ++ ++// Select constant vector splats. ++// ++// Returns true and sets Imm if: ++// * LSX is enabled ++// * N is a ISD::BUILD_VECTOR representing a constant splat ++bool LoongArchDAGToDAGISel::selectVSplat(SDNode *N, APInt &Imm, ++ unsigned MinSizeInBits) const { ++ if (!(Subtarget->hasLSX() || Subtarget->hasLASX())) ++ return false; ++ ++ BuildVectorSDNode *Node = dyn_cast(N); ++ ++ if (!Node) ++ return false; ++ ++ APInt SplatValue, SplatUndef; ++ unsigned SplatBitSize; ++ bool HasAnyUndefs; ++ ++ if (!Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs, ++ MinSizeInBits)) ++ return false; ++ ++ Imm = SplatValue; ++ ++ return true; ++} ++ ++// Select constant vector splats. ++// ++// In addition to the requirements of selectVSplat(), this function returns ++// true and sets Imm if: ++// * The splat value is the same width as the elements of the vector ++// * The splat value fits in an integer with the specified signed-ness and ++// width. ++// ++// This function looks through ISD::BITCAST nodes. ++// TODO: This might not be appropriate for big-endian LSX since BITCAST is ++// sometimes a shuffle in big-endian mode. 
++// ++// It's worth noting that this function is not used as part of the selection ++// of [v/xv]ldi.[bhwd] since it does not permit using the wrong-typed ++// [v/xv]ldi.[bhwd] instruction to achieve the desired bit pattern. ++// [v/xv]ldi.[bhwd] is selected in LoongArchDAGToDAGISel::selectNode. ++bool LoongArchDAGToDAGISel::selectVSplatCommon(SDValue N, SDValue &Imm, ++ bool Signed, ++ unsigned ImmBitSize) const { ++ APInt ImmValue; ++ EVT EltTy = N->getValueType(0).getVectorElementType(); ++ ++ if (N->getOpcode() == ISD::BITCAST) ++ N = N->getOperand(0); ++ ++ if (selectVSplat(N.getNode(), ImmValue, EltTy.getSizeInBits()) && ++ ImmValue.getBitWidth() == EltTy.getSizeInBits()) { ++ ++ if ((Signed && ImmValue.isSignedIntN(ImmBitSize)) || ++ (!Signed && ImmValue.isIntN(ImmBitSize))) { ++ Imm = CurDAG->getTargetConstant(ImmValue, SDLoc(N), EltTy); + return true; + } ++ } ++ ++ return false; ++} ++ ++// Select constant vector splats. ++bool LoongArchDAGToDAGISel::selectVSplatUimm1(SDValue N, SDValue &Imm) const { ++ return selectVSplatCommon(N, Imm, false, 1); ++} ++ ++bool LoongArchDAGToDAGISel::selectVSplatUimm2(SDValue N, SDValue &Imm) const { ++ return selectVSplatCommon(N, Imm, false, 2); ++} + +- // SimplifyDemandedBits may have optimized the mask so try restoring any +- // bits that are known zero. +- KnownBits Known = CurDAG->computeKnownBits(N->getOperand(0)); +- if (ShMask.isSubsetOf(AndMask | Known.Zero)) { +- ShAmt = N.getOperand(0); ++bool LoongArchDAGToDAGISel::selectVSplatUimm3(SDValue N, SDValue &Imm) const { ++ return selectVSplatCommon(N, Imm, false, 3); ++} ++ ++bool LoongArchDAGToDAGISel::selectVSplatUimm4(SDValue N, SDValue &Imm) const { ++ return selectVSplatCommon(N, Imm, false, 4); ++} ++ ++bool LoongArchDAGToDAGISel::selectVSplatUimm5(SDValue N, SDValue &Imm) const { ++ return selectVSplatCommon(N, Imm, false, 5); ++} ++ ++bool LoongArchDAGToDAGISel::selectVSplatUimm6(SDValue N, SDValue &Imm) const { ++ return selectVSplatCommon(N, Imm, false, 6); ++} ++ ++bool LoongArchDAGToDAGISel::selectVSplatUimm8(SDValue N, SDValue &Imm) const { ++ return selectVSplatCommon(N, Imm, false, 8); ++} ++ ++bool LoongArchDAGToDAGISel::selectVSplatSimm5(SDValue N, SDValue &Imm) const { ++ return selectVSplatCommon(N, Imm, true, 5); ++} ++ ++// Select constant vector splats whose value is a power of 2. ++// ++// In addition to the requirements of selectVSplat(), this function returns ++// true and sets Imm if: ++// * The splat value is the same width as the elements of the vector ++// * The splat value is a power of two. ++// ++// This function looks through ISD::BITCAST nodes. ++// TODO: This might not be appropriate for big-endian LSX since BITCAST is ++// sometimes a shuffle in big-endian mode. ++bool LoongArchDAGToDAGISel::selectVSplatUimmPow2(SDValue N, ++ SDValue &Imm) const { ++ APInt ImmValue; ++ EVT EltTy = N->getValueType(0).getVectorElementType(); ++ ++ if (N->getOpcode() == ISD::BITCAST) ++ N = N->getOperand(0); ++ ++ if (selectVSplat(N.getNode(), ImmValue, EltTy.getSizeInBits()) && ++ ImmValue.getBitWidth() == EltTy.getSizeInBits()) { ++ int32_t Log2 = ImmValue.exactLogBase2(); ++ ++ if (Log2 != -1) { ++ Imm = CurDAG->getTargetConstant(Log2, SDLoc(N), EltTy); + return true; + } +- } else if (N.getOpcode() == LoongArchISD::BSTRPICK) { +- // Similar to the above AND, if there is a BSTRPICK on the shift amount, we +- // can bypass it. 
+- assert(isPowerOf2_32(ShiftWidth) && "Unexpected max shift amount!"); +- assert(isa(N.getOperand(1)) && "Illegal msb operand!"); +- assert(isa(N.getOperand(2)) && "Illegal lsb operand!"); +- uint64_t msb = N.getConstantOperandVal(1), lsb = N.getConstantOperandVal(2); +- if (lsb == 0 && Log2_32(ShiftWidth) <= msb + 1) { +- ShAmt = N.getOperand(0); ++ } ++ ++ return false; ++} ++ ++bool LoongArchDAGToDAGISel::selectVSplatUimmInvPow2(SDValue N, ++ SDValue &Imm) const { ++ APInt ImmValue; ++ EVT EltTy = N->getValueType(0).getVectorElementType(); ++ ++ if (N->getOpcode() == ISD::BITCAST) ++ N = N->getOperand(0); ++ ++ if (selectVSplat(N.getNode(), ImmValue, EltTy.getSizeInBits()) && ++ ImmValue.getBitWidth() == EltTy.getSizeInBits()) { ++ int32_t Log2 = (~ImmValue).exactLogBase2(); ++ ++ if (Log2 != -1) { ++ Imm = CurDAG->getTargetConstant(Log2, SDLoc(N), EltTy); + return true; + } +- } else if (N.getOpcode() == ISD::SUB && +- isa(N.getOperand(0))) { +- uint64_t Imm = N.getConstantOperandVal(0); +- // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to +- // generate a NEG instead of a SUB of a constant. +- if (Imm != 0 && Imm % ShiftWidth == 0) { +- SDLoc DL(N); +- EVT VT = N.getValueType(); +- SDValue Zero = +- CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, LoongArch::R0, VT); +- unsigned NegOpc = VT == MVT::i64 ? LoongArch::SUB_D : LoongArch::SUB_W; +- MachineSDNode *Neg = +- CurDAG->getMachineNode(NegOpc, DL, VT, Zero, N.getOperand(1)); +- ShAmt = SDValue(Neg, 0); ++ } ++ ++ return false; ++} ++ ++// Select constant vector splats whose value only has a consecutive sequence ++// of left-most bits set (e.g. 0b11...1100...00). ++// ++// In addition to the requirements of selectVSplat(), this function returns ++// true and sets Imm if: ++// * The splat value is the same width as the elements of the vector ++// * The splat value is a consecutive sequence of left-most bits. ++// ++// This function looks through ISD::BITCAST nodes. ++// TODO: This might not be appropriate for big-endian LSX since BITCAST is ++// sometimes a shuffle in big-endian mode. ++bool LoongArchDAGToDAGISel::selectVSplatMaskL(SDValue N, SDValue &Imm) const { ++ APInt ImmValue; ++ EVT EltTy = N->getValueType(0).getVectorElementType(); ++ ++ if (N->getOpcode() == ISD::BITCAST) ++ N = N->getOperand(0); ++ ++ if (selectVSplat(N.getNode(), ImmValue, EltTy.getSizeInBits()) && ++ ImmValue.getBitWidth() == EltTy.getSizeInBits()) { ++ // Extract the run of set bits starting with bit zero from the bitwise ++ // inverse of ImmValue, and test that the inverse of this is the same ++ // as the original value. ++ if (ImmValue == ~(~ImmValue & ~(~ImmValue + 1))) { ++ ++ Imm = CurDAG->getTargetConstant(ImmValue.countPopulation() - 1, SDLoc(N), ++ EltTy); + return true; + } + } + +- ShAmt = N; +- return true; ++ return false; + } + +-bool LoongArchDAGToDAGISel::selectSExti32(SDValue N, SDValue &Val) { +- if (N.getOpcode() == ISD::SIGN_EXTEND_INREG && +- cast(N.getOperand(1))->getVT() == MVT::i32) { +- Val = N.getOperand(0); +- return true; +- } +- MVT VT = N.getSimpleValueType(); +- if (CurDAG->ComputeNumSignBits(N) > (VT.getSizeInBits() - 32)) { +- Val = N; +- return true; ++// Select constant vector splats whose value only has a consecutive sequence ++// of right-most bits set (e.g. 0b00...0011...11). 
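The test ImmValue == ~(~ImmValue & ~(~ImmValue + 1)) in selectVSplatMaskL above is the usual trick for recognising a value that is a single run of ones anchored at the most-significant end; the MaskR variant below does the mirror-image check for a run anchored at bit zero. A small self-contained check of the 8-bit case (illustrative only, isLeftRunOfOnes is a made-up name):

  #include <cassert>
  #include <cstdint>

  // A value such as 0b11100000 is a left-anchored run of ones exactly when it
  // equals the inverse of the trailing-ones run of its own inverse.
  static bool isLeftRunOfOnes(uint8_t I) {
    uint8_t Inv = static_cast<uint8_t>(~I);
    uint8_t TrailingOnes = Inv & static_cast<uint8_t>(~(Inv + 1));
    return I == static_cast<uint8_t>(~TrailingOnes);
  }

  int main() {
    assert(isLeftRunOfOnes(0xE0));   // 0b11100000 -> accepted
    assert(!isLeftRunOfOnes(0xC1));  // 0b11000001 -> rejected, run not contiguous
    return 0;
  }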
++// ++// In addition to the requirements of selectVSplat(), this function returns ++// true and sets Imm if: ++// * The splat value is the same width as the elements of the vector ++// * The splat value is a consecutive sequence of right-most bits. ++// ++// This function looks through ISD::BITCAST nodes. ++// TODO: This might not be appropriate for big-endian LSX since BITCAST is ++// sometimes a shuffle in big-endian mode. ++bool LoongArchDAGToDAGISel::selectVSplatMaskR(SDValue N, SDValue &Imm) const { ++ APInt ImmValue; ++ EVT EltTy = N->getValueType(0).getVectorElementType(); ++ ++ if (N->getOpcode() == ISD::BITCAST) ++ N = N->getOperand(0); ++ ++ if (selectVSplat(N.getNode(), ImmValue, EltTy.getSizeInBits()) && ++ ImmValue.getBitWidth() == EltTy.getSizeInBits()) { ++ // Extract the run of set bits starting with bit zero, and test that the ++ // result is the same as the original value ++ if (ImmValue == (ImmValue & ~(ImmValue + 1))) { ++ Imm = CurDAG->getTargetConstant(ImmValue.countPopulation() - 1, SDLoc(N), ++ EltTy); ++ return true; ++ } + } + + return false; + } + +-bool LoongArchDAGToDAGISel::selectZExti32(SDValue N, SDValue &Val) { +- if (N.getOpcode() == ISD::AND) { +- auto *C = dyn_cast(N.getOperand(1)); +- if (C && C->getZExtValue() == UINT64_C(0xFFFFFFFF)) { +- Val = N.getOperand(0); ++bool LoongArchDAGToDAGISel::trySelect(SDNode *Node) { ++ unsigned Opcode = Node->getOpcode(); ++ SDLoc DL(Node); ++ ++ /// ++ // Instruction Selection not handled by the auto-generated ++ // tablegen selection should be handled here. ++ /// ++ switch(Opcode) { ++ default: break; ++ case ISD::ConstantFP: { ++ ConstantFPSDNode *CN = dyn_cast(Node); ++ if (Node->getValueType(0) == MVT::f64 && CN->isExactlyValue(+0.0)) { ++ if (Subtarget->is64Bit()) { ++ SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ++ LoongArch::ZERO_64, MVT::i64); ++ ReplaceNode(Node, ++ CurDAG->getMachineNode(LoongArch::MOVGR2FR_D, DL, MVT::f64, Zero)); ++ } + return true; + } ++ break; + } +- MVT VT = N.getSimpleValueType(); +- APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), 32); +- if (CurDAG->MaskedValueIsZero(N, Mask)) { +- Val = N; ++ ++ case ISD::Constant: { ++ const ConstantSDNode *CN = dyn_cast(Node); ++ MVT VT = CN->getSimpleValueType(0); ++ int64_t Imm = CN->getSExtValue(); ++ LoongArchAnalyzeImmediate::InstSeq Seq = ++ LoongArchAnalyzeImmediate::generateInstSeq(Imm, VT == MVT::i64); ++ SDLoc DL(CN); ++ SDNode *Result = nullptr; ++ SDValue SrcReg = CurDAG->getRegister( ++ VT == MVT::i64 ? LoongArch::ZERO_64 : LoongArch::ZERO, VT); ++ ++ // The instructions in the sequence are handled here. ++ for (LoongArchAnalyzeImmediate::Inst &Inst : Seq) { ++ SDValue SDImm = CurDAG->getTargetConstant(Inst.Imm, DL, VT); ++ if (Inst.Opc == LoongArch::LU12I_W || Inst.Opc == LoongArch::LU12I_W32) ++ Result = CurDAG->getMachineNode(Inst.Opc, DL, VT, SDImm); ++ else ++ Result = CurDAG->getMachineNode(Inst.Opc, DL, VT, SrcReg, SDImm); ++ SrcReg = SDValue(Result, 0); ++ } ++ ReplaceNode(Node, Result); + return true; + } + ++ case ISD::BUILD_VECTOR: { ++ // Select appropriate vldi.[bhwd] instructions for constant splats of ++ // 128-bit when LSX is enabled. Select appropriate xvldi.[bhwd] instructions ++ // for constant splats of 256-bit when LASX is enabled. Fixup any register ++ // class mismatches that occur as a result. ++ // ++ // This allows the compiler to use a wider range of immediates than would ++ // otherwise be allowed. 
If, for example, v4i32 could only use [v/xv]ldi.h ++ // then it would not be possible to load { 0x01010101, 0x01010101, ++ // 0x01010101, 0x01010101 } without using a constant pool. This would be ++ // sub-optimal when // '[v/xv]ldi.b vd, 1' is capable of producing that ++ // bit-pattern in the same set/ of registers. Similarly, [v/xv]ldi.h isn't ++ // capable of producing { 0x00000000, 0x00000001, 0x00000000, 0x00000001 } ++ // but '[v/xv]ldi.d vd, 1' can. ++ ++ const LoongArchABIInfo &ABI = ++ static_cast(TM).getABI(); ++ ++ BuildVectorSDNode *BVN = cast(Node); ++ APInt SplatValue, SplatUndef; ++ unsigned SplatBitSize; ++ bool HasAnyUndefs; ++ unsigned LdiOp; ++ EVT ResVecTy = BVN->getValueType(0); ++ EVT ViaVecTy; ++ ++ if ((!Subtarget->hasLSX() || !BVN->getValueType(0).is128BitVector()) && ++ (!Subtarget->hasLASX() || !BVN->getValueType(0).is256BitVector())) ++ return false; ++ ++ if (!BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, ++ HasAnyUndefs, 8)) ++ return false; ++ ++ bool IsLASX256 = BVN->getValueType(0).is256BitVector(); ++ ++ switch (SplatBitSize) { ++ default: ++ return false; ++ case 8: ++ LdiOp = IsLASX256 ? LoongArch::XVLDI_B : LoongArch::VLDI_B; ++ ViaVecTy = IsLASX256 ? MVT::v32i8 : MVT::v16i8; ++ break; ++ case 16: ++ LdiOp = IsLASX256 ? LoongArch::XVLDI_H : LoongArch::VLDI_H; ++ ViaVecTy = IsLASX256 ? MVT::v16i16 : MVT::v8i16; ++ break; ++ case 32: ++ LdiOp = IsLASX256 ? LoongArch::XVLDI_W : LoongArch::VLDI_W; ++ ViaVecTy = IsLASX256 ? MVT::v8i32 : MVT::v4i32; ++ break; ++ case 64: ++ LdiOp = IsLASX256 ? LoongArch::XVLDI_D : LoongArch::VLDI_D; ++ ViaVecTy = IsLASX256 ? MVT::v4i64 : MVT::v2i64; ++ break; ++ } ++ ++ SDNode *Res; ++ ++ // If we have a signed 13 bit integer, we can splat it directly. ++ // ++ // If we have something bigger we can synthesize the value into a GPR and ++ // splat from there. ++ if (SplatValue.isSignedIntN(10)) { ++ SDValue Imm = CurDAG->getTargetConstant(SplatValue, DL, ++ ViaVecTy.getVectorElementType()); ++ ++ Res = CurDAG->getMachineNode(LdiOp, DL, ViaVecTy, Imm); ++ } else if (SplatValue.isSignedIntN(12)) { ++ bool Is32BitSplat = SplatBitSize < 64 ? true : false; ++ const unsigned ADDIOp = ++ Is32BitSplat ? LoongArch::ADDI_W : LoongArch::ADDI_D; ++ const MVT SplatMVT = Is32BitSplat ? MVT::i32 : MVT::i64; ++ SDValue ZeroVal = CurDAG->getRegister( ++ Is32BitSplat ? LoongArch::ZERO : LoongArch::ZERO_64, SplatMVT); ++ ++ const unsigned FILLOp = ++ (SplatBitSize == 16) ++ ? (IsLASX256 ? LoongArch::XVREPLGR2VR_H : LoongArch::VREPLGR2VR_H) ++ : (SplatBitSize == 32 ++ ? (IsLASX256 ? LoongArch::XVREPLGR2VR_W ++ : LoongArch::VREPLGR2VR_W) ++ : (SplatBitSize == 64 ++ ? (IsLASX256 ? 
LoongArch::XVREPLGR2VR_D ++ : LoongArch::VREPLGR2VR_D) ++ : 0)); ++ ++ assert(FILLOp != 0 && "Unknown FILL Op for splat synthesis!"); ++ ++ short Lo = SplatValue.getLoBits(12).getSExtValue(); ++ SDValue LoVal = CurDAG->getTargetConstant(Lo, DL, SplatMVT); ++ ++ Res = CurDAG->getMachineNode(ADDIOp, DL, SplatMVT, ZeroVal, LoVal); ++ Res = CurDAG->getMachineNode(FILLOp, DL, ViaVecTy, SDValue(Res, 0)); ++ } else if (SplatValue.isSignedIntN(16) && SplatBitSize == 16) { ++ const unsigned Lo = SplatValue.getLoBits(12).getZExtValue(); ++ const unsigned Hi = SplatValue.lshr(12).getLoBits(4).getZExtValue(); ++ SDValue ZeroVal = CurDAG->getRegister(LoongArch::ZERO, MVT::i32); ++ ++ SDValue LoVal = CurDAG->getTargetConstant(Lo, DL, MVT::i32); ++ SDValue HiVal = CurDAG->getTargetConstant(Hi, DL, MVT::i32); ++ if (Hi) ++ Res = CurDAG->getMachineNode(LoongArch::LU12I_W32, DL, MVT::i32, HiVal); ++ ++ if (Lo) ++ Res = CurDAG->getMachineNode(LoongArch::ORI32, DL, MVT::i32, ++ Hi ? SDValue(Res, 0) : ZeroVal, LoVal); ++ ++ assert((Hi || Lo) && "Zero case reached 32 bit case splat synthesis!"); ++ const unsigned FILLOp = ++ IsLASX256 ? LoongArch::XVREPLGR2VR_H : LoongArch::VREPLGR2VR_H; ++ EVT FILLTy = IsLASX256 ? MVT::v16i16 : MVT::v8i16; ++ Res = CurDAG->getMachineNode(FILLOp, DL, FILLTy, SDValue(Res, 0)); ++ } else if (SplatValue.isSignedIntN(32) && SplatBitSize == 32) { ++ // Only handle the cases where the splat size agrees with the size ++ // of the SplatValue here. ++ const unsigned Lo = SplatValue.getLoBits(12).getZExtValue(); ++ const unsigned Hi = SplatValue.lshr(12).getLoBits(20).getZExtValue(); ++ SDValue ZeroVal = CurDAG->getRegister(LoongArch::ZERO, MVT::i32); ++ ++ SDValue LoVal = CurDAG->getTargetConstant(Lo, DL, MVT::i32); ++ SDValue HiVal = CurDAG->getTargetConstant(Hi, DL, MVT::i32); ++ if (Hi) ++ Res = CurDAG->getMachineNode(LoongArch::LU12I_W32, DL, MVT::i32, HiVal); ++ ++ if (Lo) ++ Res = CurDAG->getMachineNode(LoongArch::ORI32, DL, MVT::i32, ++ Hi ? SDValue(Res, 0) : ZeroVal, LoVal); ++ ++ assert((Hi || Lo) && "Zero case reached 32 bit case splat synthesis!"); ++ const unsigned FILLOp = ++ IsLASX256 ? LoongArch::XVREPLGR2VR_W : LoongArch::VREPLGR2VR_W; ++ EVT FILLTy = IsLASX256 ? MVT::v8i32 : MVT::v4i32; ++ Res = CurDAG->getMachineNode(FILLOp, DL, FILLTy, SDValue(Res, 0)); ++ ++ } else if ((SplatValue.isSignedIntN(32) && SplatBitSize == 64 && ++ ABI.IsLP64()) || ++ (SplatValue.isSignedIntN(64))) { ++ ++ int64_t Imm = SplatValue.getSExtValue(); ++ LoongArchAnalyzeImmediate::InstSeq Seq = ++ LoongArchAnalyzeImmediate::generateInstSeq(Imm, true); ++ SDValue SrcReg = CurDAG->getRegister(LoongArch::ZERO_64, MVT::i64); ++ ++ for (LoongArchAnalyzeImmediate::Inst &Inst : Seq) { ++ SDValue SDImm = CurDAG->getTargetConstant(Inst.Imm, DL, MVT::i64); ++ if (Inst.Opc == LoongArch::LU12I_W || Inst.Opc == LoongArch::LU12I_W32) ++ Res = CurDAG->getMachineNode(Inst.Opc, DL, MVT::i64, SDImm); ++ else ++ Res = CurDAG->getMachineNode(Inst.Opc, DL, MVT::i64, SrcReg, SDImm); ++ SrcReg = SDValue(Res, 0); ++ } ++ ++ const unsigned FILLOp = ++ IsLASX256 ? LoongArch::XVREPLGR2VR_D : LoongArch::VREPLGR2VR_D; ++ EVT FILLTy = IsLASX256 ? MVT::v4i64 : MVT::v2i64; ++ Res = CurDAG->getMachineNode(FILLOp, DL, FILLTy, SDValue(Res, 0)); ++ ++ } else ++ return false; ++ ++ if (ResVecTy != ViaVecTy) { ++ // If LdiOp is writing to a different register class to ResVecTy, then ++ // fix it up here. 
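In the 32-bit splat path above the constant is first rebuilt in a GPR from a 20-bit high part (the LU12I_W32 payload, which lands in bits 31..12) and a 12-bit low part (the ORI32 payload), and only then broadcast with [x]vreplgr2vr.w. A quick arithmetic check of that Hi/Lo split, illustrative rather than taken from the patch:

  #include <cassert>
  #include <cstdint>

  int main() {
    uint32_t Splat = 0x12345678;
    uint32_t Hi = (Splat >> 12) & 0xFFFFF; // 0x12345, the lu12i.w-style payload
    uint32_t Lo = Splat & 0xFFF;           // 0x678,   the ori-style payload
    assert(((Hi << 12) | Lo) == Splat);    // the two pieces recombine exactly
    return 0;
  }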
This COPY_TO_REGCLASS should never cause a move.v ++ // since the source and destination register sets contain the same ++ // registers. ++ const TargetLowering *TLI = getTargetLowering(); ++ MVT ResVecTySimple = ResVecTy.getSimpleVT(); ++ const TargetRegisterClass *RC = TLI->getRegClassFor(ResVecTySimple); ++ Res = CurDAG->getMachineNode( ++ LoongArch::COPY_TO_REGCLASS, DL, ResVecTy, SDValue(Res, 0), ++ CurDAG->getTargetConstant(RC->getID(), DL, MVT::i32)); ++ } ++ ++ ReplaceNode(Node, Res); ++ return true; ++ } ++ } ++ + return false; + } + +-// This pass converts a legalized DAG into a LoongArch-specific DAG, ready +-// for instruction scheduling. +-FunctionPass *llvm::createLoongArchISelDag(LoongArchTargetMachine &TM) { +- return new LoongArchDAGToDAGISel(TM); ++/// Select instructions not customized! Used for ++/// expanded, promoted and normal instructions ++void LoongArchDAGToDAGISel::Select(SDNode *Node) { ++ // If we have a custom node, we already have selected! ++ if (Node->isMachineOpcode()) { ++ LLVM_DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n"); ++ Node->setNodeId(-1); ++ return; ++ } ++ ++ // See if subclasses can handle this node. ++ if (trySelect(Node)) ++ return; ++ ++ // Select the default instruction ++ SelectCode(Node); ++} ++ ++bool LoongArchDAGToDAGISel:: ++SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, ++ std::vector &OutOps) { ++ SDValue Base, Offset; ++ ++ switch(ConstraintID) { ++ default: ++ llvm_unreachable("Unexpected asm memory constraint"); ++ // All memory constraints can at least accept raw pointers. ++ case InlineAsm::Constraint_i: ++ OutOps.push_back(Op); ++ OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32)); ++ return false; ++ case InlineAsm::Constraint_m: ++ if (selectAddrRegImm12(Op, Base, Offset)) { ++ OutOps.push_back(Base); ++ OutOps.push_back(Offset); ++ return false; ++ } ++ OutOps.push_back(Op); ++ OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32)); ++ return false; ++ case InlineAsm::Constraint_R: ++ if (selectAddrRegImm12(Op, Base, Offset)) { ++ OutOps.push_back(Base); ++ OutOps.push_back(Offset); ++ return false; ++ } ++ OutOps.push_back(Op); ++ OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32)); ++ return false; ++ case InlineAsm::Constraint_ZC: ++ if (selectIntAddrSImm14Lsl2(Op, Base, Offset)) { ++ OutOps.push_back(Base); ++ OutOps.push_back(Offset); ++ return false; ++ } ++ OutOps.push_back(Op); ++ OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32)); ++ return false; ++ case InlineAsm::Constraint_ZB: ++ OutOps.push_back(Op); ++ OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32)); ++ return false; ++ } ++ return true; ++} ++ ++// This optimisation is ported from RISCV. ++// Merge an ADDI into the offset of a load/store instruction where possible. ++// (load (addi base, off1), off2) -> (load base, off1+off2) ++// (store val, (addi base, off1), off2) -> (store val, base, off1+off2) ++// This is possible when off1+off2 fits a 12-bit immediate. ++void LoongArchDAGToDAGISel::doPeepholeLoadStoreADDI() { ++ SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode()); ++ ++Position; ++ ++ while (Position != CurDAG->allnodes_begin()) { ++ SDNode *N = &*--Position; ++ // Skip dead nodes and any non-machine opcodes. ++ if (N->use_empty() || !N->isMachineOpcode()) ++ continue; ++ ++ int OffsetOpIdx; ++ int BaseOpIdx; ++ ++ // TODO: handle more instructions. 
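SelectInlineAsmMemoryOperand above gives the inline-asm memory constraints their addressing rules: "m" and "R" go through selectAddrRegImm12 (base register plus signed 12-bit offset), "ZC" goes through selectIntAddrSImm14Lsl2 (base plus a signed 14-bit offset scaled by 4, the ll/sc form), and "ZB" always passes the bare pointer with a zero offset. A hypothetical user-level sketch, not part of the patch, assuming the usual GCC-style expansion in which such an operand prints as "base, offset" in LoongArch syntax:

  // Constraint spellings follow the switch above; load_word is illustrative.
  int load_word(const int *p) {
    int v;
    __asm__("ld.w %0, %1" : "=r"(v) : "m"(*p));
    return v;
  }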
++ switch (N->getMachineOpcode()) { ++ default: ++ continue; ++ case LoongArch::LD_B: ++ case LoongArch::LD_B32: ++ case LoongArch::LD_BU: ++ case LoongArch::LD_BU32: ++ case LoongArch::LD_H: ++ case LoongArch::LD_H32: ++ case LoongArch::LD_HU: ++ case LoongArch::LD_HU32: ++ case LoongArch::LD_W: ++ case LoongArch::LD_W32: ++ case LoongArch::LD_WU: ++ case LoongArch::LD_D: ++ BaseOpIdx = 0; ++ OffsetOpIdx = 1; ++ break; ++ case LoongArch::ST_B: ++ case LoongArch::ST_B32: ++ case LoongArch::ST_H: ++ case LoongArch::ST_H32: ++ case LoongArch::ST_W: ++ case LoongArch::ST_W32: ++ case LoongArch::ST_D: ++ BaseOpIdx = 1; ++ OffsetOpIdx = 2; ++ break; ++ } ++ ++ if (!isa(N->getOperand(OffsetOpIdx))) ++ continue; ++ ++ SDValue Base = N->getOperand(BaseOpIdx); ++ ++ // If the base is an ADDI, we can merge it in to the load/store. ++ // TODO: handle more instructions, i.e. ADDI_W. ++ if (!Base.isMachineOpcode() || Base.getMachineOpcode() != LoongArch::ADDI_D) ++ continue; ++ ++ SDValue ImmOperand = Base.getOperand(1); ++ uint64_t Offset2 = N->getConstantOperandVal(OffsetOpIdx); ++ ++ if (auto *Const = dyn_cast(ImmOperand)) { ++ int64_t Offset1 = Const->getSExtValue(); ++ int64_t CombinedOffset = Offset1 + Offset2; ++ if (!isInt<12>(CombinedOffset)) ++ continue; ++ ImmOperand = CurDAG->getTargetConstant(CombinedOffset, SDLoc(ImmOperand), ++ ImmOperand.getValueType()); ++ // TODO: handle below cases. ++#if 0 ++ } else if (auto *GA = dyn_cast(ImmOperand)) { ++ // If the off1 in (addi base, off1) is a global variable's address (its ++ // low part, really), then we can rely on the alignment of that variable ++ // to provide a margin of safety before off1 can overflow the 12 bits. ++ // Check if off2 falls within that margin; if so off1+off2 can't overflow. ++ const DataLayout &DL = CurDAG->getDataLayout(); ++ Align Alignment = GA->getGlobal()->getPointerAlignment(DL); ++ if (Offset2 != 0 && Alignment <= Offset2) ++ continue; ++ int64_t Offset1 = GA->getOffset(); ++ int64_t CombinedOffset = Offset1 + Offset2; ++ ImmOperand = CurDAG->getTargetGlobalAddress( ++ GA->getGlobal(), SDLoc(ImmOperand), ImmOperand.getValueType(), ++ CombinedOffset, GA->getTargetFlags()); ++ } else if (auto *CP = dyn_cast(ImmOperand)) { ++ // Ditto. ++ Align Alignment = CP->getAlign(); ++ if (Offset2 != 0 && Alignment <= Offset2) ++ continue; ++ int64_t Offset1 = CP->getOffset(); ++ int64_t CombinedOffset = Offset1 + Offset2; ++ ImmOperand = CurDAG->getTargetConstantPool( ++ CP->getConstVal(), ImmOperand.getValueType(), CP->getAlign(), ++ CombinedOffset, CP->getTargetFlags()); ++#endif ++ } else { ++ continue; ++ } ++ ++ LLVM_DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase: "); ++ LLVM_DEBUG(Base->dump(CurDAG)); ++ LLVM_DEBUG(dbgs() << "\nN: "); ++ LLVM_DEBUG(N->dump(CurDAG)); ++ LLVM_DEBUG(dbgs() << "\n"); ++ ++ // Modify the offset operand of the load/store. ++ if (BaseOpIdx == 0) // Load ++ CurDAG->UpdateNodeOperands(N, Base.getOperand(0), ImmOperand, ++ N->getOperand(2)); ++ else // Store ++ CurDAG->UpdateNodeOperands(N, N->getOperand(0), Base.getOperand(0), ++ ImmOperand, N->getOperand(3)); ++ ++ // The add-immediate may now be dead, in which case remove it. 
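The fold above is purely arithmetic: the ADDI_D immediate and the memory offset are summed, and the merge only fires while the sum still fits the signed 12-bit offset field of the load/store. A small standalone restatement (canFoldAddiIntoMemOp is an illustrative name):

  #include <cstdint>

  // Mirrors the isInt<12> guard above: LoongArch loads/stores take offsets in
  // [-2048, 2047], so (load (addi base, off1), off2) folds only when
  // off1 + off2 stays inside that window.
  static bool canFoldAddiIntoMemOp(int64_t Off1, int64_t Off2) {
    int64_t Combined = Off1 + Off2;
    return Combined >= -2048 && Combined <= 2047;
  }
  // e.g. canFoldAddiIntoMemOp(2040, 16) is false (2056 overflows the field),
  // while canFoldAddiIntoMemOp(2000, 40) is true and the ADDI_D can be removed.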
++ if (Base.getNode()->use_empty()) ++ CurDAG->RemoveDeadNode(Base.getNode()); ++ } ++} ++ ++FunctionPass *llvm::createLoongArchISelDag(LoongArchTargetMachine &TM, ++ CodeGenOpt::Level OptLevel) { ++ return new LoongArchDAGToDAGISel(TM, OptLevel); + } +diff --git a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h +index 8c9357d75..765497318 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h ++++ b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h +@@ -1,4 +1,4 @@ +-//=- LoongArchISelDAGToDAG.h - A dag to dag inst selector for LoongArch ---===// ++//===---- LoongArchISelDAGToDAG.h - A Dag to Dag Inst Selector for LoongArch --------===// + // + // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + // See https://llvm.org/LICENSE.txt for license information. +@@ -14,47 +14,138 @@ + #define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHISELDAGTODAG_H + + #include "LoongArch.h" ++#include "LoongArchSubtarget.h" + #include "LoongArchTargetMachine.h" + #include "llvm/CodeGen/SelectionDAGISel.h" + +-// LoongArch-specific code to select LoongArch machine instructions for +-// SelectionDAG operations. ++//===----------------------------------------------------------------------===// ++// Instruction Selector Implementation ++//===----------------------------------------------------------------------===// ++ ++//===----------------------------------------------------------------------===// ++// LoongArchDAGToDAGISel - LoongArch specific code to select LoongArch machine ++// instructions for SelectionDAG operations. ++//===----------------------------------------------------------------------===// + namespace llvm { +-class LoongArchDAGToDAGISel : public SelectionDAGISel { +- const LoongArchSubtarget *Subtarget = nullptr; + ++class LoongArchDAGToDAGISel : public SelectionDAGISel { + public: +- explicit LoongArchDAGToDAGISel(LoongArchTargetMachine &TM) +- : SelectionDAGISel(TM) {} ++ explicit LoongArchDAGToDAGISel(LoongArchTargetMachine &TM, CodeGenOpt::Level OL) ++ : SelectionDAGISel(TM, OL), Subtarget(nullptr) {} + ++ // Pass Name + StringRef getPassName() const override { + return "LoongArch DAG->DAG Pattern Instruction Selection"; + } + +- bool runOnMachineFunction(MachineFunction &MF) override { +- Subtarget = &MF.getSubtarget(); +- return SelectionDAGISel::runOnMachineFunction(MF); +- } ++ bool runOnMachineFunction(MachineFunction &MF) override; + +- void Select(SDNode *Node) override; ++ void PostprocessISelDAG() override; + +- bool SelectBaseAddr(SDValue Addr, SDValue &Base); ++ void getAnalysisUsage(AnalysisUsage &AU) const override; + +- bool selectShiftMask(SDValue N, unsigned ShiftWidth, SDValue &ShAmt); +- bool selectShiftMaskGRLen(SDValue N, SDValue &ShAmt) { +- return selectShiftMask(N, Subtarget->getGRLen(), ShAmt); +- } +- bool selectShiftMask32(SDValue N, SDValue &ShAmt) { +- return selectShiftMask(N, 32, ShAmt); +- } ++private: ++ /// Keep a pointer to the LoongArchSubtarget around so that we can make the right ++ /// decision when generating code for different targets. ++ const LoongArchSubtarget *Subtarget; ++ // Include the pieces autogenerated from the target description. ++ #include "LoongArchGenDAGISel.inc" ++ ++ void doPeepholeLoadStoreADDI(); ++ ++ bool selectAddrFrameIndex(SDValue Addr, SDValue &Base, SDValue &Offset) const; ++ ++ bool selectAddrFrameIndexOffset(SDValue Addr, SDValue &Base, SDValue &Offset, ++ unsigned OffsetBits, ++ unsigned ShiftAmount) const; ++ ++ // Complex Pattern. 
++ /// (reg + imm). ++ bool selectAddrRegImm(SDValue Addr, SDValue &Base, SDValue &Offset) const; ++ ++ /// Fall back on this function if all else fails. ++ bool selectAddrDefault(SDValue Addr, SDValue &Base, SDValue &Offset) const; + +- bool selectSExti32(SDValue N, SDValue &Val); +- bool selectZExti32(SDValue N, SDValue &Val); ++ /// Match integer address pattern. ++ bool selectIntAddr(SDValue Addr, SDValue &Base, SDValue &Offset) const; ++ ++ bool selectAddrRegImm12(SDValue Addr, SDValue &Base, ++ SDValue &Offset) const; ++ ++ /// Match addr+simm12 and addr ++ bool selectIntAddrSImm12(SDValue Addr, SDValue &Base, ++ SDValue &Offset) const; ++ ++ bool selectIntAddrSImm10(SDValue Addr, SDValue &Base, ++ SDValue &Offset) const; ++ ++ bool selectIntAddrSImm10Lsl1(SDValue Addr, SDValue &Base, ++ SDValue &Offset) const; ++ ++ bool selectIntAddrSImm10Lsl2(SDValue Addr, SDValue &Base, ++ SDValue &Offset) const; ++ ++ bool selectIntAddrSImm9Lsl3(SDValue Addr, SDValue &Base, ++ SDValue &Offset) const; ++ ++ bool selectIntAddrSImm11Lsl1(SDValue Addr, SDValue &Base, ++ SDValue &Offset) const; ++ ++ bool selectIntAddrSImm14Lsl2(SDValue Addr, SDValue &Base, ++ SDValue &Offset) const; ++ ++ bool selectIntAddrSImm10Lsl3(SDValue Addr, SDValue &Base, ++ SDValue &Offset) const; ++ ++ /// Select constant vector splats. ++ bool selectVSplat(SDNode *N, APInt &Imm, unsigned MinSizeInBits) const; ++ /// Select constant vector splats whose value fits in a given integer. ++ bool selectVSplatCommon(SDValue N, SDValue &Imm, bool Signed, ++ unsigned ImmBitSize) const; ++ /// Select constant vector splats whose value fits in a uimm1. ++ bool selectVSplatUimm1(SDValue N, SDValue &Imm) const; ++ /// Select constant vector splats whose value fits in a uimm2. ++ bool selectVSplatUimm2(SDValue N, SDValue &Imm) const; ++ /// Select constant vector splats whose value fits in a uimm3. ++ bool selectVSplatUimm3(SDValue N, SDValue &Imm) const; ++ /// Select constant vector splats whose value fits in a uimm4. ++ bool selectVSplatUimm4(SDValue N, SDValue &Imm) const; ++ /// Select constant vector splats whose value fits in a uimm5. ++ bool selectVSplatUimm5(SDValue N, SDValue &Imm) const; ++ /// Select constant vector splats whose value fits in a uimm6. ++ bool selectVSplatUimm6(SDValue N, SDValue &Imm) const; ++ /// Select constant vector splats whose value fits in a uimm8. ++ bool selectVSplatUimm8(SDValue N, SDValue &Imm) const; ++ /// Select constant vector splats whose value fits in a simm5. ++ bool selectVSplatSimm5(SDValue N, SDValue &Imm) const; ++ /// Select constant vector splats whose value is a power of 2. ++ bool selectVSplatUimmPow2(SDValue N, SDValue &Imm) const; ++ /// Select constant vector splats whose value is the inverse of a ++ /// power of 2. ++ bool selectVSplatUimmInvPow2(SDValue N, SDValue &Imm) const; ++ /// Select constant vector splats whose value is a run of set bits ++ /// ending at the most significant bit ++ bool selectVSplatMaskL(SDValue N, SDValue &Imm) const; ++ /// Select constant vector splats whose value is a run of set bits ++ /// starting at bit zero. ++ bool selectVSplatMaskR(SDValue N, SDValue &Imm) const; ++ ++ void Select(SDNode *N) override; ++ ++ bool trySelect(SDNode *Node); ++ ++ // getImm - Return a target constant with the specified value. ++ inline SDValue getImm(const SDNode *Node, uint64_t Imm) { ++ return CurDAG->getTargetConstant(Imm, SDLoc(Node), Node->getValueType(0)); ++ } + +-// Include the pieces autogenerated from the target description. 
+-#include "LoongArchGenDAGISel.inc" ++ bool SelectInlineAsmMemoryOperand(const SDValue &Op, ++ unsigned ConstraintID, ++ std::vector &OutOps) override; + }; + +-} // end namespace llvm ++FunctionPass *createLoongArchISelDag(LoongArchTargetMachine &TM, ++ CodeGenOpt::Level OptLevel); ++} + +-#endif // LLVM_LIB_TARGET_LOONGARCH_LOONGARCHISELDAGTODAG_H ++#endif +diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +index 93c886434..4c5f3ffd8 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp ++++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +@@ -1,4 +1,4 @@ +-//=- LoongArchISelLowering.cpp - LoongArch DAG Lowering Implementation ---===// ++//===- LoongArchISelLowering.cpp - LoongArch DAG Lowering Implementation ------------===// + // + // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + // See https://llvm.org/LICENSE.txt for license information. +@@ -6,1094 +6,8250 @@ + // + //===----------------------------------------------------------------------===// + // +-// This file defines the interfaces that LoongArch uses to lower LLVM code into +-// a selection DAG. ++// This file defines the interfaces that LoongArch uses to lower LLVM code into a ++// selection DAG. + // + //===----------------------------------------------------------------------===// + + #include "LoongArchISelLowering.h" +-#include "LoongArch.h" +-#include "LoongArchMachineFunctionInfo.h" ++#include "MCTargetDesc/LoongArchBaseInfo.h" ++#include "MCTargetDesc/LoongArchInstPrinter.h" ++#include "MCTargetDesc/LoongArchMCTargetDesc.h" ++#include "LoongArchCCState.h" ++#include "LoongArchInstrInfo.h" ++#include "LoongArchMachineFunction.h" + #include "LoongArchRegisterInfo.h" + #include "LoongArchSubtarget.h" + #include "LoongArchTargetMachine.h" +-#include "MCTargetDesc/LoongArchMCTargetDesc.h" ++#include "LoongArchTargetObjectFile.h" ++#include "llvm/ADT/APFloat.h" ++#include "llvm/ADT/APInt.h" ++#include "llvm/ADT/ArrayRef.h" ++#include "llvm/ADT/STLExtras.h" ++#include "llvm/ADT/SmallVector.h" + #include "llvm/ADT/Statistic.h" ++#include "llvm/ADT/StringRef.h" ++#include "llvm/ADT/StringSwitch.h" ++#include "llvm/ADT/Triple.h" ++#include "llvm/CodeGen/CallingConvLower.h" ++#include "llvm/CodeGen/FunctionLoweringInfo.h" + #include "llvm/CodeGen/ISDOpcodes.h" ++#include "llvm/CodeGen/MachineBasicBlock.h" ++#include "llvm/CodeGen/MachineFrameInfo.h" ++#include "llvm/CodeGen/MachineFunction.h" ++#include "llvm/CodeGen/MachineInstr.h" ++#include "llvm/CodeGen/MachineInstrBuilder.h" ++#include "llvm/CodeGen/MachineJumpTableInfo.h" ++#include "llvm/CodeGen/MachineMemOperand.h" ++#include "llvm/CodeGen/MachineOperand.h" ++#include "llvm/CodeGen/MachineRegisterInfo.h" ++#include "llvm/CodeGen/RuntimeLibcalls.h" ++#include "llvm/CodeGen/SelectionDAG.h" ++#include "llvm/CodeGen/SelectionDAGNodes.h" ++#include "llvm/CodeGen/TargetFrameLowering.h" ++#include "llvm/CodeGen/TargetInstrInfo.h" ++#include "llvm/CodeGen/TargetRegisterInfo.h" ++#include "llvm/CodeGen/TargetSubtargetInfo.h" ++#include "llvm/CodeGen/ValueTypes.h" ++#include "llvm/IR/CallingConv.h" ++#include "llvm/IR/Constants.h" ++#include "llvm/IR/DataLayout.h" ++#include "llvm/IR/DebugLoc.h" ++#include "llvm/IR/DerivedTypes.h" ++#include "llvm/IR/Function.h" ++#include "llvm/IR/GlobalValue.h" ++#include "llvm/IR/Intrinsics.h" ++#include "llvm/IR/IntrinsicsLoongArch.h" ++#include "llvm/IR/Type.h" ++#include "llvm/IR/Value.h" ++#include "llvm/MC/MCContext.h" 
++#include "llvm/MC/MCRegisterInfo.h" ++#include "llvm/Support/Casting.h" ++#include "llvm/Support/CodeGen.h" ++#include "llvm/Support/CommandLine.h" ++#include "llvm/Support/Compiler.h" + #include "llvm/Support/Debug.h" +-#include "llvm/Support/KnownBits.h" ++#include "llvm/Support/ErrorHandling.h" ++#include "llvm/Support/MachineValueType.h" ++#include "llvm/Support/MathExtras.h" ++#include "llvm/Support/raw_ostream.h" ++#include "llvm/Target/TargetMachine.h" ++#include "llvm/Target/TargetOptions.h" ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include + + using namespace llvm; + +-#define DEBUG_TYPE "loongarch-isel-lowering" ++#define DEBUG_TYPE "loongarch-lower" + +-static cl::opt ZeroDivCheck( +- "loongarch-check-zero-division", cl::Hidden, +- cl::desc("Trap on integer division by zero."), +- cl::init(false)); ++STATISTIC(NumTailCalls, "Number of tail calls"); + +-LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, +- const LoongArchSubtarget &STI) +- : TargetLowering(TM), Subtarget(STI) { ++static cl::opt ++NoZeroDivCheck("mnocheck-zero-division", cl::Hidden, ++ cl::desc("LoongArch: Don't trap on integer division by zero."), ++ cl::init(false)); + +- MVT GRLenVT = Subtarget.getGRLenVT(); +- // Set up the register classes. +- addRegisterClass(GRLenVT, &LoongArch::GPRRegClass); +- if (Subtarget.hasBasicF()) +- addRegisterClass(MVT::f32, &LoongArch::FPR32RegClass); +- if (Subtarget.hasBasicD()) +- addRegisterClass(MVT::f64, &LoongArch::FPR64RegClass); ++static const MCPhysReg LoongArch64DPRegs[8] = { ++ LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64, ++ LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64 ++}; + +- setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, GRLenVT, +- MVT::i1, Promote); ++// If I is a shifted mask, set the size (SMSize) and the first bit of the ++// mask (SMLsb), and return true. ++// For example, if I is 0x003ff800, (SMLsb, SMSize) = (11, 11). ++static bool isShiftedMask(uint64_t I, uint64_t &SMLsb, uint64_t &SMSize) { ++ if (!isShiftedMask_64(I)) ++ return false; + +- // TODO: add necessary setOperationAction calls later. 
+- setOperationAction(ISD::SHL_PARTS, GRLenVT, Custom); +- setOperationAction(ISD::SRA_PARTS, GRLenVT, Custom); +- setOperationAction(ISD::SRL_PARTS, GRLenVT, Custom); +- setOperationAction(ISD::FP_TO_SINT, GRLenVT, Custom); ++ SMSize = countPopulation(I); ++ SMLsb = countTrailingZeros(I); ++ return true; ++} + +- setOperationAction({ISD::GlobalAddress, ISD::ConstantPool}, GRLenVT, Custom); ++SDValue LoongArchTargetLowering::getTargetNode(GlobalAddressSDNode *N, EVT Ty, ++ SelectionDAG &DAG, ++ unsigned Flag) const { ++ return DAG.getTargetGlobalAddress(N->getGlobal(), SDLoc(N), Ty, 0, Flag); ++} + +- if (Subtarget.is64Bit()) { +- setOperationAction(ISD::SHL, MVT::i32, Custom); +- setOperationAction(ISD::SRA, MVT::i32, Custom); +- setOperationAction(ISD::SRL, MVT::i32, Custom); +- setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); +- setOperationAction(ISD::BITCAST, MVT::i32, Custom); +- if (Subtarget.hasBasicF() && !Subtarget.hasBasicD()) +- setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); ++SDValue LoongArchTargetLowering::getTargetNode(ExternalSymbolSDNode *N, EVT Ty, ++ SelectionDAG &DAG, ++ unsigned Flag) const { ++ return DAG.getTargetExternalSymbol(N->getSymbol(), Ty, Flag); ++} ++ ++SDValue LoongArchTargetLowering::getTargetNode(BlockAddressSDNode *N, EVT Ty, ++ SelectionDAG &DAG, ++ unsigned Flag) const { ++ return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(), Flag); ++} ++ ++SDValue LoongArchTargetLowering::getTargetNode(JumpTableSDNode *N, EVT Ty, ++ SelectionDAG &DAG, ++ unsigned Flag) const { ++ return DAG.getTargetJumpTable(N->getIndex(), Ty, Flag); ++} ++ ++SDValue LoongArchTargetLowering::getTargetNode(ConstantPoolSDNode *N, EVT Ty, ++ SelectionDAG &DAG, ++ unsigned Flag) const { ++ return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(), ++ N->getOffset(), Flag); ++} ++ ++const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const { ++ switch ((LoongArchISD::NodeType)Opcode) { ++ case LoongArchISD::FIRST_NUMBER: break; ++ case LoongArchISD::JmpLink: return "LoongArchISD::JmpLink"; ++ case LoongArchISD::TailCall: return "LoongArchISD::TailCall"; ++ case LoongArchISD::GlobalAddress: return "LoongArchISD::GlobalAddress"; ++ case LoongArchISD::Ret: return "LoongArchISD::Ret"; ++ case LoongArchISD::ERet: return "LoongArchISD::ERet"; ++ case LoongArchISD::EH_RETURN: return "LoongArchISD::EH_RETURN"; ++ case LoongArchISD::FPBrcond: return "LoongArchISD::FPBrcond"; ++ case LoongArchISD::FPCmp: return "LoongArchISD::FPCmp"; ++ case LoongArchISD::CMovFP_T: return "LoongArchISD::CMovFP_T"; ++ case LoongArchISD::CMovFP_F: return "LoongArchISD::CMovFP_F"; ++ case LoongArchISD::TruncIntFP: return "LoongArchISD::TruncIntFP"; ++ case LoongArchISD::DBAR: return "LoongArchISD::DBAR"; ++ case LoongArchISD::BSTRPICK: return "LoongArchISD::BSTRPICK"; ++ case LoongArchISD::BSTRINS: return "LoongArchISD::BSTRINS"; ++ case LoongArchISD::VALL_ZERO: ++ return "LoongArchISD::VALL_ZERO"; ++ case LoongArchISD::VANY_ZERO: ++ return "LoongArchISD::VANY_ZERO"; ++ case LoongArchISD::VALL_NONZERO: ++ return "LoongArchISD::VALL_NONZERO"; ++ case LoongArchISD::VANY_NONZERO: ++ return "LoongArchISD::VANY_NONZERO"; ++ case LoongArchISD::VEXTRACT_SEXT_ELT: ++ return "LoongArchISD::VEXTRACT_SEXT_ELT"; ++ case LoongArchISD::VEXTRACT_ZEXT_ELT: ++ return "LoongArchISD::VEXTRACT_ZEXT_ELT"; ++ case LoongArchISD::VNOR: ++ return "LoongArchISD::VNOR"; ++ case LoongArchISD::VSHF: ++ return "LoongArchISD::VSHF"; ++ case LoongArchISD::SHF: ++ return 
"LoongArchISD::SHF"; ++ case LoongArchISD::VPACKEV: ++ return "LoongArchISD::VPACKEV"; ++ case LoongArchISD::VPACKOD: ++ return "LoongArchISD::VPACKOD"; ++ case LoongArchISD::VILVH: ++ return "LoongArchISD::VILVH"; ++ case LoongArchISD::VILVL: ++ return "LoongArchISD::VILVL"; ++ case LoongArchISD::VPICKEV: ++ return "LoongArchISD::VPICKEV"; ++ case LoongArchISD::VPICKOD: ++ return "LoongArchISD::VPICKOD"; ++ case LoongArchISD::INSVE: ++ return "LoongArchISD::INSVE"; ++ case LoongArchISD::VROR: ++ return "LoongArchISD::VROR"; ++ case LoongArchISD::VRORI: ++ return "LoongArchISD::VRORI"; ++ case LoongArchISD::XVBROADCAST: ++ return "LoongArchISD::XVBROADCAST"; ++ case LoongArchISD::VBROADCAST: ++ return "LoongArchISD::VBROADCAST"; ++ case LoongArchISD::VABSD: ++ return "LoongArchISD::VABSD"; ++ case LoongArchISD::UVABSD: ++ return "LoongArchISD::UVABSD"; ++ case LoongArchISD::XVPICKVE: ++ return "LoongArchISD::XVPICKVE"; ++ case LoongArchISD::XVPERMI: ++ return "LoongArchISD::XVPERMI"; ++ case LoongArchISD::XVSHUF4I: ++ return "LoongArchISD::XVSHUF4I"; ++ case LoongArchISD::REVBD: ++ return "LoongArchISD::REVBD"; ++ case LoongArchISD::FSEL: ++ return "LoongArchISD::FSEL"; + } ++ return nullptr; ++} ++ ++LoongArchTargetLowering::LoongArchTargetLowering(const LoongArchTargetMachine &TM, ++ const LoongArchSubtarget &STI) ++ : TargetLowering(TM), Subtarget(STI), ABI(TM.getABI()) { ++ // Set up the register classes ++ addRegisterClass(MVT::i32, &LoongArch::GPR32RegClass); + +- static const ISD::CondCode FPCCToExpand[] = {ISD::SETOGT, ISD::SETOGE, +- ISD::SETUGT, ISD::SETUGE}; ++ if (Subtarget.is64Bit()) ++ addRegisterClass(MVT::i64, &LoongArch::GPR64RegClass); + +- if (Subtarget.hasBasicF()) { +- setCondCodeAction(FPCCToExpand, MVT::f32, Expand); +- setOperationAction(ISD::SELECT_CC, MVT::f32, Expand); ++ // LoongArch does not have i1 type, so use i32 for ++ // setcc operations results (slt, sgt, ...). ++ setBooleanContents(ZeroOrOneBooleanContent); ++ setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); ++ ++ // Load extented operations for i1 types must be promoted ++ for (MVT VT : MVT::integer_valuetypes()) { ++ setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote); ++ setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote); ++ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); + } +- if (Subtarget.hasBasicD()) { +- setCondCodeAction(FPCCToExpand, MVT::f64, Expand); +- setOperationAction(ISD::SELECT_CC, MVT::f64, Expand); +- setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand); +- setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand); ++ ++ // LoongArch doesn't have extending float->double load/store. 
Set LoadExtAction ++ // for f32, f16 ++ for (MVT VT : MVT::fp_valuetypes()) { ++ setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand); ++ setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand); + } + +- setOperationAction(ISD::BR_CC, GRLenVT, Expand); +- setOperationAction(ISD::SELECT_CC, GRLenVT, Expand); +- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); +- setOperationAction({ISD::SMUL_LOHI, ISD::UMUL_LOHI}, GRLenVT, Expand); +- if (!Subtarget.is64Bit()) +- setLibcallName(RTLIB::MUL_I128, nullptr); ++ // Set LoadExtAction for f16 vectors to Expand ++ for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) { ++ MVT F16VT = MVT::getVectorVT(MVT::f16, VT.getVectorNumElements()); ++ if (F16VT.isValid()) ++ setLoadExtAction(ISD::EXTLOAD, VT, F16VT, Expand); ++ } + +- setOperationAction(ISD::FP_TO_UINT, GRLenVT, Custom); +- setOperationAction(ISD::UINT_TO_FP, GRLenVT, Custom); ++ setTruncStoreAction(MVT::f32, MVT::f16, Expand); ++ setTruncStoreAction(MVT::f64, MVT::f16, Expand); ++ ++ setTruncStoreAction(MVT::f64, MVT::f32, Expand); ++ ++ // Used by legalize types to correctly generate the setcc result. ++ // Without this, every float setcc comes with a AND/OR with the result, ++ // we don't want this, since the fpcmp result goes to a flag register, ++ // which is used implicitly by brcond and select operations. ++ AddPromotedToType(ISD::SETCC, MVT::i1, MVT::i32); ++ ++ // LoongArch Custom Operations ++ setOperationAction(ISD::BR_JT, MVT::Other, Expand); ++ setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); ++ setOperationAction(ISD::BlockAddress, MVT::i32, Custom); ++ setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom); ++ setOperationAction(ISD::JumpTable, MVT::i32, Custom); ++ setOperationAction(ISD::ConstantPool, MVT::i32, Custom); ++ setOperationAction(ISD::SELECT, MVT::f32, Custom); ++ setOperationAction(ISD::SELECT, MVT::f64, Custom); ++ setOperationAction(ISD::SELECT, MVT::i32, Custom); ++ setOperationAction(ISD::SETCC, MVT::f32, Custom); ++ setOperationAction(ISD::SETCC, MVT::f64, Custom); ++ setOperationAction(ISD::BRCOND, MVT::Other, Custom); ++ setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); + +- // Compute derived properties from the register classes. 
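The EXTLOAD Expand entries for f32 and f16 just above simply tell the legalizer there is no single widening FP load: such a load is split into an ordinary narrow load followed by an FP_EXTEND, which for f32 to f64 typically ends up as fld.s plus fcvt.d.s. A one-line source-level illustration, not from the patch:

  // The compiler must load the float and convert it, because no
  // "load f32 as f64" instruction exists.
  double widen(const float *p) { return static_cast<double>(*p); }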
+- computeRegisterProperties(STI.getRegisterInfo()); ++ if (Subtarget.is64Bit()) { ++ setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); ++ setOperationAction(ISD::BlockAddress, MVT::i64, Custom); ++ setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom); ++ setOperationAction(ISD::JumpTable, MVT::i64, Custom); ++ setOperationAction(ISD::ConstantPool, MVT::i64, Custom); ++ setOperationAction(ISD::SELECT, MVT::i64, Custom); ++ setOperationAction(ISD::LOAD, MVT::i64, Legal); ++ setOperationAction(ISD::STORE, MVT::i64, Legal); ++ setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); ++ setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom); ++ setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom); ++ setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom); ++ } + +- setStackPointerRegisterToSaveRestore(LoongArch::R3); ++ if (!Subtarget.is64Bit()) { ++ setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom); ++ setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom); ++ setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom); ++ } + +- setBooleanContents(ZeroOrOneBooleanContent); ++ setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom); ++ if (Subtarget.is64Bit()) ++ setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom); ++ ++ setOperationAction(ISD::SDIV, MVT::i32, Expand); ++ setOperationAction(ISD::SREM, MVT::i32, Expand); ++ setOperationAction(ISD::UDIV, MVT::i32, Expand); ++ setOperationAction(ISD::UREM, MVT::i32, Expand); ++ setOperationAction(ISD::SDIV, MVT::i64, Expand); ++ setOperationAction(ISD::SREM, MVT::i64, Expand); ++ setOperationAction(ISD::UDIV, MVT::i64, Expand); ++ setOperationAction(ISD::UREM, MVT::i64, Expand); ++ ++ // Operations not directly supported by LoongArch. ++ setOperationAction(ISD::BR_CC, MVT::f32, Expand); ++ setOperationAction(ISD::BR_CC, MVT::f64, Expand); ++ setOperationAction(ISD::BR_CC, MVT::i32, Expand); ++ setOperationAction(ISD::BR_CC, MVT::i64, Expand); ++ setOperationAction(ISD::SELECT_CC, MVT::i32, Expand); ++ setOperationAction(ISD::SELECT_CC, MVT::i64, Expand); ++ setOperationAction(ISD::SELECT_CC, MVT::f32, Expand); ++ setOperationAction(ISD::SELECT_CC, MVT::f64, Expand); ++ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand); ++ setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand); ++ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand); ++ setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand); ++ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); ++ setOperationAction(ISD::CTPOP, MVT::i32, Expand); ++ setOperationAction(ISD::CTPOP, MVT::i64, Expand); ++ setOperationAction(ISD::ROTL, MVT::i32, Expand); ++ setOperationAction(ISD::ROTL, MVT::i64, Expand); ++ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); ++ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand); ++ ++ setOperationAction(ISD::FSIN, MVT::f32, Expand); ++ setOperationAction(ISD::FSIN, MVT::f64, Expand); ++ setOperationAction(ISD::FCOS, MVT::f32, Expand); ++ setOperationAction(ISD::FCOS, MVT::f64, Expand); ++ setOperationAction(ISD::FSINCOS, MVT::f32, Expand); ++ setOperationAction(ISD::FSINCOS, MVT::f64, Expand); ++ setOperationAction(ISD::FPOW, MVT::f32, Expand); ++ setOperationAction(ISD::FPOW, MVT::f64, Expand); ++ setOperationAction(ISD::FLOG, MVT::f32, Expand); ++ setOperationAction(ISD::FRINT, MVT::f32, Legal); ++ setOperationAction(ISD::FRINT, MVT::f64, Legal); ++ ++ setOperationAction(ISD::FLOG10, MVT::f32, Expand); ++ setOperationAction(ISD::FEXP, MVT::f32, Expand); ++ setOperationAction(ISD::FMA, MVT::f32, Legal); ++ 
setOperationAction(ISD::FMA, MVT::f64, Legal); ++ setOperationAction(ISD::FREM, MVT::f32, Expand); ++ setOperationAction(ISD::FREM, MVT::f64, Expand); ++ ++ setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal); ++ setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal); ++ setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal); ++ setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal); ++ ++ // Lower f16 conversion operations into library calls ++ setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand); ++ setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand); ++ setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand); ++ setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand); ++ ++ setOperationAction(ISD::EH_RETURN, MVT::Other, Custom); ++ ++ setOperationAction(ISD::VASTART, MVT::Other, Custom); ++ setOperationAction(ISD::VAARG, MVT::Other, Custom); ++ setOperationAction(ISD::VACOPY, MVT::Other, Expand); ++ setOperationAction(ISD::VAEND, MVT::Other, Expand); ++ ++ // Use the default for now ++ setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); ++ setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); ++ ++ if (!Subtarget.is64Bit()) { ++ setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Expand); ++ setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand); ++ } + +- setMaxAtomicSizeInBitsSupported(Subtarget.getGRLen()); ++ if (Subtarget.is64Bit()) { ++ setLoadExtAction(ISD::EXTLOAD, MVT::i64, MVT::i32, Custom); ++ setTruncStoreAction(MVT::i64, MVT::i32, Custom); ++ } + +- // Function alignments. +- const Align FunctionAlignment(4); +- setMinFunctionAlignment(FunctionAlignment); ++ setOperationAction(ISD::TRAP, MVT::Other, Legal); ++ setOperationAction(ISD::BITREVERSE, MVT::i32, Legal); ++ setOperationAction(ISD::BITREVERSE, MVT::i64, Legal); + ++ setTargetDAGCombine(ISD::SELECT); + setTargetDAGCombine(ISD::AND); + setTargetDAGCombine(ISD::OR); ++ setTargetDAGCombine(ISD::AssertZext); ++ setTargetDAGCombine(ISD::SHL); ++ setTargetDAGCombine(ISD::SIGN_EXTEND); ++ setTargetDAGCombine(ISD::ZERO_EXTEND); ++ setTargetDAGCombine(ISD::ADD); ++ setTargetDAGCombine(ISD::SUB); ++ setTargetDAGCombine(ISD::MUL); + setTargetDAGCombine(ISD::SRL); +-} ++ setTargetDAGCombine(ISD::SRA); + +-SDValue LoongArchTargetLowering::LowerOperation(SDValue Op, +- SelectionDAG &DAG) const { +- switch (Op.getOpcode()) { +- default: +- report_fatal_error("unimplemented operand"); +- case ISD::GlobalAddress: +- return lowerGlobalAddress(Op, DAG); +- case ISD::SHL_PARTS: +- return lowerShiftLeftParts(Op, DAG); +- case ISD::SRA_PARTS: +- return lowerShiftRightParts(Op, DAG, true); +- case ISD::SRL_PARTS: +- return lowerShiftRightParts(Op, DAG, false); +- case ISD::SHL: +- case ISD::SRA: +- case ISD::SRL: +- // This can be called for an i32 shift amount that needs to be promoted. +- assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() && +- "Unexpected custom legalisation"); +- return SDValue(); +- case ISD::ConstantPool: +- return lowerConstantPool(Op, DAG); +- case ISD::FP_TO_SINT: +- return lowerFP_TO_SINT(Op, DAG); +- case ISD::BITCAST: +- return lowerBITCAST(Op, DAG); +- case ISD::FP_TO_UINT: +- return SDValue(); +- case ISD::UINT_TO_FP: +- return lowerUINT_TO_FP(Op, DAG); ++ if (ABI.IsLP32()) { ++ // These libcalls are not available in 32-bit. 
++ setLibcallName(RTLIB::SHL_I128, nullptr); ++ setLibcallName(RTLIB::SRL_I128, nullptr); ++ setLibcallName(RTLIB::SRA_I128, nullptr); + } +-} + +-SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op, +- SelectionDAG &DAG) const { ++ if (Subtarget.hasLSX() || Subtarget.hasLASX()) { ++ // Expand all truncating stores and extending loads. ++ for (MVT VT0 : MVT::vector_valuetypes()) { ++ for (MVT VT1 : MVT::vector_valuetypes()) { ++ setTruncStoreAction(VT0, VT1, Expand); ++ setLoadExtAction(ISD::SEXTLOAD, VT0, VT1, Expand); ++ setLoadExtAction(ISD::ZEXTLOAD, VT0, VT1, Expand); ++ setLoadExtAction(ISD::EXTLOAD, VT0, VT1, Expand); ++ } ++ } ++ } + +- SDLoc DL(Op); +- auto &TLI = DAG.getTargetLoweringInfo(); +- SDValue Tmp1, Tmp2; +- SDValue Op1 = Op.getOperand(0); +- if (Op1->getOpcode() == ISD::AssertZext || +- Op1->getOpcode() == ISD::AssertSext) +- return Op; +- SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op.getOperand(0)); +- SDValue Res = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f64, Trunc); +- SDNode *N = Res.getNode(); +- TLI.expandUINT_TO_FP(N, Tmp1, Tmp2, DAG); +- return Tmp1; +-} ++ if (Subtarget.hasLSX()) { ++ addLSXIntType(MVT::v16i8, &LoongArch::LSX128BRegClass); ++ addLSXIntType(MVT::v8i16, &LoongArch::LSX128HRegClass); ++ addLSXIntType(MVT::v4i32, &LoongArch::LSX128WRegClass); ++ addLSXIntType(MVT::v2i64, &LoongArch::LSX128DRegClass); ++ addLSXFloatType(MVT::v4f32, &LoongArch::LSX128WRegClass); ++ addLSXFloatType(MVT::v2f64, &LoongArch::LSX128DRegClass); ++ ++ // f16 is a storage-only type, always promote it to f32. ++ setOperationAction(ISD::SETCC, MVT::f16, Promote); ++ setOperationAction(ISD::BR_CC, MVT::f16, Promote); ++ setOperationAction(ISD::SELECT_CC, MVT::f16, Promote); ++ setOperationAction(ISD::SELECT, MVT::f16, Promote); ++ setOperationAction(ISD::FADD, MVT::f16, Promote); ++ setOperationAction(ISD::FSUB, MVT::f16, Promote); ++ setOperationAction(ISD::FMUL, MVT::f16, Promote); ++ setOperationAction(ISD::FDIV, MVT::f16, Promote); ++ setOperationAction(ISD::FREM, MVT::f16, Promote); ++ setOperationAction(ISD::FMA, MVT::f16, Promote); ++ setOperationAction(ISD::FNEG, MVT::f16, Promote); ++ setOperationAction(ISD::FABS, MVT::f16, Promote); ++ setOperationAction(ISD::FCEIL, MVT::f16, Promote); ++ setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote); ++ setOperationAction(ISD::FCOS, MVT::f16, Promote); ++ setOperationAction(ISD::FP_EXTEND, MVT::f16, Promote); ++ setOperationAction(ISD::FFLOOR, MVT::f16, Promote); ++ setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote); ++ setOperationAction(ISD::FPOW, MVT::f16, Promote); ++ setOperationAction(ISD::FPOWI, MVT::f16, Promote); ++ setOperationAction(ISD::FRINT, MVT::f16, Promote); ++ setOperationAction(ISD::FSIN, MVT::f16, Promote); ++ setOperationAction(ISD::FSINCOS, MVT::f16, Promote); ++ setOperationAction(ISD::FSQRT, MVT::f16, Promote); ++ setOperationAction(ISD::FEXP, MVT::f16, Promote); ++ setOperationAction(ISD::FEXP2, MVT::f16, Promote); ++ setOperationAction(ISD::FLOG, MVT::f16, Promote); ++ setOperationAction(ISD::FLOG2, MVT::f16, Promote); ++ setOperationAction(ISD::FLOG10, MVT::f16, Promote); ++ setOperationAction(ISD::FROUND, MVT::f16, Promote); ++ setOperationAction(ISD::FTRUNC, MVT::f16, Promote); ++ setOperationAction(ISD::FMINNUM, MVT::f16, Promote); ++ setOperationAction(ISD::FMAXNUM, MVT::f16, Promote); ++ setOperationAction(ISD::FMINIMUM, MVT::f16, Promote); ++ setOperationAction(ISD::FMAXIMUM, MVT::f16, Promote); ++ ++ setTargetDAGCombine(ISD::AND); ++ setTargetDAGCombine(ISD::OR); ++ 
setTargetDAGCombine(ISD::SRA); ++ setTargetDAGCombine(ISD::VSELECT); ++ setTargetDAGCombine(ISD::XOR); ++ } + +-SDValue LoongArchTargetLowering::lowerBITCAST(SDValue Op, +- SelectionDAG &DAG) const { ++ if (Subtarget.hasLASX()) { ++ addLASXIntType(MVT::v32i8, &LoongArch::LASX256BRegClass); ++ addLASXIntType(MVT::v16i16, &LoongArch::LASX256HRegClass); ++ addLASXIntType(MVT::v8i32, &LoongArch::LASX256WRegClass); ++ addLASXIntType(MVT::v4i64, &LoongArch::LASX256DRegClass); ++ addLASXFloatType(MVT::v8f32, &LoongArch::LASX256WRegClass); ++ addLASXFloatType(MVT::v4f64, &LoongArch::LASX256DRegClass); ++ ++ // f16 is a storage-only type, always promote it to f32. ++ setOperationAction(ISD::SETCC, MVT::f16, Promote); ++ setOperationAction(ISD::BR_CC, MVT::f16, Promote); ++ setOperationAction(ISD::SELECT_CC, MVT::f16, Promote); ++ setOperationAction(ISD::SELECT, MVT::f16, Promote); ++ setOperationAction(ISD::FADD, MVT::f16, Promote); ++ setOperationAction(ISD::FSUB, MVT::f16, Promote); ++ setOperationAction(ISD::FMUL, MVT::f16, Promote); ++ setOperationAction(ISD::FDIV, MVT::f16, Promote); ++ setOperationAction(ISD::FREM, MVT::f16, Promote); ++ setOperationAction(ISD::FMA, MVT::f16, Promote); ++ setOperationAction(ISD::FNEG, MVT::f16, Promote); ++ setOperationAction(ISD::FABS, MVT::f16, Promote); ++ setOperationAction(ISD::FCEIL, MVT::f16, Promote); ++ setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote); ++ setOperationAction(ISD::FCOS, MVT::f16, Promote); ++ setOperationAction(ISD::FP_EXTEND, MVT::f16, Promote); ++ setOperationAction(ISD::FFLOOR, MVT::f16, Promote); ++ setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote); ++ setOperationAction(ISD::FPOW, MVT::f16, Promote); ++ setOperationAction(ISD::FPOWI, MVT::f16, Promote); ++ setOperationAction(ISD::FRINT, MVT::f16, Promote); ++ setOperationAction(ISD::FSIN, MVT::f16, Promote); ++ setOperationAction(ISD::FSINCOS, MVT::f16, Promote); ++ setOperationAction(ISD::FSQRT, MVT::f16, Promote); ++ setOperationAction(ISD::FEXP, MVT::f16, Promote); ++ setOperationAction(ISD::FEXP2, MVT::f16, Promote); ++ setOperationAction(ISD::FLOG, MVT::f16, Promote); ++ setOperationAction(ISD::FLOG2, MVT::f16, Promote); ++ setOperationAction(ISD::FLOG10, MVT::f16, Promote); ++ setOperationAction(ISD::FROUND, MVT::f16, Promote); ++ setOperationAction(ISD::FTRUNC, MVT::f16, Promote); ++ setOperationAction(ISD::FMINNUM, MVT::f16, Promote); ++ setOperationAction(ISD::FMAXNUM, MVT::f16, Promote); ++ setOperationAction(ISD::FMINIMUM, MVT::f16, Promote); ++ setOperationAction(ISD::FMAXIMUM, MVT::f16, Promote); ++ ++ setTargetDAGCombine(ISD::AND); ++ setTargetDAGCombine(ISD::OR); ++ setTargetDAGCombine(ISD::SRA); ++ setTargetDAGCombine(ISD::VSELECT); ++ setTargetDAGCombine(ISD::XOR); ++ } + +- SDLoc DL(Op); +- SDValue Op0 = Op.getOperand(0); ++ if (!Subtarget.useSoftFloat()) { ++ addRegisterClass(MVT::f32, &LoongArch::FGR32RegClass); + +- if (Op.getValueType() == MVT::f32 && Op0.getValueType() == MVT::i32 && +- Subtarget.is64Bit() && Subtarget.hasBasicF()) { +- SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0); +- return DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, NewOp0); ++ // When dealing with single precision only, use libcalls ++ if (!Subtarget.isSingleFloat()) { ++ if (Subtarget.isFP64bit()) ++ addRegisterClass(MVT::f64, &LoongArch::FGR64RegClass); ++ } + } +- return Op; +-} + +-SDValue LoongArchTargetLowering::lowerFP_TO_SINT(SDValue Op, +- SelectionDAG &DAG) const { ++ setOperationAction(ISD::SMUL_LOHI, MVT::i32, Custom); ++ 
setOperationAction(ISD::UMUL_LOHI, MVT::i32, Custom); + +- SDLoc DL(Op); ++ if (Subtarget.is64Bit()) ++ setOperationAction(ISD::MUL, MVT::i64, Custom); ++ ++ if (Subtarget.is64Bit()) { ++ setOperationAction(ISD::SMUL_LOHI, MVT::i64, Custom); ++ setOperationAction(ISD::UMUL_LOHI, MVT::i64, Custom); ++ setOperationAction(ISD::SDIVREM, MVT::i64, Custom); ++ setOperationAction(ISD::UDIVREM, MVT::i64, Custom); ++ } + +- if (Op.getValueSizeInBits() > 32 && Subtarget.hasBasicF() && +- !Subtarget.hasBasicD()) { +- SDValue Dst = +- DAG.getNode(LoongArchISD::FTINT, DL, MVT::f32, Op.getOperand(0)); +- return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Dst); ++ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom); ++ setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom); ++ ++ setOperationAction(ISD::SDIVREM, MVT::i32, Custom); ++ setOperationAction(ISD::UDIVREM, MVT::i32, Custom); ++ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); ++ setOperationAction(ISD::LOAD, MVT::i32, Legal); ++ setOperationAction(ISD::STORE, MVT::i32, Legal); ++ ++ setTargetDAGCombine(ISD::MUL); ++ ++ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); ++ setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); ++ setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); ++ ++ // Replace the accumulator-based multiplies with a ++ // three register instruction. ++ setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); ++ setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); ++ setOperationAction(ISD::MUL, MVT::i32, Legal); ++ setOperationAction(ISD::MULHS, MVT::i32, Legal); ++ setOperationAction(ISD::MULHU, MVT::i32, Legal); ++ ++ // Replace the accumulator-based division/remainder with separate ++ // three register division and remainder instructions. ++ setOperationAction(ISD::SDIVREM, MVT::i32, Expand); ++ setOperationAction(ISD::UDIVREM, MVT::i32, Expand); ++ setOperationAction(ISD::SDIV, MVT::i32, Legal); ++ setOperationAction(ISD::UDIV, MVT::i32, Legal); ++ setOperationAction(ISD::SREM, MVT::i32, Legal); ++ setOperationAction(ISD::UREM, MVT::i32, Legal); ++ ++ // Replace the accumulator-based multiplies with a ++ // three register instruction. ++ setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand); ++ setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand); ++ setOperationAction(ISD::MUL, MVT::i64, Legal); ++ setOperationAction(ISD::MULHS, MVT::i64, Legal); ++ setOperationAction(ISD::MULHU, MVT::i64, Legal); ++ ++ // Replace the accumulator-based division/remainder with separate ++ // three register division and remainder instructions. ++ setOperationAction(ISD::SDIVREM, MVT::i64, Expand); ++ setOperationAction(ISD::UDIVREM, MVT::i64, Expand); ++ setOperationAction(ISD::SDIV, MVT::i64, Legal); ++ setOperationAction(ISD::UDIV, MVT::i64, Legal); ++ setOperationAction(ISD::SREM, MVT::i64, Legal); ++ setOperationAction(ISD::UREM, MVT::i64, Legal); ++ ++ MaxGluedStoresPerMemcpy = 4; ++ ++ setMinFunctionAlignment(Align(4)); ++ ++ // The arguments on the stack are defined in terms of 4-byte slots on LP32 ++ // and 8-byte slots on LPX32/LP64. ++ setMinStackArgumentAlignment((ABI.IsLPX32() || ABI.IsLP64()) ? Align(8) ++ : Align(4)); ++ ++ setStackPointerRegisterToSaveRestore(ABI.IsLP64() ? 
LoongArch::SP_64 : LoongArch::SP); ++ ++ if (Subtarget.hasLASX()) { ++ // = 16*32/2; the smallest memcpy; ++ MaxStoresPerMemcpy = 16; ++ } else if (Subtarget.hasLSX()) { ++ MaxStoresPerMemcpy = 65535; ++ } else { ++ MaxStoresPerMemcpy = 16; + } + +- EVT FPTy = EVT::getFloatingPointVT(Op.getValueSizeInBits()); +- SDValue Trunc = DAG.getNode(LoongArchISD::FTINT, DL, FPTy, Op.getOperand(0)); +- return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Trunc); ++ computeRegisterProperties(Subtarget.getRegisterInfo()); + } + +-SDValue LoongArchTargetLowering::lowerConstantPool(SDValue Op, +- SelectionDAG &DAG) const { +- SDLoc DL(Op); +- EVT Ty = Op.getValueType(); +- ConstantPoolSDNode *N = cast(Op); ++// Enable LSX support for the given integer type and Register class. ++void LoongArchTargetLowering::addLSXIntType(MVT::SimpleValueType Ty, ++ const TargetRegisterClass *RC) { ++ addRegisterClass(Ty, RC); ++ ++ // Expand all builtin opcodes. ++ for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc) ++ setOperationAction(Opc, Ty, Expand); ++ ++ setOperationAction(ISD::BITCAST, Ty, Legal); ++ setOperationAction(ISD::LOAD, Ty, Legal); ++ setOperationAction(ISD::STORE, Ty, Legal); ++ setOperationAction(ISD::EXTRACT_VECTOR_ELT, Ty, Custom); ++ setOperationAction(ISD::INSERT_VECTOR_ELT, Ty, Legal); ++ setOperationAction(ISD::BUILD_VECTOR, Ty, Custom); ++ setOperationAction(ISD::ABS, Ty, Legal); ++ setOperationAction(ISD::UNDEF, Ty, Legal); ++ setOperationAction(ISD::EXTRACT_SUBVECTOR, Ty, Legal); ++ setOperationAction(ISD::CONCAT_VECTORS, Ty, Legal); ++ ++ if (Ty == MVT::v4i32 || Ty == MVT::v2i64) { ++ setOperationAction(ISD::FP_TO_SINT, Ty, Custom); ++ setOperationAction(ISD::FP_TO_UINT, Ty, Custom); ++ } + +- // FIXME: Only support PC-relative addressing to access the symbol. +- // Target flags will be added later. +- if (!isPositionIndependent()) { +- SDValue ConstantN = DAG.getTargetConstantPool( +- N->getConstVal(), Ty, N->getAlign(), N->getOffset()); +- SDValue AddrHi(DAG.getMachineNode(LoongArch::PCALAU12I, DL, Ty, ConstantN), +- 0); +- SDValue Addr(DAG.getMachineNode(Subtarget.is64Bit() ? 
LoongArch::ADDI_D +- : LoongArch::ADDI_W, +- DL, Ty, AddrHi, ConstantN), +- 0); +- return Addr; ++ setOperationAction(ISD::ADD, Ty, Legal); ++ setOperationAction(ISD::AND, Ty, Legal); ++ setOperationAction(ISD::CTLZ, Ty, Legal); ++ setOperationAction(ISD::CTPOP, Ty, Legal); ++ setOperationAction(ISD::MUL, Ty, Legal); ++ setOperationAction(ISD::OR, Ty, Legal); ++ setOperationAction(ISD::SDIV, Ty, Legal); ++ setOperationAction(ISD::SREM, Ty, Legal); ++ setOperationAction(ISD::SHL, Ty, Legal); ++ setOperationAction(ISD::SRA, Ty, Legal); ++ setOperationAction(ISD::SRL, Ty, Legal); ++ setOperationAction(ISD::SUB, Ty, Legal); ++ setOperationAction(ISD::SMAX, Ty, Legal); ++ setOperationAction(ISD::SMIN, Ty, Legal); ++ setOperationAction(ISD::UDIV, Ty, Legal); ++ setOperationAction(ISD::UREM, Ty, Legal); ++ setOperationAction(ISD::UMAX, Ty, Legal); ++ setOperationAction(ISD::UMIN, Ty, Legal); ++ setOperationAction(ISD::VECTOR_SHUFFLE, Ty, Custom); ++ setOperationAction(ISD::VSELECT, Ty, Legal); ++ setOperationAction(ISD::XOR, Ty, Legal); ++ setOperationAction(ISD::MULHS, Ty, Legal); ++ setOperationAction(ISD::MULHU, Ty, Legal); ++ ++ if (Ty == MVT::v4i32 || Ty == MVT::v2i64) { ++ setOperationAction(ISD::SINT_TO_FP, Ty, Custom); ++ setOperationAction(ISD::UINT_TO_FP, Ty, Custom); + } +- report_fatal_error("Unable to lower ConstantPool"); +-} + +-SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op, +- SelectionDAG &DAG) const { +- SDLoc DL(Op); +- EVT Ty = getPointerTy(DAG.getDataLayout()); +- const GlobalValue *GV = cast(Op)->getGlobal(); +- unsigned ADDIOp = Subtarget.is64Bit() ? LoongArch::ADDI_D : LoongArch::ADDI_W; ++ setOperationAction(ISD::SETCC, Ty, Legal); ++ setCondCodeAction(ISD::SETNE, Ty, Expand); ++ setCondCodeAction(ISD::SETGE, Ty, Expand); ++ setCondCodeAction(ISD::SETGT, Ty, Expand); ++ setCondCodeAction(ISD::SETUGE, Ty, Expand); ++ setCondCodeAction(ISD::SETUGT, Ty, Expand); ++} + +- // TODO: Support dso_preemptable and target flags. +- if (GV->isDSOLocal()) { +- SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty); +- SDValue AddrHi(DAG.getMachineNode(LoongArch::PCALAU12I, DL, Ty, GA), 0); +- SDValue Addr(DAG.getMachineNode(ADDIOp, DL, Ty, AddrHi, GA), 0); +- return Addr; ++// Enable LASX support for the given integer type and Register class. ++void LoongArchTargetLowering::addLASXIntType(MVT::SimpleValueType Ty, ++ const TargetRegisterClass *RC) { ++ addRegisterClass(Ty, RC); ++ ++ // Expand all builtin opcodes. 
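// Note (editorial, not part of the patch): defaulting every opcode to Expand
// first and only then re-marking an audited subset Legal/Custom is the
// conservative way to bring up a new vector register class -- any operation
// the target has not explicitly handled falls back to generic legalization
// (scalarization/expansion) instead of being selected incorrectly.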
++ for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc) ++ setOperationAction(Opc, Ty, Expand); ++ ++ // FIXME ++ setOperationAction(ISD::BITCAST, Ty, Legal); ++ setOperationAction(ISD::LOAD, Ty, Legal); ++ setOperationAction(ISD::STORE, Ty, Legal); ++ setOperationAction(ISD::EXTRACT_VECTOR_ELT, Ty, Custom); ++ setOperationAction(ISD::INSERT_VECTOR_ELT, Ty, Custom); ++ setOperationAction(ISD::BUILD_VECTOR, Ty, Custom); ++ setOperationAction(ISD::CONCAT_VECTORS, Ty, Legal); ++ setOperationAction(ISD::UNDEF, Ty, Legal); ++ setOperationAction(ISD::UADDSAT, Ty, Legal); ++ setOperationAction(ISD::SADDSAT, Ty, Legal); ++ setOperationAction(ISD::USUBSAT, Ty, Legal); ++ setOperationAction(ISD::SSUBSAT, Ty, Legal); ++ setOperationAction(ISD::ABS, Ty, Legal); ++ ++ setOperationAction(ISD::ADD, Ty, Legal); ++ setOperationAction(ISD::AND, Ty, Legal); ++ setOperationAction(ISD::CTLZ, Ty, Legal); ++ setOperationAction(ISD::CTPOP, Ty, Legal); ++ setOperationAction(ISD::MUL, Ty, Legal); ++ setOperationAction(ISD::OR, Ty, Legal); ++ setOperationAction(ISD::SDIV, Ty, Legal); ++ setOperationAction(ISD::SREM, Ty, Legal); ++ setOperationAction(ISD::SHL, Ty, Legal); ++ setOperationAction(ISD::SRA, Ty, Legal); ++ setOperationAction(ISD::SRL, Ty, Legal); ++ setOperationAction(ISD::SUB, Ty, Legal); ++ setOperationAction(ISD::SMAX, Ty, Legal); ++ setOperationAction(ISD::SMIN, Ty, Legal); ++ setOperationAction(ISD::UDIV, Ty, Legal); ++ setOperationAction(ISD::UREM, Ty, Legal); ++ setOperationAction(ISD::UMAX, Ty, Legal); ++ setOperationAction(ISD::UMIN, Ty, Legal); ++ setOperationAction(ISD::VECTOR_SHUFFLE, Ty, Custom); ++ setOperationAction(ISD::VSELECT, Ty, Legal); ++ setOperationAction(ISD::XOR, Ty, Legal); ++ setOperationAction(ISD::INSERT_SUBVECTOR, Ty, Legal); ++ setOperationAction(ISD::MULHS, Ty, Legal); ++ setOperationAction(ISD::MULHU, Ty, Legal); ++ ++ setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, Ty, Legal); ++ setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, Ty, Legal); ++ ++ setOperationAction(ISD::SIGN_EXTEND, Ty, Legal); ++ setOperationAction(ISD::ZERO_EXTEND, Ty, Legal); ++ ++ if (Ty == MVT::v8i32 || Ty == MVT::v4i64) { ++ setOperationAction(ISD::SINT_TO_FP, Ty, Custom); ++ setOperationAction(ISD::UINT_TO_FP, Ty, Custom); + } +- report_fatal_error("Unable to lowerGlobalAddress"); ++ ++ setTargetDAGCombine(ISD::CONCAT_VECTORS); ++ ++ setOperationAction(ISD::SETCC, Ty, Legal); ++ setCondCodeAction(ISD::SETNE, Ty, Expand); ++ setCondCodeAction(ISD::SETGE, Ty, Expand); ++ setCondCodeAction(ISD::SETGT, Ty, Expand); ++ setCondCodeAction(ISD::SETUGE, Ty, Expand); ++ setCondCodeAction(ISD::SETUGT, Ty, Expand); + } + +-SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op, +- SelectionDAG &DAG) const { +- SDLoc DL(Op); +- SDValue Lo = Op.getOperand(0); +- SDValue Hi = Op.getOperand(1); +- SDValue Shamt = Op.getOperand(2); +- EVT VT = Lo.getValueType(); ++// Enable LSX support for the given floating-point type and Register class. ++void LoongArchTargetLowering::addLSXFloatType(MVT::SimpleValueType Ty, ++ const TargetRegisterClass *RC) { ++ addRegisterClass(Ty, RC); ++ ++ // Expand all builtin opcodes. 
++ for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc) ++ setOperationAction(Opc, Ty, Expand); ++ ++ setOperationAction(ISD::LOAD, Ty, Legal); ++ setOperationAction(ISD::STORE, Ty, Legal); ++ setOperationAction(ISD::BITCAST, Ty, Legal); ++ setOperationAction(ISD::EXTRACT_VECTOR_ELT, Ty, Legal); ++ setOperationAction(ISD::INSERT_VECTOR_ELT, Ty, Legal); ++ setOperationAction(ISD::UNDEF, Ty, Legal); ++ setOperationAction(ISD::BUILD_VECTOR, Ty, Custom); ++ setOperationAction(ISD::CONCAT_VECTORS, Ty, Legal); ++ ++ if (Ty == MVT::v4f32 || Ty == MVT::v2f64) { ++ setOperationAction(ISD::FP_TO_SINT, Ty, Custom); ++ setOperationAction(ISD::FP_TO_UINT, Ty, Custom); ++ } + +- // if Shamt-GRLen < 0: // Shamt < GRLen +- // Lo = Lo << Shamt +- // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (GRLen-1 ^ Shamt)) +- // else: +- // Lo = 0 +- // Hi = Lo << (Shamt-GRLen) +- +- SDValue Zero = DAG.getConstant(0, DL, VT); +- SDValue One = DAG.getConstant(1, DL, VT); +- SDValue MinusGRLen = DAG.getConstant(-(int)Subtarget.getGRLen(), DL, VT); +- SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT); +- SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen); +- SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1); +- +- SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt); +- SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One); +- SDValue ShiftRightLo = +- DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, GRLenMinus1Shamt); +- SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt); +- SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo); +- SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusGRLen); ++ setOperationAction(ISD::FADD, Ty, Legal); ++ setOperationAction(ISD::FDIV, Ty, Legal); ++ setOperationAction(ISD::FMA, Ty, Legal); ++ setOperationAction(ISD::FMUL, Ty, Legal); ++ setOperationAction(ISD::FSQRT, Ty, Legal); ++ setOperationAction(ISD::FSUB, Ty, Legal); ++ setOperationAction(ISD::VSELECT, Ty, Legal); ++ setOperationAction(ISD::FNEG, Ty, Legal); ++ setOperationAction(ISD::FRINT, Ty, Legal); ++ ++ setOperationAction(ISD::SETCC, Ty, Legal); ++ setCondCodeAction(ISD::SETOGE, Ty, Expand); ++ setCondCodeAction(ISD::SETOGT, Ty, Expand); ++ setCondCodeAction(ISD::SETUGE, Ty, Expand); ++ setCondCodeAction(ISD::SETUGT, Ty, Expand); ++ setCondCodeAction(ISD::SETGE, Ty, Expand); ++ setCondCodeAction(ISD::SETGT, Ty, Expand); ++} + +- SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT); ++// Enable LASX support for the given floating-point type and Register class. ++void LoongArchTargetLowering::addLASXFloatType(MVT::SimpleValueType Ty, ++ const TargetRegisterClass *RC) { ++ addRegisterClass(Ty, RC); ++ ++ // Expand all builtin opcodes. 
++ for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc) ++ setOperationAction(Opc, Ty, Expand); ++ ++ setOperationAction(ISD::LOAD, Ty, Legal); ++ setOperationAction(ISD::STORE, Ty, Legal); ++ setOperationAction(ISD::BITCAST, Ty, Legal); ++ setOperationAction(ISD::EXTRACT_VECTOR_ELT, Ty, Legal); ++ setOperationAction(ISD::INSERT_VECTOR_ELT, Ty, Legal); ++ setOperationAction(ISD::BUILD_VECTOR, Ty, Custom); ++ setOperationAction(ISD::UNDEF, Ty, Legal); ++ setOperationAction(ISD::CONCAT_VECTORS, Ty, Legal); ++ ++ setOperationAction(ISD::FADD, Ty, Legal); ++ setOperationAction(ISD::FDIV, Ty, Legal); ++ setOperationAction(ISD::FMA, Ty, Legal); ++ setOperationAction(ISD::FMUL, Ty, Legal); ++ setOperationAction(ISD::FSQRT, Ty, Legal); ++ setOperationAction(ISD::FSUB, Ty, Legal); ++ setOperationAction(ISD::VSELECT, Ty, Legal); ++ setOperationAction(ISD::FNEG, Ty, Legal); ++ setOperationAction(ISD::FRINT, Ty, Legal); ++ ++ if (Ty == MVT::v8f32 || Ty == MVT::v4f64) { ++ setOperationAction(ISD::FP_TO_SINT, Ty, Custom); ++ setOperationAction(ISD::FP_TO_UINT, Ty, Custom); ++ } + +- Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero); +- Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse); ++ setOperationAction(ISD::SETCC, Ty, Legal); ++ setCondCodeAction(ISD::SETOGE, Ty, Expand); ++ setCondCodeAction(ISD::SETOGT, Ty, Expand); ++ setCondCodeAction(ISD::SETUGE, Ty, Expand); ++ setCondCodeAction(ISD::SETUGT, Ty, Expand); ++ setCondCodeAction(ISD::SETGE, Ty, Expand); ++ setCondCodeAction(ISD::SETGT, Ty, Expand); ++} + +- SDValue Parts[2] = {Lo, Hi}; +- return DAG.getMergeValues(Parts, DL); ++bool LoongArchTargetLowering::allowsMisalignedMemoryAccesses( ++ EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags, ++ bool *Fast) const { ++ if (!Subtarget.allowUnalignedAccess()) ++ return false; ++ if (Fast) ++ *Fast = true; ++ return true; + } + +-SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op, +- SelectionDAG &DAG, +- bool IsSRA) const { +- SDLoc DL(Op); +- SDValue Lo = Op.getOperand(0); +- SDValue Hi = Op.getOperand(1); +- SDValue Shamt = Op.getOperand(2); +- EVT VT = Lo.getValueType(); +- +- // SRA expansion: +- // if Shamt-GRLen < 0: // Shamt < GRLen +- // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1)) +- // Hi = Hi >>s Shamt +- // else: +- // Lo = Hi >>s (Shamt-GRLen); +- // Hi = Hi >>s (GRLen-1) +- // +- // SRL expansion: +- // if Shamt-GRLen < 0: // Shamt < GRLen +- // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1)) +- // Hi = Hi >>u Shamt +- // else: +- // Lo = Hi >>u (Shamt-GRLen); +- // Hi = 0; +- +- unsigned ShiftRightOp = IsSRA ? 
ISD::SRA : ISD::SRL; +- +- SDValue Zero = DAG.getConstant(0, DL, VT); +- SDValue One = DAG.getConstant(1, DL, VT); +- SDValue MinusGRLen = DAG.getConstant(-(int)Subtarget.getGRLen(), DL, VT); +- SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT); +- SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen); +- SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1); ++EVT LoongArchTargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &, ++ EVT VT) const { ++ if (!VT.isVector()) ++ return MVT::i32; ++ return VT.changeVectorElementTypeToInteger(); ++} + +- SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt); +- SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One); +- SDValue ShiftLeftHi = +- DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, GRLenMinus1Shamt); +- SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi); +- SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt); +- SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusGRLen); +- SDValue HiFalse = +- IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, GRLenMinus1) : Zero; ++static LoongArch::CondCode condCodeToFCC(ISD::CondCode CC) { ++ switch (CC) { ++ default: llvm_unreachable("Unknown fp condition code!"); ++ case ISD::SETEQ: ++ case ISD::SETOEQ: return LoongArch::FCOND_OEQ; ++ case ISD::SETUNE: return LoongArch::FCOND_UNE; ++ case ISD::SETLT: ++ case ISD::SETOLT: return LoongArch::FCOND_OLT; ++ case ISD::SETGT: ++ case ISD::SETOGT: return LoongArch::FCOND_OGT; ++ case ISD::SETLE: ++ case ISD::SETOLE: return LoongArch::FCOND_OLE; ++ case ISD::SETGE: ++ case ISD::SETOGE: return LoongArch::FCOND_OGE; ++ case ISD::SETULT: return LoongArch::FCOND_ULT; ++ case ISD::SETULE: return LoongArch::FCOND_ULE; ++ case ISD::SETUGT: return LoongArch::FCOND_UGT; ++ case ISD::SETUGE: return LoongArch::FCOND_UGE; ++ case ISD::SETUO: return LoongArch::FCOND_UN; ++ case ISD::SETO: return LoongArch::FCOND_OR; ++ case ISD::SETNE: ++ case ISD::SETONE: return LoongArch::FCOND_ONE; ++ case ISD::SETUEQ: return LoongArch::FCOND_UEQ; ++ } ++} + +- SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT); ++/// This function returns true if the floating point conditional branches and ++/// conditional moves which use condition code CC should be inverted. ++static bool invertFPCondCodeUser(LoongArch::CondCode CC) { ++ if (CC >= LoongArch::FCOND_F && CC <= LoongArch::FCOND_SUNE) ++ return false; + +- Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse); +- Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse); ++ assert((CC >= LoongArch::FCOND_T && CC <= LoongArch::FCOND_GT) && ++ "Illegal Condition Code"); + +- SDValue Parts[2] = {Lo, Hi}; +- return DAG.getMergeValues(Parts, DL); ++ return true; + } + +-// Returns the opcode of the target-specific SDNode that implements the 32-bit +-// form of the given Opcode. +-static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode) { +- switch (Opcode) { +- default: +- llvm_unreachable("Unexpected opcode"); +- case ISD::SHL: +- return LoongArchISD::SLL_W; +- case ISD::SRA: +- return LoongArchISD::SRA_W; +- case ISD::SRL: +- return LoongArchISD::SRL_W; +- } ++// Creates and returns an FPCmp node from a setcc node. ++// Returns Op if setcc is not a floating point comparison. 
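// Illustrative sketch, not part of the patch: the ordered/unordered pairs in
// condCodeToFCC above (e.g. FCOND_OLT vs. FCOND_ULT) differ only in how NaN is
// treated: an ordered compare is false whenever either input is NaN, while an
// unordered compare is true in that case. Scalar model of the two "less than"
// flavours:
#include <cmath>

inline bool cmpOrderedLT(double A, double B) {   // FCOND_OLT-style
  return !std::isnan(A) && !std::isnan(B) && A < B;
}
inline bool cmpUnorderedLT(double A, double B) { // FCOND_ULT-style
  return std::isnan(A) || std::isnan(B) || A < B;
}
// createFPCmp below simply packages the operands plus the translated condition
// code into an FPCmp glue node; the NaN behaviour travels with the condition.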
++static SDValue createFPCmp(SelectionDAG &DAG, const SDValue &Op) { ++ // must be a SETCC node ++ if (Op.getOpcode() != ISD::SETCC) ++ return Op; ++ ++ SDValue LHS = Op.getOperand(0); ++ ++ if (!LHS.getValueType().isFloatingPoint()) ++ return Op; ++ ++ SDValue RHS = Op.getOperand(1); ++ SDLoc DL(Op); ++ ++ // Assume the 3rd operand is a CondCodeSDNode. Add code to check the type of ++ // node if necessary. ++ ISD::CondCode CC = cast(Op.getOperand(2))->get(); ++ ++ return DAG.getNode(LoongArchISD::FPCmp, DL, MVT::Glue, LHS, RHS, ++ DAG.getConstant(condCodeToFCC(CC), DL, MVT::i32)); + } + +-// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG +-// node. Because i8/i16/i32 isn't a legal type for LA64, these operations would +-// otherwise be promoted to i64, making it difficult to select the +-// SLL_W/.../*W later one because the fact the operation was originally of +-// type i8/i16/i32 is lost. +-static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, +- unsigned ExtOpc = ISD::ANY_EXTEND) { +- SDLoc DL(N); +- LoongArchISD::NodeType WOpcode = getLoongArchWOpcode(N->getOpcode()); +- SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0)); +- SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1)); +- SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1); +- // ReplaceNodeResults requires we maintain the same type for the return value. +- return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes); ++// Creates and returns a CMovFPT/F node. ++static SDValue createCMovFP(SelectionDAG &DAG, SDValue Cond, SDValue True, ++ SDValue False, const SDLoc &DL) { ++ ConstantSDNode *CC = cast(Cond.getOperand(2)); ++ bool invert = invertFPCondCodeUser((LoongArch::CondCode)CC->getSExtValue()); ++ SDValue FCC0 = DAG.getRegister(LoongArch::FCC0, MVT::i32); ++ ++ return DAG.getNode((invert ? 
LoongArchISD::CMovFP_F : LoongArchISD::CMovFP_T), DL, ++ True.getValueType(), True, FCC0, False, Cond); ++ + } + +-void LoongArchTargetLowering::ReplaceNodeResults( +- SDNode *N, SmallVectorImpl &Results, SelectionDAG &DAG) const { +- SDLoc DL(N); +- switch (N->getOpcode()) { +- default: +- llvm_unreachable("Don't know how to legalize this operation"); +- case ISD::SHL: +- case ISD::SRA: +- case ISD::SRL: +- assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && +- "Unexpected custom legalisation"); +- if (N->getOperand(1).getOpcode() != ISD::Constant) { +- Results.push_back(customLegalizeToWOp(N, DAG)); +- break; +- } +- break; +- case ISD::FP_TO_SINT: { +- assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && +- "Unexpected custom legalisation"); +- SDValue Src = N->getOperand(0); +- EVT VT = EVT::getFloatingPointVT(N->getValueSizeInBits(0)); +- SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, VT, Src); +- Results.push_back(DAG.getNode(ISD::BITCAST, DL, N->getValueType(0), Dst)); +- break; +- } +- case ISD::BITCAST: { +- EVT VT = N->getValueType(0); +- SDValue Src = N->getOperand(0); +- EVT SrcVT = Src.getValueType(); +- if (VT == MVT::i32 && SrcVT == MVT::f32 && Subtarget.is64Bit() && +- Subtarget.hasBasicF()) { +- SDValue Dst = +- DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Src); +- Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Dst)); +- } +- break; +- } +- case ISD::FP_TO_UINT: { +- assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && +- "Unexpected custom legalisation"); +- auto &TLI = DAG.getTargetLoweringInfo(); +- SDValue Tmp1, Tmp2; +- TLI.expandFP_TO_UINT(N, Tmp1, Tmp2, DAG); +- Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Tmp1)); +- break; ++static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG, ++ TargetLowering::DAGCombinerInfo &DCI, ++ const LoongArchSubtarget &Subtarget) { ++ if (DCI.isBeforeLegalizeOps()) ++ return SDValue(); ++ ++ SDValue SetCC = N->getOperand(0); ++ ++ if ((SetCC.getOpcode() != ISD::SETCC) || ++ !SetCC.getOperand(0).getValueType().isInteger()) ++ return SDValue(); ++ ++ SDValue False = N->getOperand(2); ++ EVT FalseTy = False.getValueType(); ++ ++ if (!FalseTy.isInteger()) ++ return SDValue(); ++ ++ ConstantSDNode *FalseC = dyn_cast(False); ++ ++ // If the RHS (False) is 0, we swap the order of the operands ++ // of ISD::SELECT (obviously also inverting the condition) so that we can ++ // take advantage of conditional moves using the $0 register. ++ // Example: ++ // return (a != 0) ? x : 0; ++ // load $reg, x ++ // movz $reg, $0, a ++ if (!FalseC) ++ return SDValue(); ++ ++ const SDLoc DL(N); ++ ++ if (!FalseC->getZExtValue()) { ++ ISD::CondCode CC = cast(SetCC.getOperand(2))->get(); ++ SDValue True = N->getOperand(1); ++ ++ SetCC = DAG.getSetCC(DL, SetCC.getValueType(), SetCC.getOperand(0), ++ SetCC.getOperand(1), ++ ISD::getSetCCInverse(CC, SetCC.getValueType())); ++ ++ return DAG.getNode(ISD::SELECT, DL, FalseTy, SetCC, False, True); + } ++ ++ // If both operands are integer constants there's a possibility that we ++ // can do some interesting optimizations. ++ SDValue True = N->getOperand(1); ++ ConstantSDNode *TrueC = dyn_cast(True); ++ ++ if (!TrueC || !True.getValueType().isInteger()) ++ return SDValue(); ++ ++ // We'll also ignore MVT::i64 operands as this optimizations proves ++ // to be ineffective because of the required sign extensions as the result ++ // of a SETCC operator is always MVT::i32 for non-vector types. 
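// Illustrative sketch, not part of the patch: the two constant cases checked
// next rely on an integer setcc producing exactly 0 or 1, so selecting between
// two values that differ by one collapses into a compare plus an add:
//   (a < x) ? y : y-1  ==  (y-1) + (a < x)
//   (a < x) ? y-1 : y  ==  (y-1) + !(a < x)   (compare inverted, as below)
#include <cstdint>

inline int32_t selectDiffOne(int32_t A, int32_t X, int32_t Y) {
  // Branchless form of (A < X) ? Y : Y - 1.
  return (Y - 1) + static_cast<int32_t>(A < X);
}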
++ if (True.getValueType() == MVT::i64) ++ return SDValue(); ++ ++ int64_t Diff = TrueC->getSExtValue() - FalseC->getSExtValue(); ++ ++ // 1) (a < x) ? y : y-1 ++ // slti $reg1, a, x ++ // addiu $reg2, $reg1, y-1 ++ if (Diff == 1) ++ return DAG.getNode(ISD::ADD, DL, SetCC.getValueType(), SetCC, False); ++ ++ // 2) (a < x) ? y-1 : y ++ // slti $reg1, a, x ++ // xor $reg1, $reg1, 1 ++ // addiu $reg2, $reg1, y-1 ++ if (Diff == -1) { ++ ISD::CondCode CC = cast(SetCC.getOperand(2))->get(); ++ SetCC = DAG.getSetCC(DL, SetCC.getValueType(), SetCC.getOperand(0), ++ SetCC.getOperand(1), ++ ISD::getSetCCInverse(CC, SetCC.getValueType())); ++ return DAG.getNode(ISD::ADD, DL, SetCC.getValueType(), SetCC, True); + } ++ ++ // Could not optimize. ++ return SDValue(); + } + + static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const LoongArchSubtarget &Subtarget) { ++ ++ if (Subtarget.hasLSX()) { ++ ++ // Fold zero extensions into LoongArchISD::VEXTRACT_[SZ]EXT_ELT ++ // ++ // Performs the following transformations: ++ // - Changes LoongArchISD::VEXTRACT_[SZ]EXT_ELT to zero extension if its ++ // sign/zero-extension is completely overwritten by the new one performed ++ // by the ISD::AND. ++ // - Removes redundant zero extensions performed by an ISD::AND. ++ SDValue Op0 = N->getOperand(0); ++ SDValue Op1 = N->getOperand(1); ++ unsigned Op0Opcode = Op0->getOpcode(); ++ ++ // (and (LoongArchVExtract[SZ]Ext $a, $b, $c), imm:$d) ++ // where $d + 1 == 2^n and n == 32 ++ // or $d + 1 == 2^n and n <= 32 and ZExt ++ // -> (LoongArchVExtractZExt $a, $b, $c) ++ if (Op0Opcode == LoongArchISD::VEXTRACT_SEXT_ELT || ++ Op0Opcode == LoongArchISD::VEXTRACT_ZEXT_ELT) { ++ ConstantSDNode *Mask = dyn_cast(Op1); ++ ++ if (Mask) { ++ ++ int32_t Log2IfPositive = (Mask->getAPIntValue() + 1).exactLogBase2(); ++ ++ if (Log2IfPositive > 0) { ++ SDValue Op0Op2 = Op0->getOperand(2); ++ EVT ExtendTy = cast(Op0Op2)->getVT(); ++ unsigned ExtendTySize = ExtendTy.getSizeInBits(); ++ unsigned Log2 = Log2IfPositive; ++ ++ if ((Op0Opcode == LoongArchISD::VEXTRACT_ZEXT_ELT && ++ Log2 >= ExtendTySize) || ++ Log2 == ExtendTySize) { ++ SDValue Ops[] = {Op0->getOperand(0), Op0->getOperand(1), Op0Op2}; ++ return DAG.getNode(LoongArchISD::VEXTRACT_ZEXT_ELT, SDLoc(Op0), ++ Op0->getVTList(), ++ makeArrayRef(Ops, Op0->getNumOperands())); ++ } ++ } ++ } ++ } ++ } ++ + if (DCI.isBeforeLegalizeOps()) + return SDValue(); + + SDValue FirstOperand = N->getOperand(0); +- SDValue SecondOperand = N->getOperand(1); + unsigned FirstOperandOpc = FirstOperand.getOpcode(); ++ SDValue Mask = N->getOperand(1); + EVT ValTy = N->getValueType(0); + SDLoc DL(N); +- uint64_t lsb, msb; +- unsigned SMIdx, SMLen; ++ ++ uint64_t Lsb = 0, SMLsb, SMSize; + ConstantSDNode *CN; + SDValue NewOperand; +- MVT GRLenVT = Subtarget.getGRLenVT(); ++ unsigned Opc; + + // Op's second operand must be a shifted mask. +- if (!(CN = dyn_cast(SecondOperand)) || +- !isShiftedMask_64(CN->getZExtValue(), SMIdx, SMLen)) ++ if (!(CN = dyn_cast(Mask)) || ++ !isShiftedMask(CN->getZExtValue(), SMLsb, SMSize)) + return SDValue(); + + if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL) { + // Pattern match BSTRPICK. +- // $dst = and ((sra or srl) $src , lsb), (2**len - 1) +- // => BSTRPICK $dst, $src, msb, lsb +- // where msb = lsb + len - 1 ++ // $dst = and ((sra or srl) $src , lsb), (2**size - 1) ++ // => bstrpick $dst, $src, lsb+size-1, lsb + + // The second operand of the shift must be an immediate. 
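// Illustrative sketch, not part of the patch: BSTRPICK rd, rj, msb, lsb
// extracts bits [msb:lsb] of rj and zero-extends them, so
//   (src >> lsb) & ((1 << size) - 1)   with   msb = lsb + size - 1
// is a single instruction. Scalar reference model:
#include <cstdint>

inline uint64_t bstrpickModel(uint64_t Src, unsigned Msb, unsigned Lsb) {
  // Assumes 0 <= Lsb <= Msb < 64, which the matcher guarantees.
  unsigned Size = Msb - Lsb + 1;
  uint64_t Field = (Size == 64) ? ~0ULL : ((1ULL << Size) - 1);
  return (Src >> Lsb) & Field;
}
// e.g. bstrpickModel(0xABCD, 11, 4) == 0xBC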
+ if (!(CN = dyn_cast(FirstOperand.getOperand(1)))) + return SDValue(); + +- lsb = CN->getZExtValue(); ++ Lsb = CN->getZExtValue(); + +- // Return if the shifted mask does not start at bit 0 or the sum of its +- // length and lsb exceeds the word's size. +- if (SMIdx != 0 || lsb + SMLen > ValTy.getSizeInBits()) ++ // Return if the shifted mask does not start at bit 0 or the sum of its size ++ // and Lsb exceeds the word's size. ++ if (SMLsb != 0 || Lsb + SMSize > ValTy.getSizeInBits()) + return SDValue(); + ++ Opc = LoongArchISD::BSTRPICK; + NewOperand = FirstOperand.getOperand(0); + } else { + // Pattern match BSTRPICK. +- // $dst = and $src, (2**len- 1) , if len > 12 +- // => BSTRPICK $dst, $src, msb, lsb +- // where lsb = 0 and msb = len - 1 ++ // $dst = and $src, (2**size - 1) , if size > 12 ++ // => bstrpick $dst, $src, lsb+size-1, lsb , lsb = 0 + + // If the mask is <= 0xfff, andi can be used instead. + if (CN->getZExtValue() <= 0xfff) + return SDValue(); +- + // Return if the mask doesn't start at position 0. +- if (SMIdx) ++ if (SMLsb) + return SDValue(); + +- lsb = 0; ++ Opc = LoongArchISD::BSTRPICK; + NewOperand = FirstOperand; + } +- msb = lsb + SMLen - 1; +- return DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy, NewOperand, +- DAG.getConstant(msb, DL, GRLenVT), +- DAG.getConstant(lsb, DL, GRLenVT)); ++ return DAG.getNode(Opc, DL, ValTy, NewOperand, ++ DAG.getConstant((Lsb + SMSize - 1), DL, MVT::i32), ++ DAG.getConstant(Lsb, DL, MVT::i32)); + } + +-static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG, +- TargetLowering::DAGCombinerInfo &DCI, +- const LoongArchSubtarget &Subtarget) { +- if (DCI.isBeforeLegalizeOps()) +- return SDValue(); ++// Determine if the specified node is a constant vector splat. ++// ++// Returns true and sets Imm if: ++// * N is a ISD::BUILD_VECTOR representing a constant splat ++static bool isVSplat(SDValue N, APInt &Imm) { ++ BuildVectorSDNode *Node = dyn_cast(N.getNode()); + +- // $dst = srl (and $src, Mask), Shamt +- // => +- // BSTRPICK $dst, $src, MaskIdx+MaskLen-1, Shamt +- // when Mask is a shifted mask, and MaskIdx <= Shamt <= MaskIdx+MaskLen-1 +- // ++ if (!Node) ++ return false; + +- SDValue FirstOperand = N->getOperand(0); +- ConstantSDNode *CN; +- EVT ValTy = N->getValueType(0); +- SDLoc DL(N); +- MVT GRLenVT = Subtarget.getGRLenVT(); +- unsigned MaskIdx, MaskLen; +- uint64_t Shamt; ++ APInt SplatValue, SplatUndef; ++ unsigned SplatBitSize; ++ bool HasAnyUndefs; ++ ++ if (!Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs, ++ 8)) ++ return false; ++ ++ Imm = SplatValue; ++ ++ return true; ++} ++ ++// Test whether the given node is an all-ones build_vector. ++static bool isVectorAllOnes(SDValue N) { ++ // Look through bitcasts. Endianness doesn't matter because we are looking ++ // for an all-ones value. ++ if (N->getOpcode() == ISD::BITCAST) ++ N = N->getOperand(0); ++ ++ BuildVectorSDNode *BVN = dyn_cast(N); ++ ++ if (!BVN) ++ return false; ++ ++ APInt SplatValue, SplatUndef; ++ unsigned SplatBitSize; ++ bool HasAnyUndefs; ++ ++ // Endianness doesn't matter in this context because we are looking for ++ // an all-ones value. ++ if (BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs)) ++ return SplatValue.isAllOnesValue(); ++ ++ return false; ++} ++ ++// Test whether N is the bitwise inverse of OfNode. 
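// Note (editorial, not part of the patch): the DAG has no dedicated vector NOT
// node; "~x" is represented as (xor x, all-ones). isBitwiseInverse below
// therefore recognises B == ~A by looking for an XOR of A with an all-ones
// build_vector in either operand position, i.e. the scalar identity
//   (A ^ ~0) == ~A.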
++static bool isBitwiseInverse(SDValue N, SDValue OfNode) { ++ if (N->getOpcode() != ISD::XOR) ++ return false; ++ ++ if (isVectorAllOnes(N->getOperand(0))) ++ return N->getOperand(1) == OfNode; ++ ++ if (isVectorAllOnes(N->getOperand(1))) ++ return N->getOperand(0) == OfNode; ++ ++ return false; ++} + +- // The first operand must be an AND and the second operand of the AND must be +- // a shifted mask. +- if (FirstOperand.getOpcode() != ISD::AND || +- !(CN = dyn_cast(FirstOperand.getOperand(1))) || +- !isShiftedMask_64(CN->getZExtValue(), MaskIdx, MaskLen)) ++static SDValue performSet(SDNode *N, SelectionDAG &DAG, ++ TargetLowering::DAGCombinerInfo &DCI, ++ const LoongArchSubtarget &Subtarget) { ++ ++ SDValue Op0 = N->getOperand(0); ++ SDValue Op1 = N->getOperand(1); ++ SDValue N1, N2; ++ if (Op0->getOpcode() == ISD::BUILD_VECTOR && ++ (Op1->getValueType(0).is128BitVector() || ++ Op1->getValueType(0).is256BitVector())) { ++ N1 = Op0; ++ N2 = Op1; ++ } else if (Op1->getOpcode() == ISD::BUILD_VECTOR && ++ (Op0->getValueType(0).is128BitVector() || ++ Op0->getValueType(0).is256BitVector())) { ++ N1 = Op1; ++ N2 = Op0; ++ } else + return SDValue(); + +- // The second operand (shift amount) must be an immediate. +- if (!(CN = dyn_cast(N->getOperand(1)))) ++ APInt Mask1, Mask2; ++ if (!isVSplat(N1, Mask1)) + return SDValue(); + +- Shamt = CN->getZExtValue(); +- if (MaskIdx <= Shamt && Shamt <= MaskIdx + MaskLen - 1) +- return DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy, +- FirstOperand->getOperand(0), +- DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT), +- DAG.getConstant(Shamt, DL, GRLenVT)); ++ if (!N1->getValueType(0).isSimple()) ++ return SDValue(); + +- return SDValue(); ++ ConstantSDNode *C1; ++ uint64_t Imm; ++ unsigned ImmL; ++ if (!(C1 = dyn_cast(N1.getOperand(0))) || ++ !isPowerOf2_64(C1->getZExtValue())) ++ return SDValue(); ++ ++ Imm = C1->getZExtValue(); ++ ImmL = Log2_64(Imm); ++ MVT VT = N1->getSimpleValueType(0).SimpleTy; ++ ++ SDNode *Res; ++ ++ if (Subtarget.hasLASX() && N->getValueType(0).is256BitVector()) { ++ if (VT == MVT::v32i8 && ImmL < 8) ++ Res = DAG.getMachineNode(LoongArch::XVBITSETI_B, SDLoc(N), VT, N2, ++ DAG.getTargetConstant(ImmL, SDLoc(N), MVT::i32)); ++ else if (VT == MVT::v16i16 && ImmL < 16) ++ Res = DAG.getMachineNode(LoongArch::XVBITSETI_H, SDLoc(N), VT, N2, ++ DAG.getTargetConstant(ImmL, SDLoc(N), MVT::i32)); ++ else if (VT == MVT::v8i32 && ImmL < 32) ++ Res = DAG.getMachineNode(LoongArch::XVBITSETI_W, SDLoc(N), VT, N2, ++ DAG.getTargetConstant(ImmL, SDLoc(N), MVT::i32)); ++ else if (VT == MVT::v4i64 && ImmL < 64) ++ Res = DAG.getMachineNode(LoongArch::XVBITSETI_D, SDLoc(N), VT, N2, ++ DAG.getTargetConstant(ImmL, SDLoc(N), MVT::i32)); ++ else ++ return SDValue(); ++ } else if (N->getValueType(0).is128BitVector()) { ++ if (VT == MVT::v16i8 && ImmL < 8) ++ Res = DAG.getMachineNode(LoongArch::VBITSETI_B, SDLoc(N), VT, N2, ++ DAG.getTargetConstant(ImmL, SDLoc(N), MVT::i32)); ++ else if (VT == MVT::v8i16 && ImmL < 16) ++ Res = DAG.getMachineNode(LoongArch::VBITSETI_H, SDLoc(N), VT, N2, ++ DAG.getTargetConstant(ImmL, SDLoc(N), MVT::i32)); ++ else if (VT == MVT::v4i32 && ImmL < 32) ++ Res = DAG.getMachineNode(LoongArch::VBITSETI_W, SDLoc(N), VT, N2, ++ DAG.getTargetConstant(ImmL, SDLoc(N), MVT::i32)); ++ else if (VT == MVT::v2i64 && ImmL < 64) ++ Res = DAG.getMachineNode(LoongArch::VBITSETI_D, SDLoc(N), VT, N2, ++ DAG.getTargetConstant(ImmL, SDLoc(N), MVT::i32)); ++ else ++ return SDValue(); ++ ++ } else ++ return SDValue(); ++ ++ return SDValue(Res, 0); + } 
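// Illustrative sketch, not part of the patch: performSet above rewrites an OR
// with a splatted power of two 2^k into a bit-set-immediate, using
//   x | (1 << k)  ==  "set bit k of x",
// where k = log2 of the splat value, provided k fits the element width.
// Per-element scalar model for the 32-bit element case:
#include <cstdint>

inline uint32_t vbitsetiModelW(uint32_t Elt, unsigned K) {
  // Mirrors what VBITSETI.W / XVBITSETI.W do to each element; requires K < 32.
  return Elt | (1u << K);
}
// e.g. vbitsetiModelW(0x5, 3) == 0xD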
+ + static SDValue performORCombine(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const LoongArchSubtarget &Subtarget) { +- MVT GRLenVT = Subtarget.getGRLenVT(); +- EVT ValTy = N->getValueType(0); +- SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); +- ConstantSDNode *CN0, *CN1; +- SDLoc DL(N); +- unsigned ValBits = ValTy.getSizeInBits(); +- unsigned MaskIdx0, MaskLen0, MaskIdx1, MaskLen1; +- unsigned Shamt; +- bool SwapAndRetried = false; + ++ SDValue Res; ++ if (Subtarget.hasLSX() && (N->getValueType(0).is128BitVector() || ++ N->getValueType(0).is256BitVector())) { ++ SDValue Op0 = N->getOperand(0); ++ SDValue Op1 = N->getOperand(1); ++ ++ if (Op0->getOpcode() == ISD::AND && Op1->getOpcode() == ISD::AND) { ++ SDValue Op0Op0 = Op0->getOperand(0); ++ SDValue Op0Op1 = Op0->getOperand(1); ++ SDValue Op1Op0 = Op1->getOperand(0); ++ SDValue Op1Op1 = Op1->getOperand(1); ++ ++ SDValue IfSet, IfClr, Cond; ++ bool IsConstantMask = false; ++ APInt Mask, InvMask; ++ ++ // If Op0Op0 is an appropriate mask, try to find it's inverse in either ++ // Op1Op0, or Op1Op1. Keep track of the Cond, IfSet, and IfClr nodes, ++ // while looking. IfClr will be set if we find a valid match. ++ if (isVSplat(Op0Op0, Mask)) { ++ Cond = Op0Op0; ++ IfSet = Op0Op1; ++ ++ if (isVSplat(Op1Op0, InvMask) && ++ Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask) ++ IfClr = Op1Op1; ++ else if (isVSplat(Op1Op1, InvMask) && ++ Mask.getBitWidth() == InvMask.getBitWidth() && ++ Mask == ~InvMask) ++ IfClr = Op1Op0; ++ ++ IsConstantMask = true; ++ } ++ ++ // If IfClr is not yet set, and Op0Op1 is an appropriate mask, try the ++ // same thing again using this mask. IfClr will be set if we find a valid ++ // match. ++ if (!IfClr.getNode() && isVSplat(Op0Op1, Mask)) { ++ Cond = Op0Op1; ++ IfSet = Op0Op0; ++ ++ if (isVSplat(Op1Op0, InvMask) && ++ Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask) ++ IfClr = Op1Op1; ++ else if (isVSplat(Op1Op1, InvMask) && ++ Mask.getBitWidth() == InvMask.getBitWidth() && ++ Mask == ~InvMask) ++ IfClr = Op1Op0; ++ ++ IsConstantMask = true; ++ } ++ ++ // If IfClr is not yet set, try looking for a non-constant match. ++ // IfClr will be set if we find a valid match amongst the eight ++ // possibilities. ++ if (!IfClr.getNode()) { ++ if (isBitwiseInverse(Op0Op0, Op1Op0)) { ++ Cond = Op1Op0; ++ IfSet = Op1Op1; ++ IfClr = Op0Op1; ++ } else if (isBitwiseInverse(Op0Op1, Op1Op0)) { ++ Cond = Op1Op0; ++ IfSet = Op1Op1; ++ IfClr = Op0Op0; ++ } else if (isBitwiseInverse(Op0Op0, Op1Op1)) { ++ Cond = Op1Op1; ++ IfSet = Op1Op0; ++ IfClr = Op0Op1; ++ } else if (isBitwiseInverse(Op0Op1, Op1Op1)) { ++ Cond = Op1Op1; ++ IfSet = Op1Op0; ++ IfClr = Op0Op0; ++ } else if (isBitwiseInverse(Op1Op0, Op0Op0)) { ++ Cond = Op0Op0; ++ IfSet = Op0Op1; ++ IfClr = Op1Op1; ++ } else if (isBitwiseInverse(Op1Op1, Op0Op0)) { ++ Cond = Op0Op0; ++ IfSet = Op0Op1; ++ IfClr = Op1Op0; ++ } else if (isBitwiseInverse(Op1Op0, Op0Op1)) { ++ Cond = Op0Op1; ++ IfSet = Op0Op0; ++ IfClr = Op1Op1; ++ } else if (isBitwiseInverse(Op1Op1, Op0Op1)) { ++ Cond = Op0Op1; ++ IfSet = Op0Op0; ++ IfClr = Op1Op0; ++ } ++ } ++ ++ // At this point, IfClr will be set if we have a valid match. ++ if (IfClr.getNode()) { ++ assert(Cond.getNode() && IfSet.getNode()); ++ ++ // Fold degenerate cases. ++ if (IsConstantMask) { ++ if (Mask.isAllOnesValue()) ++ return IfSet; ++ else if (Mask == 0) ++ return IfClr; ++ } ++ ++ // Transform the DAG into an equivalent VSELECT. 
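// Illustrative sketch, not part of the patch: the rewrite below relies on the
// classic bitwise-select identity
//   (Cond & IfSet) | (~Cond & IfClr)
// which picks, bit by bit, IfSet where Cond is 1 and IfClr where Cond is 0 --
// exactly what the vector bit-select lowering of the VSELECT computes. Scalar
// form of the identity:
#include <cstdint>

inline uint64_t bitwiseSelect(uint64_t Cond, uint64_t IfSet, uint64_t IfClr) {
  return (Cond & IfSet) | (~Cond & IfClr);
}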
++ return DAG.getNode(ISD::VSELECT, SDLoc(N), N->getValueType(0), Cond, ++ IfSet, IfClr); ++ } ++ } ++ ++ if (Res = performSet(N, DAG, DCI, Subtarget)) ++ return Res; ++ } ++ ++ // Pattern match BSTRINS. ++ // $dst = or (and $src1 , mask0), (and (shl $src, lsb), mask1), ++ // where mask1 = (2**size - 1) << lsb, mask0 = ~mask1 ++ // => bstrins $dst, $src, lsb+size-1, lsb, $src1 + if (DCI.isBeforeLegalizeOps()) + return SDValue(); + +- if (ValBits != 32 && ValBits != 64) ++ SDValue And0 = N->getOperand(0), And1 = N->getOperand(1); ++ uint64_t SMLsb0, SMSize0, SMLsb1, SMSize1; ++ ConstantSDNode *CN, *CN1; ++ ++ // See if Op's first operand matches (and $src1 , mask0). ++ if (And0.getOpcode() != ISD::AND) + return SDValue(); + +-Retry: +- // 1st pattern to match BSTRINS: +- // R = or (and X, mask0), (and (shl Y, lsb), mask1) +- // where mask1 = (2**size - 1) << lsb, mask0 = ~mask1 +- // => +- // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1) +- if (N0.getOpcode() == ISD::AND && +- (CN0 = dyn_cast(N0.getOperand(1))) && +- isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) && +- N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL && +- (CN1 = dyn_cast(N1.getOperand(1))) && +- isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) && +- MaskIdx0 == MaskIdx1 && MaskLen0 == MaskLen1 && +- (CN1 = dyn_cast(N1.getOperand(0).getOperand(1))) && +- (Shamt = CN1->getZExtValue()) == MaskIdx0 && +- (MaskIdx0 + MaskLen0 <= ValBits)) { +- LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 1\n"); +- return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0), +- N1.getOperand(0).getOperand(0), +- DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT), +- DAG.getConstant(MaskIdx0, DL, GRLenVT)); +- } +- +- // 2nd pattern to match BSTRINS: +- // R = or (and X, mask0), (shl (and Y, mask1), lsb) +- // where mask1 = (2**size - 1), mask0 = ~(mask1 << lsb) +- // => +- // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1) +- if (N0.getOpcode() == ISD::AND && +- (CN0 = dyn_cast(N0.getOperand(1))) && +- isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) && +- N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND && +- (CN1 = dyn_cast(N1.getOperand(1))) && +- (Shamt = CN1->getZExtValue()) == MaskIdx0 && +- (CN1 = dyn_cast(N1.getOperand(0).getOperand(1))) && +- isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) && +- MaskLen0 == MaskLen1 && MaskIdx1 == 0 && +- (MaskIdx0 + MaskLen0 <= ValBits)) { +- LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 2\n"); +- return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0), +- N1.getOperand(0).getOperand(0), +- DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT), +- DAG.getConstant(MaskIdx0, DL, GRLenVT)); +- } +- +- // 3rd pattern to match BSTRINS: +- // R = or (and X, mask0), (and Y, mask1) +- // where ~mask0 = (2**size - 1) << lsb, mask0 & mask1 = 0 +- // => +- // R = BSTRINS X, (shr (and Y, mask1), lsb), msb, lsb +- // where msb = lsb + size - 1 +- if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND && +- (CN0 = dyn_cast(N0.getOperand(1))) && +- isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) && +- (MaskIdx0 + MaskLen0 <= 64) && +- (CN1 = dyn_cast(N1->getOperand(1))) && +- (CN1->getSExtValue() & CN0->getSExtValue()) == 0) { +- LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 3\n"); +- return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0), +- DAG.getNode(ISD::SRL, DL, N1->getValueType(0), N1, +- DAG.getConstant(MaskIdx0, 
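// Illustrative sketch, not part of the patch: BSTRINS rd, rj, msb, lsb
// overwrites bits [msb:lsb] of rd with the low (msb - lsb + 1) bits of rj and
// leaves the rest of rd unchanged, so the matched pair of complementary masks
//   (dst & ~mask) | ((src << lsb) & mask),  mask = ((1 << size) - 1) << lsb
// collapses into one instruction. Scalar reference model:
#include <cstdint>

inline uint64_t bstrinsModel(uint64_t Dst, uint64_t Src, unsigned Msb,
                             unsigned Lsb) {
  // Assumes 0 <= Lsb <= Msb < 64, which the matcher guarantees.
  unsigned Size = Msb - Lsb + 1;
  uint64_t Field = (Size == 64) ? ~0ULL : ((1ULL << Size) - 1);
  uint64_t Mask = Field << Lsb;
  return (Dst & ~Mask) | ((Src & Field) << Lsb);
}
// e.g. bstrinsModel(0xFFFF, 0x3, 7, 4) == 0xFF3F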
DL, GRLenVT)), +- DAG.getConstant(ValBits == 32 +- ? (MaskIdx0 + (MaskLen0 & 31) - 1) +- : (MaskIdx0 + MaskLen0 - 1), +- DL, GRLenVT), +- DAG.getConstant(MaskIdx0, DL, GRLenVT)); +- } +- +- // 4th pattern to match BSTRINS: +- // R = or (and X, mask), (shl Y, shamt) +- // where mask = (2**shamt - 1) +- // => +- // R = BSTRINS X, Y, ValBits - 1, shamt +- // where ValBits = 32 or 64 +- if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::SHL && +- (CN0 = dyn_cast(N0.getOperand(1))) && +- isShiftedMask_64(CN0->getZExtValue(), MaskIdx0, MaskLen0) && +- MaskIdx0 == 0 && (CN1 = dyn_cast(N1.getOperand(1))) && +- (Shamt = CN1->getZExtValue()) == MaskLen0 && +- (MaskIdx0 + MaskLen0 <= ValBits)) { +- LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 4\n"); +- return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0), +- N1.getOperand(0), +- DAG.getConstant((ValBits - 1), DL, GRLenVT), +- DAG.getConstant(Shamt, DL, GRLenVT)); +- } +- +- // 5th pattern to match BSTRINS: +- // R = or (and X, mask), const +- // where ~mask = (2**size - 1) << lsb, mask & const = 0 +- // => +- // R = BSTRINS X, (const >> lsb), msb, lsb +- // where msb = lsb + size - 1 +- if (N0.getOpcode() == ISD::AND && +- (CN0 = dyn_cast(N0.getOperand(1))) && +- isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) && +- (CN1 = dyn_cast(N1)) && +- (CN1->getSExtValue() & CN0->getSExtValue()) == 0) { +- LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 5\n"); +- return DAG.getNode( +- LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0), +- DAG.getConstant(CN1->getSExtValue() >> MaskIdx0, DL, ValTy), +- DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT), +- DAG.getConstant(MaskIdx0, DL, GRLenVT)); +- } +- +- // 6th pattern. +- // a = b | ((c & mask) << shamt), where all positions in b to be overwritten +- // by the incoming bits are known to be zero. +- // => +- // a = BSTRINS b, c, shamt + MaskLen - 1, shamt +- // +- // Note that the 1st pattern is a special situation of the 6th, i.e. the 6th +- // pattern is more common than the 1st. So we put the 1st before the 6th in +- // order to match as many nodes as possible. +- ConstantSDNode *CNMask, *CNShamt; +- unsigned MaskIdx, MaskLen; +- if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND && +- (CNMask = dyn_cast(N1.getOperand(0).getOperand(1))) && +- isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) && +- MaskIdx == 0 && (CNShamt = dyn_cast(N1.getOperand(1))) && +- CNShamt->getZExtValue() + MaskLen <= ValBits) { +- Shamt = CNShamt->getZExtValue(); +- APInt ShMask(ValBits, CNMask->getZExtValue() << Shamt); +- if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) { +- LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 6\n"); +- return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0, +- N1.getOperand(0).getOperand(0), +- DAG.getConstant(Shamt + MaskLen - 1, DL, GRLenVT), +- DAG.getConstant(Shamt, DL, GRLenVT)); ++ if (!(CN = dyn_cast(And0.getOperand(1))) || ++ !isShiftedMask(~CN->getSExtValue(), SMLsb0, SMSize0)) ++ return SDValue(); ++ ++ // See if Op's second operand matches (and (shl $src, lsb), mask1). ++ if (And1.getOpcode() == ISD::AND && ++ And1.getOperand(0).getOpcode() == ISD::SHL) { ++ ++ if (!(CN = dyn_cast(And1.getOperand(1))) || ++ !isShiftedMask(CN->getZExtValue(), SMLsb1, SMSize1)) ++ return SDValue(); ++ ++ // The shift masks must have the same least significant bit and size. 
++ if (SMLsb0 != SMLsb1 || SMSize0 != SMSize1) ++ return SDValue(); ++ ++ SDValue Shl = And1.getOperand(0); ++ ++ if (!(CN = dyn_cast(Shl.getOperand(1)))) ++ return SDValue(); ++ ++ unsigned Shamt = CN->getZExtValue(); ++ ++ // Return if the shift amount and the first bit position of mask are not the ++ // same. ++ EVT ValTy = N->getValueType(0); ++ if ((Shamt != SMLsb0) || (SMLsb0 + SMSize0 > ValTy.getSizeInBits())) ++ return SDValue(); ++ ++ SDLoc DL(N); ++ return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, Shl.getOperand(0), ++ DAG.getConstant((SMLsb0 + SMSize0 - 1), DL, MVT::i32), ++ DAG.getConstant(SMLsb0, DL, MVT::i32), ++ And0.getOperand(0)); ++ } else { ++ // Pattern match BSTRINS. ++ // $dst = or (and $src, mask0), mask1 ++ // where mask0 = ((1 << SMSize0) -1) << SMLsb0 ++ // => bstrins $dst, $src, SMLsb0+SMSize0-1, SMLsb0 ++ if (~CN->getSExtValue() == ((((int64_t)1 << SMSize0) - 1) << SMLsb0) && ++ (SMSize0 + SMLsb0 <= 64)) { ++ // Check if AND instruction has constant as argument ++ bool isConstCase = And1.getOpcode() != ISD::AND; ++ if (And1.getOpcode() == ISD::AND) { ++ if (!(CN1 = dyn_cast(And1->getOperand(1)))) ++ return SDValue(); ++ } else { ++ if (!(CN1 = dyn_cast(N->getOperand(1)))) ++ return SDValue(); ++ } ++ // Don't generate BSTRINS if constant OR operand doesn't fit into bits ++ // cleared by constant AND operand. ++ if (CN->getSExtValue() & CN1->getSExtValue()) ++ return SDValue(); ++ ++ SDLoc DL(N); ++ EVT ValTy = N->getOperand(0)->getValueType(0); ++ SDValue Const1; ++ SDValue SrlX; ++ if (!isConstCase) { ++ Const1 = DAG.getConstant(SMLsb0, DL, MVT::i32); ++ SrlX = DAG.getNode(ISD::SRL, DL, And1->getValueType(0), And1, Const1); ++ } ++ return DAG.getNode( ++ LoongArchISD::BSTRINS, DL, N->getValueType(0), ++ isConstCase ++ ? DAG.getConstant(CN1->getSExtValue() >> SMLsb0, DL, ValTy) ++ : SrlX, ++ DAG.getConstant(ValTy.getSizeInBits() / 8 < 8 ? (SMLsb0 + (SMSize0 & 31) - 1) ++ : (SMLsb0 + SMSize0 - 1), ++ DL, MVT::i32), ++ DAG.getConstant(SMLsb0, DL, MVT::i32), ++ And0->getOperand(0)); ++ + } ++ return SDValue(); + } ++} + +- // 7th pattern. +- // a = b | ((c << shamt) & shifted_mask), where all positions in b to be +- // overwritten by the incoming bits are known to be zero. +- // => +- // a = BSTRINS b, c, MaskIdx + MaskLen - 1, MaskIdx ++static bool ++shouldTransformMulToShiftsAddsSubs(APInt C, EVT VT, ++ SelectionDAG &DAG, ++ const LoongArchSubtarget &Subtarget) { ++ // Estimate the number of operations the below transform will turn a ++ // constant multiply into. The number is approximately equal to the minimal ++ // number of powers of two that constant can be broken down to by adding ++ // or subtracting them. + // +- // Similarly, the 7th pattern is more common than the 2nd. So we put the 2nd +- // before the 7th in order to match as many nodes as possible. 
+- if (N1.getOpcode() == ISD::AND && +- (CNMask = dyn_cast(N1.getOperand(1))) && +- isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) && +- N1.getOperand(0).getOpcode() == ISD::SHL && +- (CNShamt = dyn_cast(N1.getOperand(0).getOperand(1))) && +- CNShamt->getZExtValue() == MaskIdx) { +- APInt ShMask(ValBits, CNMask->getZExtValue()); +- if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) { +- LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 7\n"); +- return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0, +- N1.getOperand(0).getOperand(0), +- DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT), +- DAG.getConstant(MaskIdx, DL, GRLenVT)); ++ // If we have taken more than 10[1] / 8[2] steps to attempt the ++ // optimization for a native sized value, it is more than likely that this ++ // optimization will make things worse. ++ // ++ // [1] LA64 requires 4 instructions at most to materialize any constant, ++ // multiplication requires at least 4 cycles, but another cycle (or two) ++ // to retrieve the result from corresponding registers. ++ // ++ // [2] LA32 requires 2 instructions at most to materialize any constant, ++ // multiplication requires at least 4 cycles, but another cycle (or two) ++ // to retrieve the result from corresponding registers. ++ // ++ // TODO: ++ // - MaxSteps needs to consider the `VT` of the constant for the current ++ // target. ++ // - Consider to perform this optimization after type legalization. ++ // That allows to remove a workaround for types not supported natively. ++ // - Take in account `-Os, -Oz` flags because this optimization ++ // increases code size. ++ unsigned MaxSteps = Subtarget.isABI_LP32() ? 8 : 10; ++ ++ SmallVector WorkStack(1, C); ++ unsigned Steps = 0; ++ unsigned BitWidth = C.getBitWidth(); ++ ++ while (!WorkStack.empty()) { ++ APInt Val = WorkStack.pop_back_val(); ++ ++ if (Val == 0 || Val == 1) ++ continue; ++ ++ if (Steps >= MaxSteps) ++ return false; ++ ++ if (Val.isPowerOf2()) { ++ ++Steps; ++ continue; + } +- } + +- // (or a, b) and (or b, a) are equivalent, so swap the operands and retry. +- if (!SwapAndRetried) { +- std::swap(N0, N1); +- SwapAndRetried = true; +- goto Retry; +- } ++ APInt Floor = APInt(BitWidth, 1) << Val.logBase2(); ++ APInt Ceil = Val.isNegative() ? APInt(BitWidth, 0) ++ : APInt(BitWidth, 1) << C.ceilLogBase2(); + +- SwapAndRetried = false; +-Retry2: +- // 8th pattern. +- // a = b | (c & shifted_mask), where all positions in b to be overwritten by +- // the incoming bits are known to be zero. +- // => +- // a = BSTRINS b, c >> MaskIdx, MaskIdx + MaskLen - 1, MaskIdx +- // +- // Similarly, the 8th pattern is more common than the 4th and 5th patterns. So +- // we put it here in order to match as many nodes as possible or generate less +- // instructions. 
+- if (N1.getOpcode() == ISD::AND && +- (CNMask = dyn_cast(N1.getOperand(1))) && +- isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen)) { +- APInt ShMask(ValBits, CNMask->getZExtValue()); +- if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) { +- LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 8\n"); +- return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0, +- DAG.getNode(ISD::SRL, DL, N1->getValueType(0), +- N1->getOperand(0), +- DAG.getConstant(MaskIdx, DL, GRLenVT)), +- DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT), +- DAG.getConstant(MaskIdx, DL, GRLenVT)); ++ if ((Val - Floor).ule(Ceil - Val)) { ++ WorkStack.push_back(Floor); ++ WorkStack.push_back(Val - Floor); ++ } else { ++ WorkStack.push_back(Ceil); ++ WorkStack.push_back(Ceil - Val); + } ++ ++ ++Steps; + } +- // Swap N0/N1 and retry. +- if (!SwapAndRetried) { +- std::swap(N0, N1); +- SwapAndRetried = true; +- goto Retry2; ++ ++ // If the value being multiplied is not supported natively, we have to pay ++ // an additional legalization cost, conservatively assume an increase in the ++ // cost of 3 instructions per step. This values for this heuristic were ++ // determined experimentally. ++ unsigned RegisterSize = DAG.getTargetLoweringInfo() ++ .getRegisterType(*DAG.getContext(), VT) ++ .getSizeInBits(); ++ Steps *= (VT.getSizeInBits() != RegisterSize) * 3; ++ if (Steps > 27) ++ return false; ++ ++ return true; ++} ++ ++static SDValue genConstMult(SDValue X, APInt C, const SDLoc &DL, EVT VT, ++ EVT ShiftTy, SelectionDAG &DAG) { ++ // Return 0. ++ if (C == 0) ++ return DAG.getConstant(0, DL, VT); ++ ++ // Return x. ++ if (C == 1) ++ return X; ++ ++ // If c is power of 2, return (shl x, log2(c)). ++ if (C.isPowerOf2()) ++ return DAG.getNode(ISD::SHL, DL, VT, X, ++ DAG.getConstant(C.logBase2(), DL, ShiftTy)); ++ ++ unsigned BitWidth = C.getBitWidth(); ++ APInt Floor = APInt(BitWidth, 1) << C.logBase2(); ++ APInt Ceil = C.isNegative() ? APInt(BitWidth, 0) : ++ APInt(BitWidth, 1) << C.ceilLogBase2(); ++ ++ // If |c - floor_c| <= |c - ceil_c|, ++ // where floor_c = pow(2, floor(log2(c))) and ceil_c = pow(2, ceil(log2(c))), ++ // return (add constMult(x, floor_c), constMult(x, c - floor_c)). ++ if ((C - Floor).ule(Ceil - C)) { ++ SDValue Op0 = genConstMult(X, Floor, DL, VT, ShiftTy, DAG); ++ SDValue Op1 = genConstMult(X, C - Floor, DL, VT, ShiftTy, DAG); ++ return DAG.getNode(ISD::ADD, DL, VT, Op0, Op1); ++ } ++ ++ // If |c - floor_c| > |c - ceil_c|, ++ // return (sub constMult(x, ceil_c), constMult(x, ceil_c - c)). 
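// Illustrative sketch, not part of the patch: the same recursion expressed on
// plain integers -- multiply by a constant using only shifts, adds and subs,
// always recursing through the nearer power of two, just as this function does
// on DAG nodes (shouldTransformMulToShiftsAddsSubs above bounds the number of
// steps so the transform is only attempted when it beats a real multiply).
// Uses C++20 <bit>; assumes C < 2^63 so Ceil does not overflow.
#include <bit>
#include <cstdint>

inline uint64_t mulByConst(uint64_t X, uint64_t C) {
  if (C == 0) return 0;
  if (C == 1) return X;
  if (std::has_single_bit(C))              // power of two: a single shift
    return X << std::countr_zero(C);
  uint64_t Floor = std::bit_floor(C);      // largest power of two <= C
  uint64_t Ceil = Floor << 1;              // smallest power of two > C
  if (C - Floor <= Ceil - C)               // recurse through the nearer one
    return mulByConst(X, Floor) + mulByConst(X, C - Floor);
  return mulByConst(X, Ceil) - mulByConst(X, Ceil - C);
}
// e.g. mulByConst(x, 10) == (x << 3) + (x << 1)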
++ SDValue Op0 = genConstMult(X, Ceil, DL, VT, ShiftTy, DAG); ++ SDValue Op1 = genConstMult(X, Ceil - C, DL, VT, ShiftTy, DAG); ++ return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1); ++} ++ ++static SDValue performLogicCombine(SDNode *N, SelectionDAG &DAG, ++ const LoongArchSubtarget &Subtarget) { ++ ++ SDLoc DL(N); ++ SDValue N0 = N->getOperand(0); ++ SDValue N1 = N->getOperand(1); ++ ++ if (!(N0->getOpcode() == ISD::TRUNCATE && N1->getOpcode() == ISD::TRUNCATE)) ++ return SDValue(); ++ ++ if (!(N->getValueType(0).isSimple() && N0->getValueType(0).isSimple() && ++ N1->getValueType(0).isSimple() && ++ N0->getOperand(0)->getValueType(0).isSimple() && ++ N1->getOperand(0)->getValueType(0).isSimple())) ++ return SDValue(); ++ ++ if (!(N->getValueType(0).isSimple() && N0->getValueType(0).isSimple() && ++ N1->getValueType(0).isSimple() && ++ N0->getOperand(0)->getValueType(0).isSimple() && ++ N1->getOperand(0)->getValueType(0).isSimple())) ++ return SDValue(); ++ ++ if (!(N->getSimpleValueType(0).SimpleTy == MVT::i32 && ++ N0->getSimpleValueType(0).SimpleTy == MVT::i32 && ++ N1->getSimpleValueType(0).SimpleTy == MVT::i32)) ++ return SDValue(); ++ ++ if (!(N0->getOperand(0)->getSimpleValueType(0).SimpleTy == MVT::i64 && ++ N1->getOperand(0)->getSimpleValueType(0).SimpleTy == MVT::i64)) ++ return SDValue(); ++ ++ SDValue SubReg = DAG.getTargetConstant(LoongArch::sub_32, DL, MVT::i32); ++ SDValue Val0 = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, ++ N0->getValueType(0), ++ N0->getOperand(0), SubReg), ++ 0); ++ SDValue Val1 = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, ++ N1->getValueType(0), ++ N1->getOperand(0), SubReg), ++ 0); ++ ++ return DAG.getNode(N->getOpcode(), DL, N0->getValueType(0), Val0, Val1); ++} ++ ++static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG, ++ const TargetLowering::DAGCombinerInfo &DCI, ++ const LoongArchTargetLowering *TL, ++ const LoongArchSubtarget &Subtarget) { ++ EVT VT = N->getValueType(0); ++ ++ SDValue Res; ++ if ((Res = performLogicCombine(N, DAG, Subtarget))) ++ return Res; ++ ++ if (ConstantSDNode *C = dyn_cast(N->getOperand(1))) ++ if (!VT.isVector() && shouldTransformMulToShiftsAddsSubs( ++ C->getAPIntValue(), VT, DAG, Subtarget)) ++ return genConstMult(N->getOperand(0), C->getAPIntValue(), SDLoc(N), VT, ++ TL->getScalarShiftAmountTy(DAG.getDataLayout(), VT), ++ DAG); ++ ++ return SDValue(N, 0); ++} ++ ++// Fold sign-extensions into LoongArchISD::VEXTRACT_[SZ]EXT_ELT for LSX. ++// ++// Performs the following transformations: ++// - Changes LoongArchISD::VEXTRACT_[SZ]EXT_ELT to sign extension if its ++// sign/zero-extension is completely overwritten by the new one performed by ++// the ISD::SRA and ISD::SHL nodes. ++// - Removes redundant sign extensions performed by an ISD::SRA and ISD::SHL ++// sequence. 
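// Illustrative sketch, not part of the patch: a left shift by d followed by an
// arithmetic right shift by d is just "sign-extend the low (width - d) bits",
// so when the shifted value already comes from an element extract that
// sign-extends at least that many bits, the shift pair is redundant and the
// combine below folds it away. Scalar model of what the shl/sra pair computes
// (assumes two's complement and an arithmetic right shift, as on LoongArch):
#include <cstdint>

inline int32_t signExtendLowBits(int32_t X, unsigned D) {
  // Requires 0 < D < 32; keeps the low (32 - D) bits of X, sign-extended.
  return static_cast<int32_t>(static_cast<uint32_t>(X) << D) >> D;
}
// e.g. signExtendLowBits(0x000000FF, 24) == -1   (the low 8 bits were 0xFF)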
++static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG, ++ TargetLowering::DAGCombinerInfo &DCI, ++ const LoongArchSubtarget &Subtarget) { ++ ++ SDValue Res; ++ if ((Res = performLogicCombine(N, DAG, Subtarget))) ++ return Res; ++ ++ if (Subtarget.hasLSX() || Subtarget.hasLASX()) { ++ SDValue Op0 = N->getOperand(0); ++ SDValue Op1 = N->getOperand(1); ++ ++ // (sra (shl (LoongArchVExtract[SZ]Ext $a, $b, $c), imm:$d), imm:$d) ++ // where $d + sizeof($c) == 32 ++ // or $d + sizeof($c) <= 32 and SExt ++ // -> (LoongArchVExtractSExt $a, $b, $c) ++ if (Op0->getOpcode() == ISD::SHL && Op1 == Op0->getOperand(1)) { ++ SDValue Op0Op0 = Op0->getOperand(0); ++ ConstantSDNode *ShAmount = dyn_cast(Op1); ++ ++ if (!ShAmount) ++ return SDValue(); ++ ++ if (Op0Op0->getOpcode() != LoongArchISD::VEXTRACT_SEXT_ELT && ++ Op0Op0->getOpcode() != LoongArchISD::VEXTRACT_ZEXT_ELT) ++ return SDValue(); ++ ++ EVT ExtendTy = cast(Op0Op0->getOperand(2))->getVT(); ++ unsigned TotalBits = ShAmount->getZExtValue() + ExtendTy.getSizeInBits(); ++ ++ if (TotalBits == 32 || ++ (Op0Op0->getOpcode() == LoongArchISD::VEXTRACT_SEXT_ELT && ++ TotalBits <= 32)) { ++ SDValue Ops[] = {Op0Op0->getOperand(0), Op0Op0->getOperand(1), ++ Op0Op0->getOperand(2)}; ++ return DAG.getNode(LoongArchISD::VEXTRACT_SEXT_ELT, SDLoc(Op0Op0), ++ Op0Op0->getVTList(), ++ makeArrayRef(Ops, Op0Op0->getNumOperands())); ++ } ++ } ++ } ++ ++ return SDValue(); ++} ++ ++// combine vsub/vslt/vbitsel.v to vabsd ++static SDValue performVSELECTCombine(SDNode *N, SelectionDAG &DAG) { ++ assert((N->getOpcode() == ISD::VSELECT) && "Need ISD::VSELECT"); ++ ++ SDLoc dl(N); ++ SDValue Cond = N->getOperand(0); ++ SDValue TrueOpnd = N->getOperand(1); ++ SDValue FalseOpnd = N->getOperand(2); ++ ++ if (Cond.getOpcode() != ISD::SETCC || TrueOpnd.getOpcode() != ISD::SUB || ++ FalseOpnd.getOpcode() != ISD::SUB) ++ return SDValue(); ++ ++ if (!(Cond.hasOneUse() || TrueOpnd.hasOneUse() || FalseOpnd.hasOneUse())) ++ return SDValue(); ++ ++ ISD::CondCode CC = cast(Cond.getOperand(2))->get(); ++ ++ switch (CC) { ++ default: ++ return SDValue(); ++ case ISD::SETUGT: ++ case ISD::SETUGE: ++ case ISD::SETGT: ++ case ISD::SETGE: ++ break; ++ case ISD::SETULT: ++ case ISD::SETULE: ++ case ISD::SETLT: ++ case ISD::SETLE: ++ std::swap(TrueOpnd, FalseOpnd); ++ break; ++ } ++ ++ SDValue Op1 = Cond.getOperand(0); ++ SDValue Op2 = Cond.getOperand(1); ++ ++ if (TrueOpnd.getOperand(0) == Op1 && TrueOpnd.getOperand(1) == Op2 && ++ FalseOpnd.getOperand(0) == Op2 && FalseOpnd.getOperand(1) == Op1) { ++ if (ISD::isSignedIntSetCC(CC)) { ++ return DAG.getNode(LoongArchISD::VABSD, dl, ++ N->getOperand(1).getValueType(), Op1, Op2, ++ DAG.getTargetConstant(0, dl, MVT::i32)); ++ } else { ++ return DAG.getNode(LoongArchISD::UVABSD, dl, ++ N->getOperand(1).getValueType(), Op1, Op2, ++ DAG.getTargetConstant(0, dl, MVT::i32)); ++ } ++ } ++ return SDValue(); ++} ++ ++static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG, ++ const LoongArchSubtarget &Subtarget) { ++ ++ EVT Ty = N->getValueType(0); ++ ++ if ((Subtarget.hasLSX() && Ty.is128BitVector() && Ty.isInteger()) || ++ (Subtarget.hasLASX() && Ty.is256BitVector() && Ty.isInteger())) { ++ // Try the following combines: ++ // (xor (or $a, $b), (build_vector allones)) ++ // (xor (or $a, $b), (bitcast (build_vector allones))) ++ SDValue Op0 = N->getOperand(0); ++ SDValue Op1 = N->getOperand(1); ++ SDValue NotOp; ++ ++ if (ISD::isBuildVectorAllOnes(Op0.getNode())) ++ NotOp = Op1; ++ else if (ISD::isBuildVectorAllOnes(Op1.getNode())) ++ NotOp = 
Op0; ++ else ++ return SDValue(); ++ ++ if (NotOp->getOpcode() == ISD::OR) ++ return DAG.getNode(LoongArchISD::VNOR, SDLoc(N), Ty, NotOp->getOperand(0), ++ NotOp->getOperand(1)); ++ } ++ ++ return SDValue(); ++} ++ ++// When using a 256-bit vector is less expensive than using a 128-bit vector, ++// use this function to convert a 128-bit vector to a 256-bit vector. ++static SDValue ++performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG, ++ TargetLowering::DAGCombinerInfo &DCI, ++ const LoongArchSubtarget &Subtarget) { ++ ++ assert((N->getOpcode() == ISD::CONCAT_VECTORS) && "Need ISD::CONCAT_VECTORS"); ++ if (DCI.isAfterLegalizeDAG()) ++ return SDValue(); ++ ++ SDLoc DL(N); ++ SDValue Top0 = N->getOperand(0); ++ SDValue Top1 = N->getOperand(1); ++ ++ // Check for cheaper optimizations. ++ if (!((Top0->getOpcode() == ISD::SIGN_EXTEND) && ++ (Top1->getOpcode() == ISD::SIGN_EXTEND))) ++ return SDValue(); ++ if (!((Top0->getOperand(0)->getOpcode() == ISD::ADD) && ++ (Top1->getOperand(0)->getOpcode() == ISD::ADD))) ++ return SDValue(); ++ ++ SDValue Op_a0 = Top0->getOperand(0); ++ SDValue Op_a1 = Top1->getOperand(0); ++ for (int i = 0; i < 2; i++) { ++ if (!((Op_a0->getOperand(i)->getOpcode() == ISD::BUILD_VECTOR) && ++ (Op_a1->getOperand(i)->getOpcode() == ISD::BUILD_VECTOR))) ++ return SDValue(); ++ } ++ ++ SDValue Ops_b[] = {Op_a0->getOperand(0), Op_a0->getOperand(1), ++ Op_a1->getOperand(0), Op_a1->getOperand(1)}; ++ for (int i = 0; i < 4; i++) { ++ if (Ops_b[i]->getNumOperands() != 2) ++ return SDValue(); ++ } ++ ++ // Currently only a single case is handled, and more optimization scenarios ++ // will be added in the future. ++ SDValue Ops_e[] = {Ops_b[0]->getOperand(0), Ops_b[0]->getOperand(1), ++ Ops_b[2]->getOperand(0), Ops_b[2]->getOperand(1), ++ Ops_b[1]->getOperand(0), Ops_b[1]->getOperand(1), ++ Ops_b[3]->getOperand(0), Ops_b[3]->getOperand(1)}; ++ for (int i = 0; i < 8; i++) { ++ if (dyn_cast(Ops_e[i])) ++ return SDValue(); ++ if (i < 4) { ++ if (cast(Ops_e[i]->getOperand(1))->getSExtValue() != ++ (2 * i)) ++ return SDValue(); ++ } else { ++ if (cast(Ops_e[i]->getOperand(1))->getSExtValue() != ++ (2 * i - 7)) ++ return SDValue(); ++ } ++ } ++ ++ for (int i = 0; i < 5; i = i + 4) { ++ if (!((Ops_e[i]->getOperand(0) == Ops_e[i + 1]->getOperand(0)) && ++ (Ops_e[i + 1]->getOperand(0) == Ops_e[i + 2]->getOperand(0)) && ++ (Ops_e[i + 2]->getOperand(0) == Ops_e[i + 3]->getOperand(0)))) ++ return SDValue(); ++ } ++ return SDValue(DAG.getMachineNode(LoongArch::XVHADDW_D_W, DL, MVT::v4i64, ++ Ops_e[6]->getOperand(0), ++ Ops_e[0]->getOperand(0)), ++ 0); ++} ++ ++static SDValue performParity(SDNode *N, SelectionDAG &DAG, ++ TargetLowering::DAGCombinerInfo &DCI, ++ const LoongArchSubtarget &Subtarget) { ++ ++ SDLoc DL(N); ++ SDValue T = N->getOperand(0); ++ if (!(N->getValueType(0).isSimple() && T->getValueType(0).isSimple())) ++ return SDValue(); ++ ++ if (DCI.isAfterLegalizeDAG()) ++ return SDValue(); ++ ++ SDValue Ops[4]; ++ bool pos_e = false; ++ bool pos_o = false; ++ ++ for (int i = 0; i < 4; i++) { ++ Ops[i] = T->getOperand(i); ++ if (!Ops[i]->getValueType(0).isSimple()) ++ return SDValue(); ++ if (Ops[i]->getOpcode() != ISD::EXTRACT_VECTOR_ELT) ++ return SDValue(); ++ ++ if (!dyn_cast(Ops[i]->getOperand(1))) ++ return SDValue(); ++ ++ if (cast(Ops[i]->getOperand(1))->getSExtValue() == ++ (2 * i)) { ++ pos_e = true; ++ } else if (cast(Ops[i]->getOperand(1))->getSExtValue() == ++ (2 * i + 1)) { ++ pos_o = true; ++ } else ++ return SDValue(); ++ } ++ ++ if 
(!(N->getSimpleValueType(0).SimpleTy == MVT::v4i64 && ++ T->getSimpleValueType(0).SimpleTy == MVT::v4i32)) ++ return SDValue(); ++ ++ for (int j = 0; j < 3; j++) { ++ if (Ops[j]->getOperand(0) != Ops[j + 1]->getOperand(0)) ++ return SDValue(); ++ } ++ if (pos_e) { ++ if (N->getOpcode() == ISD::SIGN_EXTEND) { ++ if (Ops[0]->getOperand(0)->getOpcode() == ISD::ADD) ++ return SDValue(DAG.getMachineNode(LoongArch::XVADDWEV_D_W, DL, ++ MVT::v4i64, ++ Ops[0]->getOperand(0)->getOperand(1), ++ Ops[0]->getOperand(0)->getOperand(0)), ++ 0); ++ else if (Ops[0]->getOperand(0)->getOpcode() == ISD::SUB) ++ return SDValue(DAG.getMachineNode(LoongArch::XVSUBWEV_D_W, DL, ++ MVT::v4i64, ++ Ops[0]->getOperand(0)->getOperand(0), ++ Ops[0]->getOperand(0)->getOperand(1)), ++ 0); ++ } else if (N->getOpcode() == ISD::ZERO_EXTEND) { ++ if (Ops[0]->getOperand(0)->getOpcode() == ISD::ADD) ++ return SDValue(DAG.getMachineNode(LoongArch::XVADDWEV_D_WU, DL, ++ MVT::v4i64, ++ Ops[0]->getOperand(0)->getOperand(1), ++ Ops[0]->getOperand(0)->getOperand(0)), ++ 0); ++ else if (Ops[0]->getOperand(0)->getOpcode() == ISD::SUB) ++ return SDValue(DAG.getMachineNode(LoongArch::XVSUBWEV_D_WU, DL, ++ MVT::v4i64, ++ Ops[0]->getOperand(0)->getOperand(0), ++ Ops[0]->getOperand(0)->getOperand(1)), ++ 0); ++ } ++ } else if (pos_o) { ++ if (N->getOpcode() == ISD::SIGN_EXTEND) { ++ if (Ops[0]->getOperand(0)->getOpcode() == ISD::ADD) ++ return SDValue(DAG.getMachineNode(LoongArch::XVADDWOD_D_W, DL, ++ MVT::v4i64, ++ Ops[0]->getOperand(0)->getOperand(1), ++ Ops[0]->getOperand(0)->getOperand(0)), ++ 0); ++ else if (Ops[0]->getOperand(0)->getOpcode() == ISD::SUB) ++ return SDValue(DAG.getMachineNode(LoongArch::XVSUBWOD_D_W, DL, ++ MVT::v4i64, ++ Ops[0]->getOperand(0)->getOperand(0), ++ Ops[0]->getOperand(0)->getOperand(1)), ++ 0); ++ } else if (N->getOpcode() == ISD::ZERO_EXTEND) { ++ if (Ops[0]->getOperand(0)->getOpcode() == ISD::ADD) ++ return SDValue(DAG.getMachineNode(LoongArch::XVADDWOD_D_WU, DL, ++ MVT::v4i64, ++ Ops[0]->getOperand(0)->getOperand(1), ++ Ops[0]->getOperand(0)->getOperand(0)), ++ 0); ++ else if (Ops[0]->getOperand(0)->getOpcode() == ISD::SUB) ++ return SDValue(DAG.getMachineNode(LoongArch::XVSUBWOD_D_WU, DL, ++ MVT::v4i64, ++ Ops[0]->getOperand(0)->getOperand(0), ++ Ops[0]->getOperand(0)->getOperand(1)), ++ 0); ++ } ++ } else ++ return SDValue(); ++ ++ return SDValue(); ++} ++ ++// Optimize zero extension and sign extension of data ++static SDValue performExtend(SDNode *N, SelectionDAG &DAG, ++ TargetLowering::DAGCombinerInfo &DCI, ++ const LoongArchSubtarget &Subtarget) { ++ ++ if (!Subtarget.hasLASX()) ++ return SDValue(); ++ ++ SDLoc DL(N); ++ SDValue T = N->getOperand(0); ++ ++ if (T->getOpcode() == ISD::BUILD_VECTOR) ++ return performParity(N, DAG, DCI, Subtarget); ++ ++ if (T->getOpcode() != ISD::ADD && T->getOpcode() != ISD::SUB) ++ return SDValue(); ++ ++ SDValue T0 = T->getOperand(0); ++ SDValue T1 = T->getOperand(1); ++ ++ if (!(T0->getOpcode() == ISD::BUILD_VECTOR && ++ T1->getOpcode() == ISD::BUILD_VECTOR)) ++ return SDValue(); ++ ++ if (DCI.isAfterLegalizeDAG()) ++ return SDValue(); ++ ++ if (!(T->getValueType(0).isSimple() && T0->getValueType(0).isSimple() && ++ T1->getValueType(0).isSimple() && N->getValueType(0).isSimple())) ++ return SDValue(); ++ ++ if (!(N->getSimpleValueType(0).SimpleTy == MVT::v4i64 && ++ T->getSimpleValueType(0).SimpleTy == MVT::v4i32 && ++ T0->getSimpleValueType(0).SimpleTy == MVT::v4i32 && ++ T1->getSimpleValueType(0).SimpleTy == MVT::v4i32)) ++ return SDValue(); ++ ++ SDValue 
Opse0[4]; ++ SDValue Opse1[4]; ++ ++ for (int i = 0; i < 4; i++) { ++ if (T->getOpcode() == ISD::ADD) { ++ Opse0[i] = T1->getOperand(i); ++ Opse1[i] = T0->getOperand(i); ++ } else if (T->getOpcode() == ISD::SUB) { ++ Opse0[i] = T0->getOperand(i); ++ Opse1[i] = T1->getOperand(i); ++ } ++ ++ if (Opse0[i]->getOpcode() != ISD::EXTRACT_VECTOR_ELT || ++ Opse1[i]->getOpcode() != ISD::EXTRACT_VECTOR_ELT) ++ return SDValue(); ++ ++ if (!(dyn_cast(Opse0[i]->getOperand(1)) && ++ dyn_cast(Opse1[i]->getOperand(1)))) ++ return SDValue(); ++ ++ if (cast(Opse0[i]->getOperand(1))->getSExtValue() != ++ (2 * i + 1) || ++ cast(Opse1[i]->getOperand(1))->getSExtValue() != ++ (2 * i)) ++ return SDValue(); ++ ++ if (i > 0 && (Opse0[i]->getOperand(0) != Opse0[i - 1]->getOperand(0) || ++ Opse1[i]->getOperand(0) != Opse1[i - 1]->getOperand(0))) ++ return SDValue(); ++ } ++ ++ if (N->getOpcode() == ISD::SIGN_EXTEND) { ++ if (T->getOpcode() == ISD::ADD) ++ return SDValue(DAG.getMachineNode(LoongArch::XVHADDW_D_W, DL, MVT::v4i64, ++ Opse0[0]->getOperand(0), ++ Opse1[0]->getOperand(0)), ++ 0); ++ else if (T->getOpcode() == ISD::SUB) ++ return SDValue(DAG.getMachineNode(LoongArch::XVHSUBW_D_W, DL, MVT::v4i64, ++ Opse0[0]->getOperand(0), ++ Opse1[0]->getOperand(0)), ++ 0); ++ } else if (N->getOpcode() == ISD::ZERO_EXTEND) { ++ if (T->getOpcode() == ISD::ADD) ++ return SDValue(DAG.getMachineNode(LoongArch::XVHADDW_DU_WU, DL, ++ MVT::v4i64, Opse0[0]->getOperand(0), ++ Opse1[0]->getOperand(0)), ++ 0); ++ else if (T->getOpcode() == ISD::SUB) ++ return SDValue(DAG.getMachineNode(LoongArch::XVHSUBW_DU_WU, DL, ++ MVT::v4i64, Opse0[0]->getOperand(0), ++ Opse1[0]->getOperand(0)), ++ 0); ++ } ++ ++ return SDValue(); ++} ++ ++static SDValue performSIGN_EXTENDCombine(SDNode *N, SelectionDAG &DAG, ++ TargetLowering::DAGCombinerInfo &DCI, ++ const LoongArchSubtarget &Subtarget) { ++ ++ assert((N->getOpcode() == ISD::SIGN_EXTEND) && "Need ISD::SIGN_EXTEND"); ++ ++ SDLoc DL(N); ++ SDValue Top = N->getOperand(0); ++ ++ SDValue Res; ++ if (Res = performExtend(N, DAG, DCI, Subtarget)) ++ return Res; ++ ++ if (!(Top->getOpcode() == ISD::CopyFromReg)) ++ return SDValue(); ++ ++ if ((Top->getOperand(0)->getOpcode() == ISD::EntryToken) && ++ (N->getValueType(0) == MVT::i64)) { ++ ++ SDValue SubReg = DAG.getTargetConstant(LoongArch::sub_32, DL, MVT::i32); ++ SDNode *Res = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::i64); ++ ++ Res = DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::i64, ++ SDValue(Res, 0), Top, SubReg); ++ ++ return SDValue(Res, 0); ++ } ++ ++ return SDValue(); ++} ++ ++static SDValue performZERO_EXTENDCombine(SDNode *N, SelectionDAG &DAG, ++ TargetLowering::DAGCombinerInfo &DCI, ++ const LoongArchSubtarget &Subtarget) { ++ ++ assert((N->getOpcode() == ISD::ZERO_EXTEND) && "Need ISD::ZERO_EXTEND"); ++ ++ SDLoc DL(N); ++ ++ SDValue Res; ++ if (Res = performExtend(N, DAG, DCI, Subtarget)) ++ return Res; ++ ++ return SDValue(); ++} ++ ++SDValue LoongArchTargetLowering:: ++PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { ++ SelectionDAG &DAG = DCI.DAG; ++ SDValue Val; ++ ++ switch (N->getOpcode()) { ++ default: break; ++ case ISD::AND: ++ return performANDCombine(N, DAG, DCI, Subtarget); ++ case ISD::OR: ++ return performORCombine(N, DAG, DCI, Subtarget); ++ case ISD::XOR: ++ return performXORCombine(N, DAG, Subtarget); ++ case ISD::MUL: ++ return performMULCombine(N, DAG, DCI, this, Subtarget); ++ case ISD::SRA: ++ return performSRACombine(N, DAG, DCI, Subtarget); ++ case ISD::SELECT: ++ return 
performSELECTCombine(N, DAG, DCI, Subtarget); ++ case ISD::VSELECT: ++ return performVSELECTCombine(N, DAG); ++ case ISD::CONCAT_VECTORS: ++ return performCONCAT_VECTORSCombine(N, DAG, DCI, Subtarget); ++ case ISD::SIGN_EXTEND: ++ return performSIGN_EXTENDCombine(N, DAG, DCI, Subtarget); ++ case ISD::ZERO_EXTEND: ++ return performZERO_EXTENDCombine(N, DAG, DCI, Subtarget); ++ case ISD::ADD: ++ case ISD::SUB: ++ case ISD::SHL: ++ case ISD::SRL: ++ return performLogicCombine(N, DAG, Subtarget); ++ } ++ return SDValue(); ++} ++ ++static SDValue lowerLSXSplatZExt(SDValue Op, unsigned OpNr, SelectionDAG &DAG) { ++ EVT ResVecTy = Op->getValueType(0); ++ EVT ViaVecTy = ResVecTy; ++ SDLoc DL(Op); ++ ++ // When ResVecTy == MVT::v2i64, LaneA is the upper 32 bits of the lane and ++ // LaneB is the lower 32-bits. Otherwise LaneA and LaneB are alternating ++ // lanes. ++ SDValue LaneA = Op->getOperand(OpNr); ++ SDValue LaneB; ++ ++ if (ResVecTy == MVT::v2i64) { ++ // In case of the index being passed as an immediate value, set the upper ++ // lane to 0 so that the splati.d instruction can be matched. ++ if (isa(LaneA)) ++ LaneB = DAG.getConstant(0, DL, MVT::i32); ++ // Having the index passed in a register, set the upper lane to the same ++ // value as the lower - this results in the BUILD_VECTOR node not being ++ // expanded through stack. This way we are able to pattern match the set of ++ // nodes created here to splat.d. ++ else ++ LaneB = LaneA; ++ ViaVecTy = MVT::v4i32; ++ } else ++ LaneB = LaneA; ++ ++ SDValue Ops[16] = {LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, ++ LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, LaneA, LaneB}; ++ ++ SDValue Result = DAG.getBuildVector( ++ ViaVecTy, DL, makeArrayRef(Ops, ViaVecTy.getVectorNumElements())); ++ ++ if (ViaVecTy != ResVecTy) { ++ SDValue One = DAG.getConstant(1, DL, ViaVecTy); ++ Result = DAG.getNode(ISD::BITCAST, DL, ResVecTy, ++ DAG.getNode(ISD::AND, DL, ViaVecTy, Result, One)); ++ } ++ ++ return Result; ++} ++ ++static SDValue lowerLSXSplatImm(SDValue Op, unsigned ImmOp, SelectionDAG &DAG, ++ bool IsSigned = false) { ++ return DAG.getConstant( ++ APInt(Op->getValueType(0).getScalarType().getSizeInBits(), ++ Op->getConstantOperandVal(ImmOp), IsSigned), ++ SDLoc(Op), Op->getValueType(0)); ++} ++ ++static SDValue getBuildVectorSplat(EVT VecTy, SDValue SplatValue, ++ SelectionDAG &DAG) { ++ EVT ViaVecTy = VecTy; ++ SDValue SplatValueA = SplatValue; ++ SDValue SplatValueB = SplatValue; ++ SDLoc DL(SplatValue); ++ ++ if (VecTy == MVT::v2i64) { ++ // v2i64 BUILD_VECTOR must be performed via v4i32 so split into i32's. 
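++ // Illustrative example (values assumed, not taken from a real test case):
++ // splatting the i64 constant 0x1122334455667788 into v2i64 is built as the
++ // v4i32 BUILD_VECTOR <0x55667788, 0x11223344, 0x55667788, 0x11223344>,
++ // i.e. SplatValueA below holds the low 32 bits (TRUNCATE) and SplatValueB
++ // the high 32 bits (SRL by 32, then TRUNCATE).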
++ ViaVecTy = MVT::v4i32; ++ ++ SplatValueA = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, SplatValue); ++ SplatValueB = DAG.getNode(ISD::SRL, DL, MVT::i64, SplatValue, ++ DAG.getConstant(32, DL, MVT::i32)); ++ SplatValueB = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, SplatValueB); ++ } ++ ++ SDValue Ops[32] = {SplatValueA, SplatValueB, SplatValueA, SplatValueB, ++ SplatValueA, SplatValueB, SplatValueA, SplatValueB, ++ SplatValueA, SplatValueB, SplatValueA, SplatValueB, ++ SplatValueA, SplatValueB, SplatValueA, SplatValueB, ++ SplatValueA, SplatValueB, SplatValueA, SplatValueB, ++ SplatValueA, SplatValueB, SplatValueA, SplatValueB, ++ SplatValueA, SplatValueB, SplatValueA, SplatValueB, ++ SplatValueA, SplatValueB, SplatValueA, SplatValueB}; ++ ++ SDValue Result = DAG.getBuildVector( ++ ViaVecTy, DL, makeArrayRef(Ops, ViaVecTy.getVectorNumElements())); ++ ++ if (VecTy != ViaVecTy) ++ Result = DAG.getNode(ISD::BITCAST, DL, VecTy, Result); ++ ++ return Result; ++} ++ ++static SDValue truncateVecElts(SDValue Op, SelectionDAG &DAG) { ++ SDLoc DL(Op); ++ EVT ResTy = Op->getValueType(0); ++ SDValue Vec = Op->getOperand(2); ++ MVT ResEltTy = ++ (ResTy == MVT::v2i64 || ResTy == MVT::v4i64) ? MVT::i64 : MVT::i32; ++ SDValue ConstValue = ++ DAG.getConstant(Vec.getScalarValueSizeInBits() - 1, DL, ResEltTy); ++ SDValue SplatVec = getBuildVectorSplat(ResTy, ConstValue, DAG); ++ ++ return DAG.getNode(ISD::AND, DL, ResTy, Vec, SplatVec); ++} ++ ++static SDValue lowerLSXBitClear(SDValue Op, SelectionDAG &DAG) { ++ EVT ResTy = Op->getValueType(0); ++ SDLoc DL(Op); ++ SDValue One = DAG.getConstant(1, DL, ResTy); ++ SDValue Bit = DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Op, DAG)); ++ ++ return DAG.getNode(ISD::AND, DL, ResTy, Op->getOperand(1), ++ DAG.getNOT(DL, Bit, ResTy)); ++} ++ ++static SDValue lowerLSXLoadIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr, ++ const LoongArchSubtarget &Subtarget) { ++ SDLoc DL(Op); ++ SDValue ChainIn = Op->getOperand(0); ++ SDValue Address = Op->getOperand(2); ++ SDValue Offset = Op->getOperand(3); ++ EVT ResTy = Op->getValueType(0); ++ EVT PtrTy = Address->getValueType(0); ++ ++ // For LP64 addresses have the underlying type MVT::i64. This intrinsic ++ // however takes an i32 signed constant offset. The actual type of the ++ // intrinsic is a scaled signed i12. ++ if (Subtarget.isABI_LP64()) ++ Offset = DAG.getNode(ISD::SIGN_EXTEND, DL, PtrTy, Offset); ++ ++ Address = DAG.getNode(ISD::ADD, DL, PtrTy, Address, Offset); ++ return DAG.getLoad(ResTy, DL, ChainIn, Address, MachinePointerInfo(), ++ /* Alignment = */ 16); ++} ++ ++static SDValue lowerLASXLoadIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr, ++ const LoongArchSubtarget &Subtarget) { ++ SDLoc DL(Op); ++ SDValue ChainIn = Op->getOperand(0); ++ SDValue Address = Op->getOperand(2); ++ SDValue Offset = Op->getOperand(3); ++ EVT ResTy = Op->getValueType(0); ++ EVT PtrTy = Address->getValueType(0); ++ ++ // For LP64 addresses have the underlying type MVT::i64. This intrinsic ++ // however takes an i32 signed constant offset. The actual type of the ++ // intrinsic is a scaled signed i12. 
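++ // For instance (illustrative only): with a base pointer %p and a constant
++ // offset of -16, the i32 offset must be sign-extended to i64 on LP64 so the
++ // computed address is %p - 16 rather than %p + 0xfffffff0.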
++ if (Subtarget.isABI_LP64()) ++ Offset = DAG.getNode(ISD::SIGN_EXTEND, DL, PtrTy, Offset); ++ ++ Address = DAG.getNode(ISD::ADD, DL, PtrTy, Address, Offset); ++ return DAG.getLoad(ResTy, DL, ChainIn, Address, MachinePointerInfo(), ++ /* Alignment = */ 32); ++} ++ ++static SDValue lowerLASXVLDRIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr, ++ const LoongArchSubtarget &Subtarget) { ++ SDLoc DL(Op); ++ SDValue ChainIn = Op->getOperand(0); ++ SDValue Address = Op->getOperand(2); ++ SDValue Offset = Op->getOperand(3); ++ EVT ResTy = Op->getValueType(0); ++ EVT PtrTy = Address->getValueType(0); ++ ++ // For LP64 addresses have the underlying type MVT::i64. This intrinsic ++ // however takes an i32 signed constant offset. The actual type of the ++ // intrinsic is a scaled signed i12. ++ if (Subtarget.isABI_LP64()) ++ Offset = DAG.getNode(ISD::SIGN_EXTEND, DL, PtrTy, Offset); ++ ++ Address = DAG.getNode(ISD::ADD, DL, PtrTy, Address, Offset); ++ SDValue Load = DAG.getLoad(ResTy, DL, ChainIn, Address, MachinePointerInfo(), ++ /* Alignment = */ 32); ++ return DAG.getNode(LoongArchISD::XVBROADCAST, DL, ++ DAG.getVTList(ResTy, MVT::Other), Load); ++} ++ ++static SDValue lowerLSXVLDRIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr, ++ const LoongArchSubtarget &Subtarget) { ++ SDLoc DL(Op); ++ SDValue ChainIn = Op->getOperand(0); ++ SDValue Address = Op->getOperand(2); ++ SDValue Offset = Op->getOperand(3); ++ EVT ResTy = Op->getValueType(0); ++ EVT PtrTy = Address->getValueType(0); ++ ++ // For LP64 addresses have the underlying type MVT::i64. This intrinsic ++ // however takes an i32 signed constant offset. The actual type of the ++ // intrinsic is a scaled signed i12. ++ if (Subtarget.isABI_LP64()) ++ Offset = DAG.getNode(ISD::SIGN_EXTEND, DL, PtrTy, Offset); ++ ++ Address = DAG.getNode(ISD::ADD, DL, PtrTy, Address, Offset); ++ SDValue Load = DAG.getLoad(ResTy, DL, ChainIn, Address, MachinePointerInfo(), ++ /* Alignment = */ 16); ++ return DAG.getNode(LoongArchISD::VBROADCAST, DL, ++ DAG.getVTList(ResTy, MVT::Other), Load); ++} ++ ++static SDValue lowerLSXStoreIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr, ++ const LoongArchSubtarget &Subtarget) { ++ SDLoc DL(Op); ++ SDValue ChainIn = Op->getOperand(0); ++ SDValue Value = Op->getOperand(2); ++ SDValue Address = Op->getOperand(3); ++ SDValue Offset = Op->getOperand(4); ++ EVT PtrTy = Address->getValueType(0); ++ ++ // For LP64 addresses have the underlying type MVT::i64. This intrinsic ++ // however takes an i32 signed constant offset. The actual type of the ++ // intrinsic is a scaled signed i12. ++ if (Subtarget.isABI_LP64()) ++ Offset = DAG.getNode(ISD::SIGN_EXTEND, DL, PtrTy, Offset); ++ ++ Address = DAG.getNode(ISD::ADD, DL, PtrTy, Address, Offset); ++ ++ return DAG.getStore(ChainIn, DL, Value, Address, MachinePointerInfo(), ++ /* Alignment = */ 16); ++} ++ ++static SDValue lowerLASXStoreIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr, ++ const LoongArchSubtarget &Subtarget) { ++ SDLoc DL(Op); ++ SDValue ChainIn = Op->getOperand(0); ++ SDValue Value = Op->getOperand(2); ++ SDValue Address = Op->getOperand(3); ++ SDValue Offset = Op->getOperand(4); ++ EVT PtrTy = Address->getValueType(0); ++ ++ // For LP64 addresses have the underlying type MVT::i64. This intrinsic ++ // however takes an i32 signed constant offset. The actual type of the ++ // intrinsic is a scaled signed i12. 
++ if (Subtarget.isABI_LP64()) ++ Offset = DAG.getNode(ISD::SIGN_EXTEND, DL, PtrTy, Offset); ++ ++ Address = DAG.getNode(ISD::ADD, DL, PtrTy, Address, Offset); ++ ++ return DAG.getStore(ChainIn, DL, Value, Address, MachinePointerInfo(), ++ /* Alignment = */ 32); ++} ++ ++static SDValue LowerSUINT_TO_FP(unsigned ExtOpcode, SDValue Op, SelectionDAG &DAG) { ++ ++ EVT ResTy = Op->getValueType(0); ++ SDValue Op0 = Op->getOperand(0); ++ EVT ViaTy = Op0->getValueType(0); ++ SDLoc DL(Op); ++ ++ if (!ResTy.isVector()) { ++ if(ResTy.getScalarSizeInBits() == ViaTy.getScalarSizeInBits()) ++ return DAG.getNode(ISD::BITCAST, DL, ResTy, Op0); ++ else if(ResTy.getScalarSizeInBits() > ViaTy.getScalarSizeInBits()) { ++ Op0 = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Op0); ++ return DAG.getNode(ISD::FP_EXTEND, DL, MVT::f64, Op0); ++ } else { ++ Op0 = DAG.getNode(ISD::BITCAST, DL, MVT::f64, Op0); ++ return DAG.getNode(ISD::TRUNCATE, DL, MVT::f32, Op0); ++ } ++ ++ } ++ ++ if (ResTy.getScalarSizeInBits() == ViaTy.getScalarSizeInBits()) { ++ // v4i32 => v4f32 v8i32 => v8f32 ++ // v2i64 => v2f64 v4i64 => v4f64 ++ // do nothing ++ } else if (ResTy.getScalarSizeInBits() > ViaTy.getScalarSizeInBits()) { ++ // v4i32 => v4i64 => v4f64 ++ Op0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i32, {Op0, Op0}); ++ Op0 = DAG.getNode(ExtOpcode, DL, MVT::v4i64, Op0); ++ } else { ++ // v4i64 => v4f32 ++ SDValue Ops[4]; ++ for (unsigned i = 0; i < 4; i++) { ++ SDValue I64 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, Op0, ++ DAG.getConstant(i, DL, MVT::i32)); ++ Ops[i] = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, I64); ++ } ++ Op0 = DAG.getBuildVector(MVT::v4i32, DL, makeArrayRef(Ops, 4)); ++ } ++ ++ return Op0; ++} ++ ++static SDValue LowerFP_TO_SUINT(unsigned FPToSUI, unsigned ExtOpcode, ++ SDValue Op, SelectionDAG &DAG) { ++ ++ EVT ResTy = Op->getValueType(0); ++ SDValue Op0 = Op->getOperand(0); ++ EVT ViaTy = Op0->getValueType(0); ++ SDLoc DL(Op); ++ ++ if (ResTy.getScalarSizeInBits() == ViaTy.getScalarSizeInBits()) { ++ // v4f32 => v4i32 v8f32 => v8i32 ++ // v2f64 => v2i64 v4f64 => v4i64 ++ // do nothing ++ Op0 = DAG.getNode(FPToSUI, DL, ResTy, Op0); ++ } else if (ResTy.getScalarSizeInBits() > ViaTy.getScalarSizeInBits()) { ++ // v4f32 => v4i32 => v4i64 ++ Op0 = DAG.getNode(FPToSUI, DL, MVT::v4i32, Op0); ++ Op0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i32, {Op0, Op0}); ++ Op0 = DAG.getNode(ExtOpcode, DL, MVT::v4i64, Op0); ++ } else { ++ SDValue Ops[4]; ++ Ops[0] = DAG.getNode(FPToSUI, DL, MVT::i32, ++ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f64, Op0, ++ DAG.getConstant(0, DL, MVT::i64))); ++ Ops[1] = DAG.getNode(FPToSUI, DL, MVT::i32, ++ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f64, Op0, ++ DAG.getConstant(1, DL, MVT::i64))); ++ Ops[2] = DAG.getNode(FPToSUI, DL, MVT::i32, ++ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f64, Op0, ++ DAG.getConstant(2, DL, MVT::i64))); ++ Ops[3] = DAG.getNode(FPToSUI, DL, MVT::i32, ++ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f64, Op0, ++ DAG.getConstant(3, DL, MVT::i64))); ++ ++ Op0 = DAG.getBuildVector(MVT::v4i32, DL, makeArrayRef(Ops, 4)); ++ } ++ ++ return Op0; ++} ++ ++// Lower VECTOR_SHUFFLE into SHF (if possible). ++// ++// SHF splits the vector into blocks of four elements, then shuffles these ++// elements according to a <4 x i2> constant (encoded as an integer immediate). 
++//
++// It is therefore possible to lower into SHF when the mask takes the form:
++// <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...>
++// When undef's appear they are treated as if they were whatever value is
++// necessary in order to fit the above forms.
++//
++// For example:
++// %2 = shufflevector <8 x i16> %0, <8 x i16> undef,
++// <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
++// is lowered to:
++// (VSHUF4I_H $v0, $v1, 27)
++// where the 27 comes from:
++// 3 + (2 << 2) + (1 << 4) + (0 << 6)
++static SDValue lowerVECTOR_SHUFFLE_SHF(SDValue Op, EVT ResTy,
++ SmallVector<int, 16> Indices,
++ SelectionDAG &DAG) {
++ int SHFIndices[4] = {-1, -1, -1, -1};
++
++ if (Indices.size() < 4)
++ return SDValue();
++
++ for (unsigned i = 0; i < 4; ++i) {
++ for (unsigned j = i; j < Indices.size(); j += 4) {
++ int Idx = Indices[j];
++
++ // Convert from vector index to 4-element subvector index
++ // If an index refers to an element outside of the subvector then give up
++ if (Idx != -1) {
++ Idx -= 4 * (j / 4);
++ if (Idx < 0 || Idx >= 4)
++ return SDValue();
++ }
++
++ // If the mask has an undef, replace it with the current index.
++ // Note that it might still be undef if the current index is also undef
++ if (SHFIndices[i] == -1)
++ SHFIndices[i] = Idx;
++
++ // Check that non-undef values are the same as in the mask. If they
++ // aren't then give up
++ if (!(Idx == -1 || Idx == SHFIndices[i]))
++ return SDValue();
++ }
++ }
++
++ // Calculate the immediate. Replace any remaining undefs with zero
++ APInt Imm(32, 0);
++ for (int i = 3; i >= 0; --i) {
++ int Idx = SHFIndices[i];
++
++ if (Idx == -1)
++ Idx = 0;
++
++ Imm <<= 2;
++ Imm |= Idx & 0x3;
++ }
++
++ SDLoc DL(Op);
++ return DAG.getNode(LoongArchISD::SHF, DL, ResTy,
++ DAG.getConstant(Imm, DL, MVT::i32), Op->getOperand(0));
++}
++
++/// Determine whether a range fits a regular pattern of values.
++/// This function accounts for the possibility of jumping over the End iterator.
++template <typename ValType>
++static bool
++fitsRegularPattern(typename SmallVectorImpl<ValType>::const_iterator Begin,
++ unsigned CheckStride,
++ typename SmallVectorImpl<ValType>::const_iterator End,
++ ValType ExpectedIndex, unsigned ExpectedIndexStride) {
++ auto &I = Begin;
++
++ while (I != End) {
++ if (*I != -1 && *I != ExpectedIndex)
++ return false;
++ ExpectedIndex += ExpectedIndexStride;
++
++ // Incrementing past End is undefined behaviour so we must increment one
++ // step at a time and check for End at each step.
++ for (unsigned n = 0; n < CheckStride && I != End; ++n, ++I)
++ ; // Empty loop body.
++ }
++ return true;
++}
++
++// Determine whether VECTOR_SHUFFLE is a VREPLVEI.
++//
++// It is a VREPLVEI when the mask is:
++// <x, x, x, ...>
++// where x is any valid index.
++//
++// When undef's appear in the mask they are treated as if they were whatever
++// value is necessary in order to fit the above form.
++static bool isVECTOR_SHUFFLE_VREPLVEI(SDValue Op, EVT ResTy,
++ SmallVector<int, 16> Indices,
++ SelectionDAG &DAG) {
++ assert((Indices.size() % 2) == 0);
++
++ int SplatIndex = -1;
++ for (const auto &V : Indices) {
++ if (V != -1) {
++ SplatIndex = V;
++ break;
++ }
++ }
++
++ return fitsRegularPattern<int>(Indices.begin(), 1, Indices.end(), SplatIndex,
++ 0);
++}
++
++// Lower VECTOR_SHUFFLE into VPACKEV (if possible).
++//
++// VPACKEV interleaves the even elements from each vector.
++//
++// It is possible to lower into VPACKEV when the mask consists of two of the
++// following forms interleaved:
++// <0, 2, 4, ...>
++// <n, n+2, n+4, ...>
++// where n is the number of elements in the vector.
++// For example:
++// <0, 0, 2, 2, 4, 4, ...>
++// <0, n, 2, n+2, 4, n+4, ...>
++//
++// When undef's appear in the mask they are treated as if they were whatever
++// value is necessary in order to fit the above forms.
++static SDValue lowerVECTOR_SHUFFLE_VPACKEV(SDValue Op, EVT ResTy,
++ SmallVector<int, 16> Indices,
++ SelectionDAG &DAG) {
++ assert((Indices.size() % 2) == 0);
++
++ SDValue Vj;
++ SDValue Vk;
++ const auto &Begin = Indices.begin();
++ const auto &End = Indices.end();
++
++ // Check even elements are taken from the even elements of one half or the
++ // other and pick an operand accordingly.
++ if (fitsRegularPattern<int>(Begin, 2, End, 0, 2))
++ Vj = Op->getOperand(0);
++ else if (fitsRegularPattern<int>(Begin, 2, End, Indices.size(), 2))
++ Vj = Op->getOperand(1);
++ else
++ return SDValue();
++
++ // Check odd elements are taken from the even elements of one half or the
++ // other and pick an operand accordingly.
++ if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 2))
++ Vk = Op->getOperand(0);
++ else if (fitsRegularPattern<int>(Begin + 1, 2, End, Indices.size(), 2))
++ Vk = Op->getOperand(1);
++ else
++ return SDValue();
++
++ return DAG.getNode(LoongArchISD::VPACKEV, SDLoc(Op), ResTy, Vk, Vj);
++}
++
++// Lower VECTOR_SHUFFLE into VPACKOD (if possible).
++//
++// VPACKOD interleaves the odd elements from each vector.
++//
++// It is possible to lower into VPACKOD when the mask consists of two of the
++// following forms interleaved:
++// <1, 3, 5, ...>
++// <n+1, n+3, n+5, ...>
++// where n is the number of elements in the vector.
++// For example:
++// <1, 1, 3, 3, 5, 5, ...>
++// <1, n+1, 3, n+3, 5, n+5, ...>
++//
++// When undef's appear in the mask they are treated as if they were whatever
++// value is necessary in order to fit the above forms.
++static SDValue lowerVECTOR_SHUFFLE_VPACKOD(SDValue Op, EVT ResTy,
++ SmallVector<int, 16> Indices,
++ SelectionDAG &DAG) {
++ assert((Indices.size() % 2) == 0);
++
++ SDValue Vj;
++ SDValue Vk;
++ const auto &Begin = Indices.begin();
++ const auto &End = Indices.end();
++
++ // Check even elements are taken from the odd elements of one half or the
++ // other and pick an operand accordingly.
++ if (fitsRegularPattern<int>(Begin, 2, End, 1, 2))
++ Vj = Op->getOperand(0);
++ else if (fitsRegularPattern<int>(Begin, 2, End, Indices.size() + 1, 2))
++ Vj = Op->getOperand(1);
++ else
++ return SDValue();
++
++ // Check odd elements are taken from the odd elements of one half or the
++ // other and pick an operand accordingly.
++ if (fitsRegularPattern<int>(Begin + 1, 2, End, 1, 2))
++ Vk = Op->getOperand(0);
++ else if (fitsRegularPattern<int>(Begin + 1, 2, End, Indices.size() + 1, 2))
++ Vk = Op->getOperand(1);
++ else
++ return SDValue();
++
++ return DAG.getNode(LoongArchISD::VPACKOD, SDLoc(Op), ResTy, Vk, Vj);
++}
++
++// Lower VECTOR_SHUFFLE into VILVL (if possible).
++//
++// VILVL interleaves consecutive elements from the right (lowest-indexed) half
++// of each vector.
++//
++// It is possible to lower into VILVL when the mask consists of two of the
++// following forms interleaved:
++// <0, 1, 2, ...>
++// <n, n+1, n+2, ...>
++// where n is the number of elements in the vector.
++// For example:
++// <0, 0, 1, 1, 2, 2, ...>
++// <0, n, 1, n+1, 2, n+2, ...>
++//
++// When undef's appear in the mask they are treated as if they were whatever
++// value is necessary in order to fit the above forms.
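++// A concrete case (illustrative, v8i16 with n = 8):
++// %r = shufflevector <8 x i16> %a, <8 x i16> %b,
++// <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
++// interleaves the low halves of %a and %b, matching the
++// <0, n, 1, n+1, 2, n+2, ...> pattern above, and so can be selected as VILVL.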
++static SDValue lowerVECTOR_SHUFFLE_VILVL(SDValue Op, EVT ResTy,
++ SmallVector<int, 16> Indices,
++ SelectionDAG &DAG) {
++ assert((Indices.size() % 2) == 0);
++
++ SDValue Vj;
++ SDValue Vk;
++ const auto &Begin = Indices.begin();
++ const auto &End = Indices.end();
++
++ // Check even elements are taken from the right (lowest-indexed) elements of
++ // one half or the other and pick an operand accordingly.
++ if (fitsRegularPattern<int>(Begin, 2, End, 0, 1))
++ Vj = Op->getOperand(0);
++ else if (fitsRegularPattern<int>(Begin, 2, End, Indices.size(), 1))
++ Vj = Op->getOperand(1);
++ else
++ return SDValue();
++
++ // Check odd elements are taken from the right (lowest-indexed) elements of
++ // one half or the other and pick an operand accordingly.
++ if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 1))
++ Vk = Op->getOperand(0);
++ else if (fitsRegularPattern<int>(Begin + 1, 2, End, Indices.size(), 1))
++ Vk = Op->getOperand(1);
++ else
++ return SDValue();
++
++ return DAG.getNode(LoongArchISD::VILVL, SDLoc(Op), ResTy, Vk, Vj);
++}
++
++// Lower VECTOR_SHUFFLE into VILVH (if possible).
++//
++// VILVH interleaves consecutive elements from the left (highest-indexed) half
++// of each vector.
++//
++// It is possible to lower into VILVH when the mask consists of two of the
++// following forms interleaved:
++// <x, x+1, x+2, ...>
++// <n+x, n+x+1, n+x+2, ...>
++// where n is the number of elements in the vector and x is half n.
++// For example:
++// <x, x, x+1, x+1, x+2, x+2, ...>
++// <x, n+x, x+1, n+x+1, x+2, n+x+2, ...>
++//
++// When undef's appear in the mask they are treated as if they were whatever
++// value is necessary in order to fit the above forms.
++static SDValue lowerVECTOR_SHUFFLE_VILVH(SDValue Op, EVT ResTy,
++ SmallVector<int, 16> Indices,
++ SelectionDAG &DAG) {
++ assert((Indices.size() % 2) == 0);
++
++ unsigned HalfSize = Indices.size() / 2;
++ SDValue Vj;
++ SDValue Vk;
++ const auto &Begin = Indices.begin();
++ const auto &End = Indices.end();
++
++ // Check even elements are taken from the left (highest-indexed) elements of
++ // one half or the other and pick an operand accordingly.
++ if (fitsRegularPattern<int>(Begin, 2, End, HalfSize, 1))
++ Vj = Op->getOperand(0);
++ else if (fitsRegularPattern<int>(Begin, 2, End, Indices.size() + HalfSize, 1))
++ Vj = Op->getOperand(1);
++ else
++ return SDValue();
++
++ // Check odd elements are taken from the left (highest-indexed) elements of
++ // one half or the other and pick an operand accordingly.
++ if (fitsRegularPattern<int>(Begin + 1, 2, End, HalfSize, 1))
++ Vk = Op->getOperand(0);
++ else if (fitsRegularPattern<int>(Begin + 1, 2, End, Indices.size() + HalfSize,
++ 1))
++ Vk = Op->getOperand(1);
++ else
++ return SDValue();
++
++ return DAG.getNode(LoongArchISD::VILVH, SDLoc(Op), ResTy, Vk, Vj);
++}
++
++// Lower VECTOR_SHUFFLE into VPICKEV (if possible).
++//
++// VPICKEV copies the even elements of each vector into the result vector.
++//
++// It is possible to lower into VPICKEV when the mask consists of two of the
++// following forms concatenated:
++// <0, 2, 4, ...>
++// <n, n+2, n+4, ...>
++// where n is the number of elements in the vector.
++// For example:
++// <0, 2, 4, ..., 0, 2, 4, ...>
++// <0, 2, 4, ..., n, n+2, n+4, ...>
++//
++// When undef's appear in the mask they are treated as if they were whatever
++// value is necessary in order to fit the above forms.
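++// A concrete case (illustrative, v8i16 with n = 8):
++// %r = shufflevector <8 x i16> %a, <8 x i16> %b,
++// <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
++// takes the even elements of %a followed by the even elements of %b, matching
++// the <0, 2, 4, ..., n, n+2, n+4, ...> pattern above.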
++static SDValue lowerVECTOR_SHUFFLE_VPICKEV(SDValue Op, EVT ResTy,
++ SmallVector<int, 16> Indices,
++ SelectionDAG &DAG) {
++ assert((Indices.size() % 2) == 0);
++
++ SDValue Vj;
++ SDValue Vk;
++ const auto &Begin = Indices.begin();
++ const auto &Mid = Indices.begin() + Indices.size() / 2;
++ const auto &End = Indices.end();
++
++ if (fitsRegularPattern<int>(Begin, 1, Mid, 0, 2))
++ Vj = Op->getOperand(0);
++ else if (fitsRegularPattern<int>(Begin, 1, Mid, Indices.size(), 2))
++ Vj = Op->getOperand(1);
++ else
++ return SDValue();
++
++ if (fitsRegularPattern<int>(Mid, 1, End, 0, 2))
++ Vk = Op->getOperand(0);
++ else if (fitsRegularPattern<int>(Mid, 1, End, Indices.size(), 2))
++ Vk = Op->getOperand(1);
++ else
++ return SDValue();
++
++ return DAG.getNode(LoongArchISD::VPICKEV, SDLoc(Op), ResTy, Vk, Vj);
++}
++
++// Lower VECTOR_SHUFFLE into VPICKOD (if possible).
++//
++// VPICKOD copies the odd elements of each vector into the result vector.
++//
++// It is possible to lower into VPICKOD when the mask consists of two of the
++// following forms concatenated:
++// <1, 3, 5, ...>
++// <n+1, n+3, n+5, ...>
++// where n is the number of elements in the vector.
++// For example:
++// <1, 3, 5, ..., 1, 3, 5, ...>
++// <1, 3, 5, ..., n+1, n+3, n+5, ...>
++//
++// When undef's appear in the mask they are treated as if they were whatever
++// value is necessary in order to fit the above forms.
++static SDValue lowerVECTOR_SHUFFLE_VPICKOD(SDValue Op, EVT ResTy,
++ SmallVector<int, 16> Indices,
++ SelectionDAG &DAG) {
++ assert((Indices.size() % 2) == 0);
++
++ SDValue Vj;
++ SDValue Vk;
++ const auto &Begin = Indices.begin();
++ const auto &Mid = Indices.begin() + Indices.size() / 2;
++ const auto &End = Indices.end();
++
++ if (fitsRegularPattern<int>(Begin, 1, Mid, 1, 2))
++ Vj = Op->getOperand(0);
++ else if (fitsRegularPattern<int>(Begin, 1, Mid, Indices.size() + 1, 2))
++ Vj = Op->getOperand(1);
++ else
++ return SDValue();
++
++ if (fitsRegularPattern<int>(Mid, 1, End, 1, 2))
++ Vk = Op->getOperand(0);
++ else if (fitsRegularPattern<int>(Mid, 1, End, Indices.size() + 1, 2))
++ Vk = Op->getOperand(1);
++ else
++ return SDValue();
++
++ return DAG.getNode(LoongArchISD::VPICKOD, SDLoc(Op), ResTy, Vk, Vj);
++}
++
++// Lower VECTOR_SHUFFLE into VSHF.
++//
++// This mostly consists of converting the shuffle indices in Indices into a
++// BUILD_VECTOR and adding it as an operand to the resulting VSHF. There is
++// also code to eliminate unused operands of the VECTOR_SHUFFLE. For example,
++// if the type is v8i16 and all the indices are less than 8 then the second
++// operand is unused and can be replaced with anything. We choose to replace it
++// with the used operand since this reduces the number of instructions overall.
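++// A concrete case (illustrative, v8i16): a mask such as
++// <i32 3, i32 1, i32 6, i32 4, i32 7, i32 5, i32 2, i32 0> only references
++// the first input, so both vector operands of the VSHF node are set to that
++// input and the indices are materialized as a BUILD_VECTOR of target
++// constants.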
++static SDValue lowerVECTOR_SHUFFLE_VSHF(SDValue Op, EVT ResTy, ++ SmallVector Indices, ++ SelectionDAG &DAG) { ++ SmallVector Ops; ++ SDValue Op0; ++ SDValue Op1; ++ EVT MaskVecTy = ResTy.changeVectorElementTypeToInteger(); ++ EVT MaskEltTy = MaskVecTy.getVectorElementType(); ++ bool Using1stVec = false; ++ bool Using2ndVec = false; ++ SDLoc DL(Op); ++ int ResTyNumElts = ResTy.getVectorNumElements(); ++ ++ for (int i = 0; i < ResTyNumElts; ++i) { ++ // Idx == -1 means UNDEF ++ int Idx = Indices[i]; ++ ++ if (0 <= Idx && Idx < ResTyNumElts) ++ Using1stVec = true; ++ if (ResTyNumElts <= Idx && Idx < ResTyNumElts * 2) ++ Using2ndVec = true; ++ } ++ ++ for (SmallVector::iterator I = Indices.begin(); I != Indices.end(); ++ ++I) ++ Ops.push_back(DAG.getTargetConstant(*I, DL, MaskEltTy)); ++ ++ SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, Ops); ++ ++ if (Using1stVec && Using2ndVec) { ++ Op0 = Op->getOperand(0); ++ Op1 = Op->getOperand(1); ++ } else if (Using1stVec) ++ Op0 = Op1 = Op->getOperand(0); ++ else if (Using2ndVec) ++ Op0 = Op1 = Op->getOperand(1); ++ else ++ llvm_unreachable("shuffle vector mask references neither vector operand?"); ++ ++ // VECTOR_SHUFFLE concatenates the vectors in an vectorwise fashion. ++ // <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11> ++ // VSHF concatenates the vectors in a bitwise fashion: ++ // <0b00, 0b01> + <0b10, 0b11> -> ++ // 0b0100 + 0b1110 -> 0b01001110 ++ // <0b10, 0b11, 0b00, 0b01> ++ // We must therefore swap the operands to get the correct result. ++ return DAG.getNode(LoongArchISD::VSHF, DL, ResTy, MaskVec, Op1, Op0); ++} ++ ++static SDValue lowerVECTOR_SHUFFLE_XVILVL(SDValue Op, EVT ResTy, ++ SmallVector Indices, ++ SelectionDAG &DAG) { ++ assert((Indices.size() % 2) == 0); ++ ++ SDValue Xj; ++ SDValue Xk; ++ const auto &Begin = Indices.begin(); ++ const auto &End = Indices.end(); ++ unsigned HalfSize = Indices.size() / 2; ++ ++ if (fitsRegularPattern(Begin, 2, End - HalfSize, 0, 1) && ++ fitsRegularPattern(Begin + HalfSize, 2, End, HalfSize, 1)) ++ Xj = Op->getOperand(0); ++ else if (fitsRegularPattern(Begin, 2, End - HalfSize, Indices.size(), ++ 1) && ++ fitsRegularPattern(Begin + HalfSize, 2, End, ++ Indices.size() + HalfSize, 1)) ++ Xj = Op->getOperand(1); ++ else ++ return SDValue(); ++ ++ if (fitsRegularPattern(Begin + 1, 2, End - HalfSize, 0, 1) && ++ fitsRegularPattern(Begin + 1 + HalfSize, 2, End, HalfSize, 1)) ++ Xk = Op->getOperand(0); ++ else if (fitsRegularPattern(Begin + 1, 2, End - HalfSize, Indices.size(), ++ 1) && ++ fitsRegularPattern(Begin + 1 + HalfSize, 2, End, ++ Indices.size() + HalfSize, 1)) ++ Xk = Op->getOperand(1); ++ else ++ return SDValue(); ++ ++ return DAG.getNode(LoongArchISD::VILVL, SDLoc(Op), ResTy, Xk, Xj); ++} ++ ++static SDValue lowerVECTOR_SHUFFLE_XVILVH(SDValue Op, EVT ResTy, ++ SmallVector Indices, ++ SelectionDAG &DAG) { ++ assert((Indices.size() % 2) == 0); ++ ++ unsigned HalfSize = Indices.size() / 2; ++ unsigned LeftSize = HalfSize / 2; ++ SDValue Xj; ++ SDValue Xk; ++ const auto &Begin = Indices.begin(); ++ const auto &End = Indices.end(); ++ ++ if (fitsRegularPattern(Begin, 2, End - HalfSize, HalfSize - LeftSize, ++ 1) && ++ fitsRegularPattern(Begin + HalfSize, 2, End, HalfSize + LeftSize, 1)) ++ Xj = Op->getOperand(0); ++ else if (fitsRegularPattern(Begin, 2, End - HalfSize, ++ Indices.size() + HalfSize - LeftSize, 1) && ++ fitsRegularPattern(Begin + HalfSize, 2, End, ++ Indices.size() + HalfSize + LeftSize, 1)) ++ Xj = Op->getOperand(1); ++ else ++ return SDValue(); ++ ++ if 
(fitsRegularPattern(Begin + 1, 2, End - HalfSize, HalfSize - LeftSize, ++ 1) && ++ fitsRegularPattern(Begin + 1 + HalfSize, 2, End, HalfSize + LeftSize, ++ 1)) ++ Xk = Op->getOperand(0); ++ else if (fitsRegularPattern(Begin + 1, 2, End - HalfSize, ++ Indices.size() + HalfSize - LeftSize, 1) && ++ fitsRegularPattern(Begin + 1 + HalfSize, 2, End, ++ Indices.size() + HalfSize + LeftSize, 1)) ++ Xk = Op->getOperand(1); ++ else ++ return SDValue(); ++ ++ return DAG.getNode(LoongArchISD::VILVH, SDLoc(Op), ResTy, Xk, Xj); ++} ++ ++static SDValue lowerVECTOR_SHUFFLE_XVPACKEV(SDValue Op, EVT ResTy, ++ SmallVector Indices, ++ SelectionDAG &DAG) { ++ assert((Indices.size() % 2) == 0); ++ ++ SDValue Xj; ++ SDValue Xk; ++ const auto &Begin = Indices.begin(); ++ const auto &End = Indices.end(); ++ unsigned HalfSize = Indices.size() / 2; ++ ++ if (fitsRegularPattern(Begin, 2, End, 0, 2) && ++ fitsRegularPattern(Begin + HalfSize, 2, End, HalfSize, 2)) ++ Xj = Op->getOperand(0); ++ else if (fitsRegularPattern(Begin, 2, End, Indices.size(), 2) && ++ fitsRegularPattern(Begin + HalfSize, 2, End, ++ Indices.size() + HalfSize, 2)) ++ Xj = Op->getOperand(1); ++ else ++ return SDValue(); ++ ++ if (fitsRegularPattern(Begin + 1, 2, End, 0, 2) && ++ fitsRegularPattern(Begin + 1 + HalfSize, 2, End, HalfSize, 2)) ++ Xk = Op->getOperand(0); ++ else if (fitsRegularPattern(Begin + 1, 2, End, Indices.size(), 2) && ++ fitsRegularPattern(Begin + 1 + HalfSize, 2, End, ++ Indices.size() + HalfSize, 2)) ++ Xk = Op->getOperand(1); ++ else ++ return SDValue(); ++ ++ return DAG.getNode(LoongArchISD::VPACKEV, SDLoc(Op), ResTy, Xk, Xj); ++} ++ ++static SDValue lowerVECTOR_SHUFFLE_XVPACKOD(SDValue Op, EVT ResTy, ++ SmallVector Indices, ++ SelectionDAG &DAG) { ++ assert((Indices.size() % 2) == 0); ++ ++ SDValue Xj; ++ SDValue Xk; ++ const auto &Begin = Indices.begin(); ++ const auto &End = Indices.end(); ++ unsigned HalfSize = Indices.size() / 2; ++ ++ if (fitsRegularPattern(Begin, 2, End, 1, 2) && ++ fitsRegularPattern(Begin + HalfSize, 2, End, HalfSize + 1, 2)) ++ Xj = Op->getOperand(0); ++ else if (fitsRegularPattern(Begin, 2, End, Indices.size() + 1, 2) && ++ fitsRegularPattern(Begin + HalfSize, 2, End, ++ Indices.size() + HalfSize + 1, 2)) ++ Xj = Op->getOperand(1); ++ else ++ return SDValue(); ++ ++ if (fitsRegularPattern(Begin + 1, 2, End, 1, 2) && ++ fitsRegularPattern(Begin + 1 + HalfSize, 2, End, HalfSize + 1, 2)) ++ Xk = Op->getOperand(0); ++ else if (fitsRegularPattern(Begin + 1, 2, End, Indices.size() + 1, 2) && ++ fitsRegularPattern(Begin + 1 + HalfSize, 2, End, ++ Indices.size() + HalfSize + 1, 2)) ++ Xk = Op->getOperand(1); ++ else ++ return SDValue(); ++ ++ return DAG.getNode(LoongArchISD::VPACKOD, SDLoc(Op), ResTy, Xk, Xj); ++} ++ ++static bool isVECTOR_SHUFFLE_XVREPLVEI(SDValue Op, EVT ResTy, ++ SmallVector Indices, ++ SelectionDAG &DAG) { ++ assert((Indices.size() % 2) == 0); ++ unsigned HalfSize = Indices.size() / 2; ++ ++ for (unsigned i = 0; i < HalfSize; i++) { ++ if (Indices[i] == -1 || Indices[HalfSize + i] == -1) ++ return false; ++ if (Indices[0] != Indices[i] || Indices[HalfSize] != Indices[HalfSize + i]) ++ return false; ++ } ++ return true; ++} ++ ++static SDValue lowerVECTOR_SHUFFLE_XVPICKEV(SDValue Op, EVT ResTy, ++ SmallVector Indices, ++ SelectionDAG &DAG) { ++ assert((Indices.size() % 2) == 0); ++ ++ SDValue Xj; ++ SDValue Xk; ++ const auto &Begin = Indices.begin(); ++ const auto &LeftMid = Indices.begin() + Indices.size() / 4; ++ const auto &End = Indices.end(); ++ const auto &RightMid = 
Indices.end() - Indices.size() / 4; ++ const auto &Mid = Indices.begin() + Indices.size() / 2; ++ unsigned HalfSize = Indices.size() / 2; ++ ++ if (fitsRegularPattern(Begin, 1, LeftMid, 0, 2) && ++ fitsRegularPattern(Mid, 1, RightMid, HalfSize, 2)) ++ Xj = Op->getOperand(0); ++ else if (fitsRegularPattern(Begin, 1, LeftMid, Indices.size(), 2) && ++ fitsRegularPattern(Mid, 1, RightMid, Indices.size() + HalfSize, ++ 2)) ++ Xj = Op->getOperand(1); ++ else ++ return SDValue(); ++ ++ if (fitsRegularPattern(LeftMid, 1, Mid, 0, 2) && ++ fitsRegularPattern(RightMid, 1, End, HalfSize, 2)) ++ Xk = Op->getOperand(0); ++ else if (fitsRegularPattern(LeftMid, 1, Mid, Indices.size(), 2) && ++ fitsRegularPattern(RightMid, 1, End, Indices.size() + HalfSize, ++ 2)) ++ Xk = Op->getOperand(1); ++ else ++ return SDValue(); ++ ++ return DAG.getNode(LoongArchISD::VPICKEV, SDLoc(Op), ResTy, Xk, Xj); ++} ++ ++static SDValue lowerVECTOR_SHUFFLE_XVPICKOD(SDValue Op, EVT ResTy, ++ SmallVector Indices, ++ SelectionDAG &DAG) { ++ assert((Indices.size() % 2) == 0); ++ ++ SDValue Xj; ++ SDValue Xk; ++ const auto &Begin = Indices.begin(); ++ const auto &LeftMid = Indices.begin() + Indices.size() / 4; ++ const auto &Mid = Indices.begin() + Indices.size() / 2; ++ const auto &RightMid = Indices.end() - Indices.size() / 4; ++ const auto &End = Indices.end(); ++ unsigned HalfSize = Indices.size() / 2; ++ ++ if (fitsRegularPattern(Begin, 1, LeftMid, 1, 2) && ++ fitsRegularPattern(Mid, 1, RightMid, HalfSize + 1, 2)) ++ Xj = Op->getOperand(0); ++ else if (fitsRegularPattern(Begin, 1, LeftMid, Indices.size() + 1, 2) && ++ fitsRegularPattern(Mid, 1, RightMid, ++ Indices.size() + HalfSize + 1, 2)) ++ Xj = Op->getOperand(1); ++ else ++ return SDValue(); ++ ++ if (fitsRegularPattern(LeftMid, 1, Mid, 1, 2) && ++ fitsRegularPattern(RightMid, 1, End, HalfSize + 1, 2)) ++ Xk = Op->getOperand(0); ++ else if (fitsRegularPattern(LeftMid, 1, Mid, Indices.size() + 1, 2) && ++ fitsRegularPattern(RightMid, 1, End, ++ Indices.size() + HalfSize + 1, 2)) ++ Xk = Op->getOperand(1); ++ else ++ return SDValue(); ++ ++ return DAG.getNode(LoongArchISD::VPICKOD, SDLoc(Op), ResTy, Xk, Xj); ++} ++ ++static SDValue lowerVECTOR_SHUFFLE_XSHF(SDValue Op, EVT ResTy, ++ SmallVector Indices, ++ SelectionDAG &DAG) { ++ int SHFIndices[4] = {-1, -1, -1, -1}; ++ ++ // If the size of the mask is 4, it should not be converted to SHF node, ++ // because SHF only corresponds to type b/h/w instruction but no type d. ++ if (Indices.size() <= 4) ++ return SDValue(); ++ ++ int HalfSize = Indices.size() / 2; ++ for (int i = 0; i < 4; ++i) { ++ for (int j = i; j < HalfSize; j += 4) { ++ int Idx = Indices[j]; ++ // check mxshf ++ if (Idx + HalfSize != Indices[j + HalfSize]) ++ return SDValue(); ++ ++ // Convert from vector index to 4-element subvector index ++ // If an index refers to an element outside of the subvector then give up ++ if (Idx != -1) { ++ Idx -= 4 * (j / 4); ++ if (Idx < 0 || Idx >= 4) ++ return SDValue(); ++ } ++ ++ // If the mask has an undef, replace it with the current index. ++ // Note that it might still be undef if the current index is also undef ++ if (SHFIndices[i] == -1) ++ SHFIndices[i] = Idx; ++ ++ // Check that non-undef values are the same as in the mask. If they ++ // aren't then give up ++ if (!(Idx == -1 || Idx == SHFIndices[i])) ++ return SDValue(); ++ } ++ } ++ ++ // Calculate the immediate. 
Replace any remaining undefs with zero ++ APInt Imm(32, 0); ++ for (int i = 3; i >= 0; --i) { ++ int Idx = SHFIndices[i]; ++ ++ if (Idx == -1) ++ Idx = 0; ++ ++ Imm <<= 2; ++ Imm |= Idx & 0x3; ++ } ++ SDLoc DL(Op); ++ return DAG.getNode(LoongArchISD::SHF, DL, ResTy, ++ DAG.getConstant(Imm, DL, MVT::i32), Op->getOperand(0)); ++} ++ ++static bool isConstantOrUndef(const SDValue Op) { ++ if (Op->isUndef()) ++ return true; ++ if (isa(Op)) ++ return true; ++ if (isa(Op)) ++ return true; ++ return false; ++} ++ ++static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op) { ++ for (unsigned i = 0; i < Op->getNumOperands(); ++i) ++ if (isConstantOrUndef(Op->getOperand(i))) ++ return true; ++ return false; ++} ++ ++static bool isLASXBySplatBitSize(unsigned SplatBitSize, EVT &ViaVecTy) { ++ switch (SplatBitSize) { ++ default: ++ return false; ++ case 8: ++ ViaVecTy = MVT::v32i8; ++ break; ++ case 16: ++ ViaVecTy = MVT::v16i16; ++ break; ++ case 32: ++ ViaVecTy = MVT::v8i32; ++ break; ++ case 64: ++ ViaVecTy = MVT::v4i64; ++ break; ++ case 128: ++ // There's no fill.q to fall back on for 64-bit values ++ return false; ++ } ++ ++ return true; ++} ++ ++static bool isLSXBySplatBitSize(unsigned SplatBitSize, EVT &ViaVecTy) { ++ switch (SplatBitSize) { ++ default: ++ return false; ++ case 8: ++ ViaVecTy = MVT::v16i8; ++ break; ++ case 16: ++ ViaVecTy = MVT::v8i16; ++ break; ++ case 32: ++ ViaVecTy = MVT::v4i32; ++ break; ++ case 64: ++ // There's no fill.d to fall back on for 64-bit values ++ return false; ++ } ++ ++ return true; ++} ++ ++bool LoongArchTargetLowering::isCheapToSpeculateCttz() const { return true; } ++ ++bool LoongArchTargetLowering::isCheapToSpeculateCtlz() const { return true; } ++ ++void LoongArchTargetLowering::LowerOperationWrapper( ++ SDNode *N, SmallVectorImpl &Results, SelectionDAG &DAG) const { ++ SDValue Res = LowerOperation(SDValue(N, 0), DAG); ++ ++ if (!Res.getNode()) ++ return; ++ ++ assert((N->getNumValues() <= Res->getNumValues()) && ++ "Lowering returned the wrong number of results!"); ++ ++ for (unsigned I = 0, E = Res->getNumValues(); I != E; ++I) ++ Results.push_back(Res.getValue(I)); ++} ++ ++void LoongArchTargetLowering::ReplaceNodeResults( ++ SDNode *N, SmallVectorImpl &Results, SelectionDAG &DAG) const { ++ SDLoc DL(N); ++ switch (N->getOpcode()) { ++ default: ++ return LowerOperationWrapper(N, Results, DAG); ++ case LoongArchISD::VABSD: ++ case LoongArchISD::UVABSD: { ++ EVT VT = N->getValueType(0); ++ assert(VT.isVector() && "Unexpected VT"); ++ if (getTypeAction(*DAG.getContext(), VT) == TypePromoteInteger) { ++ EVT PromoteVT; ++ if (VT.getVectorNumElements() == 2) ++ PromoteVT = MVT::v2i64; ++ else if (VT.getVectorNumElements() == 4) ++ PromoteVT = MVT::v4i32; ++ else if (VT.getVectorNumElements() == 8) ++ PromoteVT = MVT::v8i16; ++ else ++ return; ++ ++ SDValue N0 = ++ DAG.getNode(ISD::ANY_EXTEND, DL, PromoteVT, N->getOperand(0)); ++ SDValue N1 = ++ DAG.getNode(ISD::ANY_EXTEND, DL, PromoteVT, N->getOperand(1)); ++ ++ SDValue Vabsd = ++ DAG.getNode(N->getOpcode(), DL, PromoteVT, N0, N1, N->getOperand(2)); ++ ++ Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Vabsd)); ++ } ++ return; ++ } ++ } ++} ++ ++SDValue LoongArchTargetLowering::LowerOperation(SDValue Op, ++ SelectionDAG &DAG) const { ++ switch (Op.getOpcode()) { ++ case ISD::STORE: ++ return lowerSTORE(Op, DAG); ++ case ISD::INTRINSIC_WO_CHAIN: ++ return lowerINTRINSIC_WO_CHAIN(Op, DAG); ++ case ISD::INTRINSIC_W_CHAIN: ++ return lowerINTRINSIC_W_CHAIN(Op, DAG); ++ case ISD::INTRINSIC_VOID: 
++ return lowerINTRINSIC_VOID(Op, DAG); ++ case ISD::EXTRACT_VECTOR_ELT: ++ return lowerEXTRACT_VECTOR_ELT(Op, DAG); ++ case ISD::INSERT_VECTOR_ELT: ++ return lowerINSERT_VECTOR_ELT(Op, DAG); ++ case ISD::BUILD_VECTOR: ++ return lowerBUILD_VECTOR(Op, DAG); ++ case ISD::VECTOR_SHUFFLE: ++ return lowerVECTOR_SHUFFLE(Op, DAG); ++ case ISD::UINT_TO_FP: ++ return lowerUINT_TO_FP(Op, DAG); ++ case ISD::SINT_TO_FP: ++ return lowerSINT_TO_FP(Op, DAG); ++ case ISD::FP_TO_UINT: ++ return lowerFP_TO_UINT(Op, DAG); ++ case ISD::FP_TO_SINT: ++ return lowerFP_TO_SINT(Op, DAG); ++ case ISD::BRCOND: ++ return lowerBRCOND(Op, DAG); ++ case ISD::ConstantPool: ++ return lowerConstantPool(Op, DAG); ++ case ISD::GlobalAddress: ++ return lowerGlobalAddress(Op, DAG); ++ case ISD::BlockAddress: ++ return lowerBlockAddress(Op, DAG); ++ case ISD::GlobalTLSAddress: ++ return lowerGlobalTLSAddress(Op, DAG); ++ case ISD::JumpTable: ++ return lowerJumpTable(Op, DAG); ++ case ISD::SELECT: ++ return lowerSELECT(Op, DAG); ++ case ISD::SETCC: ++ return lowerSETCC(Op, DAG); ++ case ISD::VASTART: ++ return lowerVASTART(Op, DAG); ++ case ISD::VAARG: ++ return lowerVAARG(Op, DAG); ++ case ISD::FRAMEADDR: ++ return lowerFRAMEADDR(Op, DAG); ++ case ISD::RETURNADDR: ++ return lowerRETURNADDR(Op, DAG); ++ case ISD::EH_RETURN: ++ return lowerEH_RETURN(Op, DAG); ++ case ISD::ATOMIC_FENCE: ++ return lowerATOMIC_FENCE(Op, DAG); ++ case ISD::SHL_PARTS: ++ return lowerShiftLeftParts(Op, DAG); ++ case ISD::SRA_PARTS: ++ return lowerShiftRightParts(Op, DAG, true); ++ case ISD::SRL_PARTS: ++ return lowerShiftRightParts(Op, DAG, false); ++ case ISD::EH_DWARF_CFA: ++ return lowerEH_DWARF_CFA(Op, DAG); ++ } ++ return SDValue(); ++} ++ ++//===----------------------------------------------------------------------===// ++// Lower helper functions ++//===----------------------------------------------------------------------===// ++ ++template ++SDValue LoongArchTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG, ++ bool IsLocal) const { ++ SDLoc DL(N); ++ EVT Ty = getPointerTy(DAG.getDataLayout()); ++ ++ if (isPositionIndependent()) { ++ SDValue Addr = getTargetNode(N, Ty, DAG, 0U); ++ if (IsLocal) ++ // Use PC-relative addressing to access the symbol. ++ return SDValue(DAG.getMachineNode(LoongArch::LoadAddrLocal, DL, Ty, Addr), ++ 0); ++ ++ // Use PC-relative addressing to access the GOT for this symbol, then load ++ // the address from the GOT. ++ return SDValue(DAG.getMachineNode(LoongArch::LoadAddrGlobal, DL, Ty, Addr), ++ 0); ++ } ++ ++ SDValue Addr = getTargetNode(N, Ty, DAG, 0U); ++ return SDValue(DAG.getMachineNode(LoongArch::LoadAddrLocal, DL, Ty, Addr), 0); ++} ++ ++// addLiveIn - This helper function adds the specified physical register to the ++// MachineFunction as a live in value. It also creates a corresponding ++// virtual register for it. 
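++// A minimal usage sketch (illustrative; the exact register name is assumed):
++// unsigned VReg = addLiveIn(MF, LoongArch::A0_64, &LoongArch::GPR64RegClass);
++// after which the incoming argument can be read from VReg via CopyFromReg in
++// the entry block.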
++static unsigned addLiveIn(MachineFunction &MF, unsigned PReg, ++ const TargetRegisterClass *RC) { ++ unsigned VReg = MF.getRegInfo().createVirtualRegister(RC); ++ MF.getRegInfo().addLiveIn(PReg, VReg); ++ return VReg; ++} ++ ++static MachineBasicBlock *insertDivByZeroTrap(MachineInstr &MI, ++ MachineBasicBlock &MBB, ++ const TargetInstrInfo &TII, ++ bool Is64Bit) { ++ if (NoZeroDivCheck) ++ return &MBB; ++ ++ // Insert pseudo instruction(PseudoTEQ), will expand: ++ // beq $divisor_reg, $zero, 8 ++ // break 7 ++ MachineBasicBlock::iterator I(MI); ++ MachineInstrBuilder MIB; ++ MachineOperand &Divisor = MI.getOperand(2); ++ unsigned TeqOp = LoongArch::PseudoTEQ; ++ ++ MIB = BuildMI(MBB, std::next(I), MI.getDebugLoc(), TII.get(TeqOp)) ++ .addReg(Divisor.getReg(), getKillRegState(Divisor.isKill())); ++ ++ // Use the 32-bit sub-register if this is a 64-bit division. ++ //if (Is64Bit) ++ // MIB->getOperand(0).setSubReg(LoongArch::sub_32); ++ ++ // Clear Divisor's kill flag. ++ Divisor.setIsKill(false); ++ ++ // We would normally delete the original instruction here but in this case ++ // we only needed to inject an additional instruction rather than replace it. ++ ++ return &MBB; ++} ++ ++MachineBasicBlock * ++LoongArchTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, ++ MachineBasicBlock *BB) const { ++ switch (MI.getOpcode()) { ++ default: ++ llvm_unreachable("Unexpected instr type to insert"); ++ case LoongArch::FILL_FW_PSEUDO: ++ return emitFILL_FW(MI, BB); ++ case LoongArch::FILL_FD_PSEUDO: ++ return emitFILL_FD(MI, BB); ++ case LoongArch::SNZ_B_PSEUDO: ++ return emitLSXCBranchPseudo(MI, BB, LoongArch::VSETALLNEZ_B); ++ case LoongArch::SNZ_H_PSEUDO: ++ return emitLSXCBranchPseudo(MI, BB, LoongArch::VSETALLNEZ_H); ++ case LoongArch::SNZ_W_PSEUDO: ++ return emitLSXCBranchPseudo(MI, BB, LoongArch::VSETALLNEZ_W); ++ case LoongArch::SNZ_D_PSEUDO: ++ return emitLSXCBranchPseudo(MI, BB, LoongArch::VSETALLNEZ_D); ++ case LoongArch::SNZ_V_PSEUDO: ++ return emitLSXCBranchPseudo(MI, BB, LoongArch::VSETNEZ_V); ++ case LoongArch::SZ_B_PSEUDO: ++ return emitLSXCBranchPseudo(MI, BB, LoongArch::VSETANYEQZ_B); ++ case LoongArch::SZ_H_PSEUDO: ++ return emitLSXCBranchPseudo(MI, BB, LoongArch::VSETANYEQZ_H); ++ case LoongArch::SZ_W_PSEUDO: ++ return emitLSXCBranchPseudo(MI, BB, LoongArch::VSETANYEQZ_W); ++ case LoongArch::SZ_D_PSEUDO: ++ return emitLSXCBranchPseudo(MI, BB, LoongArch::VSETANYEQZ_D); ++ case LoongArch::SZ_V_PSEUDO: ++ return emitLSXCBranchPseudo(MI, BB, LoongArch::VSETEQZ_V); ++ case LoongArch::XSNZ_B_PSEUDO: ++ return emitLSXCBranchPseudo(MI, BB, LoongArch::XVSETALLNEZ_B); ++ case LoongArch::XSNZ_H_PSEUDO: ++ return emitLSXCBranchPseudo(MI, BB, LoongArch::XVSETALLNEZ_H); ++ case LoongArch::XSNZ_W_PSEUDO: ++ return emitLSXCBranchPseudo(MI, BB, LoongArch::XVSETALLNEZ_W); ++ case LoongArch::XSNZ_D_PSEUDO: ++ return emitLSXCBranchPseudo(MI, BB, LoongArch::XVSETALLNEZ_D); ++ case LoongArch::XSNZ_V_PSEUDO: ++ return emitLSXCBranchPseudo(MI, BB, LoongArch::XVSETNEZ_V); ++ case LoongArch::XSZ_B_PSEUDO: ++ return emitLSXCBranchPseudo(MI, BB, LoongArch::XVSETANYEQZ_B); ++ case LoongArch::XSZ_H_PSEUDO: ++ return emitLSXCBranchPseudo(MI, BB, LoongArch::XVSETANYEQZ_H); ++ case LoongArch::XSZ_W_PSEUDO: ++ return emitLSXCBranchPseudo(MI, BB, LoongArch::XVSETANYEQZ_W); ++ case LoongArch::XSZ_D_PSEUDO: ++ return emitLSXCBranchPseudo(MI, BB, LoongArch::XVSETANYEQZ_D); ++ case LoongArch::XSZ_V_PSEUDO: ++ return emitLSXCBranchPseudo(MI, BB, LoongArch::XVSETEQZ_V); ++ case LoongArch::INSERT_FW_PSEUDO: 
++ return emitINSERT_FW(MI, BB); ++ case LoongArch::INSERT_FD_PSEUDO: ++ return emitINSERT_FD(MI, BB); ++ case LoongArch::XINSERT_H_PSEUDO: ++ return emitXINSERT_BH(MI, BB, 2); ++ case LoongArch::XCOPY_FW_PSEUDO: ++ return emitXCOPY_FW(MI, BB); ++ case LoongArch::XCOPY_FD_PSEUDO: ++ return emitXCOPY_FD(MI, BB); ++ case LoongArch::XINSERT_FW_PSEUDO: ++ return emitXINSERT_FW(MI, BB); ++ case LoongArch::COPY_FW_PSEUDO: ++ return emitCOPY_FW(MI, BB); ++ case LoongArch::XFILL_FW_PSEUDO: ++ return emitXFILL_FW(MI, BB); ++ case LoongArch::XFILL_FD_PSEUDO: ++ return emitXFILL_FD(MI, BB); ++ case LoongArch::COPY_FD_PSEUDO: ++ return emitCOPY_FD(MI, BB); ++ case LoongArch::XINSERT_FD_PSEUDO: ++ return emitXINSERT_FD(MI, BB); ++ case LoongArch::XINSERT_B_PSEUDO: ++ return emitXINSERT_BH(MI, BB, 1); ++ case LoongArch::CONCAT_VECTORS_B_PSEUDO: ++ return emitCONCAT_VECTORS(MI, BB, 1); ++ case LoongArch::CONCAT_VECTORS_H_PSEUDO: ++ return emitCONCAT_VECTORS(MI, BB, 2); ++ case LoongArch::CONCAT_VECTORS_W_PSEUDO: ++ case LoongArch::CONCAT_VECTORS_FW_PSEUDO: ++ return emitCONCAT_VECTORS(MI, BB, 4); ++ case LoongArch::CONCAT_VECTORS_D_PSEUDO: ++ case LoongArch::CONCAT_VECTORS_FD_PSEUDO: ++ return emitCONCAT_VECTORS(MI, BB, 8); ++ case LoongArch::XCOPY_FW_GPR_PSEUDO: ++ return emitXCOPY_FW_GPR(MI, BB); ++ ++ case LoongArch::ATOMIC_LOAD_ADD_I8: ++ return emitAtomicBinaryPartword(MI, BB, 1); ++ case LoongArch::ATOMIC_LOAD_ADD_I16: ++ return emitAtomicBinaryPartword(MI, BB, 2); ++ case LoongArch::ATOMIC_LOAD_ADD_I32: ++ return emitAtomicBinary(MI, BB); ++ case LoongArch::ATOMIC_LOAD_ADD_I64: ++ return emitAtomicBinary(MI, BB); ++ ++ case LoongArch::ATOMIC_LOAD_AND_I8: ++ return emitAtomicBinaryPartword(MI, BB, 1); ++ case LoongArch::ATOMIC_LOAD_AND_I16: ++ return emitAtomicBinaryPartword(MI, BB, 2); ++ case LoongArch::ATOMIC_LOAD_AND_I32: ++ return emitAtomicBinary(MI, BB); ++ case LoongArch::ATOMIC_LOAD_AND_I64: ++ return emitAtomicBinary(MI, BB); ++ ++ case LoongArch::ATOMIC_LOAD_OR_I8: ++ return emitAtomicBinaryPartword(MI, BB, 1); ++ case LoongArch::ATOMIC_LOAD_OR_I16: ++ return emitAtomicBinaryPartword(MI, BB, 2); ++ case LoongArch::ATOMIC_LOAD_OR_I32: ++ return emitAtomicBinary(MI, BB); ++ case LoongArch::ATOMIC_LOAD_OR_I64: ++ return emitAtomicBinary(MI, BB); ++ ++ case LoongArch::ATOMIC_LOAD_XOR_I8: ++ return emitAtomicBinaryPartword(MI, BB, 1); ++ case LoongArch::ATOMIC_LOAD_XOR_I16: ++ return emitAtomicBinaryPartword(MI, BB, 2); ++ case LoongArch::ATOMIC_LOAD_XOR_I32: ++ return emitAtomicBinary(MI, BB); ++ case LoongArch::ATOMIC_LOAD_XOR_I64: ++ return emitAtomicBinary(MI, BB); ++ ++ case LoongArch::ATOMIC_LOAD_NAND_I8: ++ return emitAtomicBinaryPartword(MI, BB, 1); ++ case LoongArch::ATOMIC_LOAD_NAND_I16: ++ return emitAtomicBinaryPartword(MI, BB, 2); ++ case LoongArch::ATOMIC_LOAD_NAND_I32: ++ return emitAtomicBinary(MI, BB); ++ case LoongArch::ATOMIC_LOAD_NAND_I64: ++ return emitAtomicBinary(MI, BB); ++ ++ case LoongArch::ATOMIC_LOAD_SUB_I8: ++ return emitAtomicBinaryPartword(MI, BB, 1); ++ case LoongArch::ATOMIC_LOAD_SUB_I16: ++ return emitAtomicBinaryPartword(MI, BB, 2); ++ case LoongArch::ATOMIC_LOAD_SUB_I32: ++ return emitAtomicBinary(MI, BB); ++ case LoongArch::ATOMIC_LOAD_SUB_I64: ++ return emitAtomicBinary(MI, BB); ++ ++ case LoongArch::ATOMIC_SWAP_I8: ++ return emitAtomicBinaryPartword(MI, BB, 1); ++ case LoongArch::ATOMIC_SWAP_I16: ++ return emitAtomicBinaryPartword(MI, BB, 2); ++ case LoongArch::ATOMIC_SWAP_I32: ++ return emitAtomicBinary(MI, BB); ++ case LoongArch::ATOMIC_SWAP_I64: ++ 
return emitAtomicBinary(MI, BB); ++ ++ case LoongArch::XINSERT_B_VIDX_PSEUDO: ++ case LoongArch::XINSERT_B_VIDX64_PSEUDO: ++ return emitXINSERT_B(MI, BB); ++ case LoongArch::INSERT_H_VIDX64_PSEUDO: ++ return emitINSERT_H_VIDX(MI, BB); ++ case LoongArch::XINSERT_FW_VIDX_PSEUDO: ++ return emitXINSERT_DF_VIDX(MI, BB, false); ++ case LoongArch::XINSERT_FW_VIDX64_PSEUDO: ++ return emitXINSERT_DF_VIDX(MI, BB, true); ++ ++ case LoongArch::ATOMIC_LOAD_MAX_I8: ++ return emitAtomicBinaryPartword(MI, BB, 1); ++ case LoongArch::ATOMIC_LOAD_MAX_I16: ++ return emitAtomicBinaryPartword(MI, BB, 2); ++ case LoongArch::ATOMIC_LOAD_MAX_I32: ++ return emitAtomicBinary(MI, BB); ++ case LoongArch::ATOMIC_LOAD_MAX_I64: ++ return emitAtomicBinary(MI, BB); ++ ++ case LoongArch::ATOMIC_LOAD_MIN_I8: ++ return emitAtomicBinaryPartword(MI, BB, 1); ++ case LoongArch::ATOMIC_LOAD_MIN_I16: ++ return emitAtomicBinaryPartword(MI, BB, 2); ++ case LoongArch::ATOMIC_LOAD_MIN_I32: ++ return emitAtomicBinary(MI, BB); ++ case LoongArch::ATOMIC_LOAD_MIN_I64: ++ return emitAtomicBinary(MI, BB); ++ ++ case LoongArch::ATOMIC_LOAD_UMAX_I8: ++ return emitAtomicBinaryPartword(MI, BB, 1); ++ case LoongArch::ATOMIC_LOAD_UMAX_I16: ++ return emitAtomicBinaryPartword(MI, BB, 2); ++ case LoongArch::ATOMIC_LOAD_UMAX_I32: ++ return emitAtomicBinary(MI, BB); ++ case LoongArch::ATOMIC_LOAD_UMAX_I64: ++ return emitAtomicBinary(MI, BB); ++ ++ case LoongArch::ATOMIC_LOAD_UMIN_I8: ++ return emitAtomicBinaryPartword(MI, BB, 1); ++ case LoongArch::ATOMIC_LOAD_UMIN_I16: ++ return emitAtomicBinaryPartword(MI, BB, 2); ++ case LoongArch::ATOMIC_LOAD_UMIN_I32: ++ return emitAtomicBinary(MI, BB); ++ case LoongArch::ATOMIC_LOAD_UMIN_I64: ++ return emitAtomicBinary(MI, BB); ++ ++ case LoongArch::ATOMIC_CMP_SWAP_I8: ++ return emitAtomicCmpSwapPartword(MI, BB, 1); ++ case LoongArch::ATOMIC_CMP_SWAP_I16: ++ return emitAtomicCmpSwapPartword(MI, BB, 2); ++ case LoongArch::ATOMIC_CMP_SWAP_I32: ++ return emitAtomicCmpSwap(MI, BB); ++ case LoongArch::ATOMIC_CMP_SWAP_I64: ++ return emitAtomicCmpSwap(MI, BB); ++ ++ case LoongArch::PseudoSELECT_I: ++ case LoongArch::PseudoSELECT_I64: ++ case LoongArch::PseudoSELECT_S: ++ case LoongArch::PseudoSELECT_D64: ++ return emitPseudoSELECT(MI, BB, false, LoongArch::BNE32); ++ ++ case LoongArch::PseudoSELECTFP_T_I: ++ case LoongArch::PseudoSELECTFP_T_I64: ++ return emitPseudoSELECT(MI, BB, true, LoongArch::BCNEZ); ++ ++ case LoongArch::PseudoSELECTFP_F_I: ++ case LoongArch::PseudoSELECTFP_F_I64: ++ return emitPseudoSELECT(MI, BB, true, LoongArch::BCEQZ); ++ case LoongArch::DIV_W: ++ case LoongArch::DIV_WU: ++ case LoongArch::MOD_W: ++ case LoongArch::MOD_WU: ++ return insertDivByZeroTrap(MI, *BB, *Subtarget.getInstrInfo(), false); ++ case LoongArch::DIV_D: ++ case LoongArch::DIV_DU: ++ case LoongArch::MOD_D: ++ case LoongArch::MOD_DU: ++ return insertDivByZeroTrap(MI, *BB, *Subtarget.getInstrInfo(), true); ++ } ++} ++ ++MachineBasicBlock *LoongArchTargetLowering::emitXINSERT_DF_VIDX( ++ MachineInstr &MI, MachineBasicBlock *BB, bool IsGPR64) const { ++ ++ MachineFunction *MF = BB->getParent(); ++ MachineRegisterInfo &RegInfo = MF->getRegInfo(); ++ const TargetInstrInfo *TII = Subtarget.getInstrInfo(); ++ DebugLoc DL = MI.getDebugLoc(); ++ ++ unsigned insertOp; ++ insertOp = IsGPR64 ? 
LoongArch::XINSERT_FW_VIDX64_PSEUDO_POSTRA ++ : LoongArch::XINSERT_FW_VIDX_PSEUDO_POSTRA; ++ ++ unsigned DstReg = MI.getOperand(0).getReg(); ++ unsigned SrcVecReg = MI.getOperand(1).getReg(); ++ unsigned LaneReg = MI.getOperand(2).getReg(); ++ unsigned SrcValReg = MI.getOperand(3).getReg(); ++ unsigned Dest = RegInfo.createVirtualRegister(RegInfo.getRegClass(DstReg)); ++ ++ MachineBasicBlock::iterator II(MI); ++ ++ unsigned VecCopy = ++ RegInfo.createVirtualRegister(RegInfo.getRegClass(SrcVecReg)); ++ unsigned LaneCopy = ++ RegInfo.createVirtualRegister(RegInfo.getRegClass(LaneReg)); ++ unsigned ValCopy = ++ RegInfo.createVirtualRegister(RegInfo.getRegClass(SrcValReg)); ++ ++ const TargetRegisterClass *RC = ++ IsGPR64 ? &LoongArch::GPR64RegClass : &LoongArch::GPR32RegClass; ++ unsigned RI = RegInfo.createVirtualRegister(RC); ++ ++ unsigned Rj = RegInfo.createVirtualRegister(&LoongArch::GPR32RegClass); ++ unsigned Xj = RegInfo.createVirtualRegister(&LoongArch::LASX256WRegClass); ++ BuildMI(*BB, II, DL, TII->get(LoongArch::SUBREG_TO_REG), Xj) ++ .addImm(0) ++ .addReg(SrcValReg) ++ .addImm(LoongArch::sub_lo); ++ BuildMI(*BB, II, DL, TII->get(LoongArch::XVPICKVE2GR_W), Rj) ++ .addReg(Xj) ++ .addImm(0); ++ ++ BuildMI(*BB, II, DL, TII->get(LoongArch::COPY), VecCopy).addReg(SrcVecReg); ++ BuildMI(*BB, II, DL, TII->get(LoongArch::COPY), LaneCopy).addReg(LaneReg); ++ BuildMI(*BB, II, DL, TII->get(LoongArch::COPY), ValCopy).addReg(SrcValReg); ++ ++ BuildMI(*BB, II, DL, TII->get(insertOp)) ++ .addReg(DstReg, RegState::Define | RegState::EarlyClobber) ++ .addReg(VecCopy) ++ .addReg(LaneCopy) ++ .addReg(ValCopy) ++ .addReg(Dest, RegState::Define | RegState::EarlyClobber | ++ RegState::Implicit | RegState::Dead) ++ .addReg(RI, RegState::Define | RegState::EarlyClobber | ++ RegState::Implicit | RegState::Dead) ++ .addReg(Rj, RegState::Define | RegState::EarlyClobber | ++ RegState::Implicit | RegState::Dead); ++ ++ MI.eraseFromParent(); ++ ++ return BB; ++} ++ ++MachineBasicBlock * ++LoongArchTargetLowering::emitINSERT_H_VIDX(MachineInstr &MI, ++ MachineBasicBlock *BB) const { ++ ++ MachineFunction *MF = BB->getParent(); ++ MachineRegisterInfo &RegInfo = MF->getRegInfo(); ++ const TargetInstrInfo *TII = Subtarget.getInstrInfo(); ++ DebugLoc DL = MI.getDebugLoc(); ++ ++ unsigned insertOp; ++ unsigned isGP64 = 0; ++ switch (MI.getOpcode()) { ++ case LoongArch::INSERT_H_VIDX64_PSEUDO: ++ isGP64 = 1; ++ insertOp = LoongArch::INSERT_H_VIDX64_PSEUDO_POSTRA; ++ break; ++ default: ++ llvm_unreachable("Unknown pseudo vector for replacement!"); ++ } ++ ++ unsigned DstReg = MI.getOperand(0).getReg(); ++ unsigned SrcVecReg = MI.getOperand(1).getReg(); ++ unsigned LaneReg = MI.getOperand(2).getReg(); ++ unsigned SrcValReg = MI.getOperand(3).getReg(); ++ unsigned Dest = RegInfo.createVirtualRegister(RegInfo.getRegClass(DstReg)); ++ ++ MachineBasicBlock::iterator II(MI); ++ ++ unsigned VecCopy = ++ RegInfo.createVirtualRegister(RegInfo.getRegClass(SrcVecReg)); ++ unsigned LaneCopy = ++ RegInfo.createVirtualRegister(RegInfo.getRegClass(LaneReg)); ++ unsigned ValCopy = ++ RegInfo.createVirtualRegister(RegInfo.getRegClass(SrcValReg)); ++ ++ const TargetRegisterClass *RC = ++ isGP64 ? 
&LoongArch::GPR64RegClass : &LoongArch::GPR32RegClass; ++ unsigned RI = RegInfo.createVirtualRegister(RC); ++ ++ BuildMI(*BB, II, DL, TII->get(LoongArch::COPY), VecCopy).addReg(SrcVecReg); ++ BuildMI(*BB, II, DL, TII->get(LoongArch::COPY), LaneCopy).addReg(LaneReg); ++ BuildMI(*BB, II, DL, TII->get(LoongArch::COPY), ValCopy).addReg(SrcValReg); ++ ++ BuildMI(*BB, II, DL, TII->get(insertOp)) ++ .addReg(DstReg, RegState::Define | RegState::EarlyClobber) ++ .addReg(VecCopy) ++ .addReg(LaneCopy) ++ .addReg(ValCopy) ++ .addReg(Dest, RegState::Define | RegState::EarlyClobber | ++ RegState::Implicit | RegState::Dead) ++ .addReg(RI, RegState::Define | RegState::EarlyClobber | ++ RegState::Implicit | RegState::Dead); ++ ++ MI.eraseFromParent(); ++ ++ return BB; ++} ++ ++MachineBasicBlock * ++LoongArchTargetLowering::emitXINSERT_B(MachineInstr &MI, ++ MachineBasicBlock *BB) const { ++ ++ MachineFunction *MF = BB->getParent(); ++ MachineRegisterInfo &RegInfo = MF->getRegInfo(); ++ const TargetInstrInfo *TII = Subtarget.getInstrInfo(); ++ DebugLoc DL = MI.getDebugLoc(); ++ ++ unsigned insertOp; ++ unsigned isGP64 = 0; ++ switch (MI.getOpcode()) { ++ case LoongArch::XINSERT_B_VIDX64_PSEUDO: ++ isGP64 = 1; ++ insertOp = LoongArch::XINSERT_B_VIDX64_PSEUDO_POSTRA; ++ break; ++ case LoongArch::XINSERT_B_VIDX_PSEUDO: ++ insertOp = LoongArch::XINSERT_B_VIDX_PSEUDO_POSTRA; ++ break; ++ default: ++ llvm_unreachable("Unknown pseudo vector for replacement!"); ++ } ++ ++ unsigned DstReg = MI.getOperand(0).getReg(); ++ unsigned SrcVecReg = MI.getOperand(1).getReg(); ++ unsigned LaneReg = MI.getOperand(2).getReg(); ++ unsigned SrcValReg = MI.getOperand(3).getReg(); ++ unsigned Dest = RegInfo.createVirtualRegister(RegInfo.getRegClass(DstReg)); ++ ++ MachineBasicBlock::iterator II(MI); ++ ++ unsigned VecCopy = ++ RegInfo.createVirtualRegister(RegInfo.getRegClass(SrcVecReg)); ++ unsigned LaneCopy = ++ RegInfo.createVirtualRegister(RegInfo.getRegClass(LaneReg)); ++ unsigned ValCopy = ++ RegInfo.createVirtualRegister(RegInfo.getRegClass(SrcValReg)); ++ const TargetRegisterClass *RC = ++ isGP64 ? 
&LoongArch::GPR64RegClass : &LoongArch::GPR32RegClass; ++ unsigned Rimm = RegInfo.createVirtualRegister(RC); ++ unsigned R4r = RegInfo.createVirtualRegister(RC); ++ unsigned Rib = RegInfo.createVirtualRegister(RC); ++ unsigned Ris = RegInfo.createVirtualRegister(RC); ++ unsigned R7b1 = RegInfo.createVirtualRegister(RC); ++ unsigned R7b2 = RegInfo.createVirtualRegister(RC); ++ unsigned R7b3 = RegInfo.createVirtualRegister(RC); ++ unsigned RI = RegInfo.createVirtualRegister(RC); ++ ++ unsigned R7r80_3 = RegInfo.createVirtualRegister(&LoongArch::GPR32RegClass); ++ unsigned R7r80l_3 = RegInfo.createVirtualRegister(&LoongArch::GPR32RegClass); ++ unsigned R7r81_3 = RegInfo.createVirtualRegister(&LoongArch::GPR32RegClass); ++ unsigned R7r81l_3 = RegInfo.createVirtualRegister(&LoongArch::GPR32RegClass); ++ unsigned R7r82_3 = RegInfo.createVirtualRegister(&LoongArch::GPR32RegClass); ++ unsigned R7r82l_3 = RegInfo.createVirtualRegister(&LoongArch::GPR32RegClass); ++ unsigned R70 = RegInfo.createVirtualRegister(&LoongArch::GPR32RegClass); ++ unsigned tmp_Dst73 = ++ RegInfo.createVirtualRegister(&LoongArch::LASX256BRegClass); ++ ++ BuildMI(*BB, II, DL, TII->get(LoongArch::COPY), VecCopy).addReg(SrcVecReg); ++ BuildMI(*BB, II, DL, TII->get(LoongArch::COPY), LaneCopy).addReg(LaneReg); ++ BuildMI(*BB, II, DL, TII->get(LoongArch::COPY), ValCopy).addReg(SrcValReg); ++ ++ BuildMI(*BB, II, DL, TII->get(insertOp)) ++ .addReg(DstReg, RegState::Define | RegState::EarlyClobber) ++ .addReg(VecCopy) ++ .addReg(LaneCopy) ++ .addReg(ValCopy) ++ .addReg(Dest, RegState::Define | RegState::EarlyClobber | ++ RegState::Implicit | RegState::Dead) ++ .addReg(R4r, RegState::Define | RegState::EarlyClobber | ++ RegState::Implicit | RegState::Dead) ++ .addReg(Rib, RegState::Define | RegState::EarlyClobber | ++ RegState::Implicit | RegState::Dead) ++ .addReg(Ris, RegState::Define | RegState::EarlyClobber | ++ RegState::Implicit | RegState::Dead) ++ .addReg(R7b1, RegState::Define | RegState::EarlyClobber | ++ RegState::Implicit | RegState::Dead) ++ .addReg(R7b2, RegState::Define | RegState::EarlyClobber | ++ RegState::Implicit | RegState::Dead) ++ .addReg(R7b3, RegState::Define | RegState::EarlyClobber | ++ RegState::Implicit | RegState::Dead) ++ .addReg(R7r80_3, RegState::Define | RegState::EarlyClobber | ++ RegState::Implicit | RegState::Dead) ++ .addReg(R7r80l_3, RegState::Define | RegState::EarlyClobber | ++ RegState::Implicit | RegState::Dead) ++ .addReg(R7r81_3, RegState::Define | RegState::EarlyClobber | ++ RegState::Implicit | RegState::Dead) ++ .addReg(R7r81l_3, RegState::Define | RegState::EarlyClobber | ++ RegState::Implicit | RegState::Dead) ++ .addReg(R7r82_3, RegState::Define | RegState::EarlyClobber | ++ RegState::Implicit | RegState::Dead) ++ .addReg(R7r82l_3, RegState::Define | RegState::EarlyClobber | ++ RegState::Implicit | RegState::Dead) ++ .addReg(RI, RegState::Define | RegState::EarlyClobber | ++ RegState::Implicit | RegState::Dead) ++ .addReg(tmp_Dst73, RegState::Define | RegState::EarlyClobber | ++ RegState::Implicit | RegState::Dead) ++ .addReg(Rimm, RegState::Define | RegState::EarlyClobber | ++ RegState::Implicit | RegState::Dead) ++ .addReg(R70, RegState::Define | RegState::EarlyClobber | ++ RegState::Implicit | RegState::Dead); ++ ++ MI.eraseFromParent(); ++ ++ return BB; ++} ++ ++const TargetRegisterClass * ++LoongArchTargetLowering::getRepRegClassFor(MVT VT) const { ++ return TargetLowering::getRepRegClassFor(VT); ++} ++ ++// This function also handles LoongArch::ATOMIC_SWAP_I32 (when BinOpcode 
== 0), and ++// LoongArch::ATOMIC_LOAD_NAND_I32 (when Nand == true) ++MachineBasicBlock * ++LoongArchTargetLowering::emitAtomicBinary(MachineInstr &MI, ++ MachineBasicBlock *BB) const { ++ ++ MachineFunction *MF = BB->getParent(); ++ MachineRegisterInfo &RegInfo = MF->getRegInfo(); ++ const TargetInstrInfo *TII = Subtarget.getInstrInfo(); ++ DebugLoc DL = MI.getDebugLoc(); ++ ++ unsigned AtomicOp; ++ switch (MI.getOpcode()) { ++ case LoongArch::ATOMIC_LOAD_ADD_I32: ++ AtomicOp = LoongArch::ATOMIC_LOAD_ADD_I32_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_SUB_I32: ++ AtomicOp = LoongArch::ATOMIC_LOAD_SUB_I32_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_AND_I32: ++ AtomicOp = LoongArch::ATOMIC_LOAD_AND_I32_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_OR_I32: ++ AtomicOp = LoongArch::ATOMIC_LOAD_OR_I32_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_XOR_I32: ++ AtomicOp = LoongArch::ATOMIC_LOAD_XOR_I32_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_NAND_I32: ++ AtomicOp = LoongArch::ATOMIC_LOAD_NAND_I32_POSTRA; ++ break; ++ case LoongArch::ATOMIC_SWAP_I32: ++ AtomicOp = LoongArch::ATOMIC_SWAP_I32_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_MAX_I32: ++ AtomicOp = LoongArch::ATOMIC_LOAD_MAX_I32_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_MIN_I32: ++ AtomicOp = LoongArch::ATOMIC_LOAD_MIN_I32_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_UMAX_I32: ++ AtomicOp = LoongArch::ATOMIC_LOAD_UMAX_I32_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_UMIN_I32: ++ AtomicOp = LoongArch::ATOMIC_LOAD_UMIN_I32_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_ADD_I64: ++ AtomicOp = LoongArch::ATOMIC_LOAD_ADD_I64_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_SUB_I64: ++ AtomicOp = LoongArch::ATOMIC_LOAD_SUB_I64_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_AND_I64: ++ AtomicOp = LoongArch::ATOMIC_LOAD_AND_I64_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_OR_I64: ++ AtomicOp = LoongArch::ATOMIC_LOAD_OR_I64_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_XOR_I64: ++ AtomicOp = LoongArch::ATOMIC_LOAD_XOR_I64_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_NAND_I64: ++ AtomicOp = LoongArch::ATOMIC_LOAD_NAND_I64_POSTRA; ++ break; ++ case LoongArch::ATOMIC_SWAP_I64: ++ AtomicOp = LoongArch::ATOMIC_SWAP_I64_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_MAX_I64: ++ AtomicOp = LoongArch::ATOMIC_LOAD_MAX_I64_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_MIN_I64: ++ AtomicOp = LoongArch::ATOMIC_LOAD_MIN_I64_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_UMAX_I64: ++ AtomicOp = LoongArch::ATOMIC_LOAD_UMAX_I64_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_UMIN_I64: ++ AtomicOp = LoongArch::ATOMIC_LOAD_UMIN_I64_POSTRA; ++ break; ++ default: ++ llvm_unreachable("Unknown pseudo atomic for replacement!"); ++ } ++ ++ unsigned OldVal = MI.getOperand(0).getReg(); ++ unsigned Ptr = MI.getOperand(1).getReg(); ++ unsigned Incr = MI.getOperand(2).getReg(); ++ unsigned Scratch = RegInfo.createVirtualRegister(RegInfo.getRegClass(OldVal)); ++ ++ MachineBasicBlock::iterator II(MI); ++ ++ // The scratch registers here with the EarlyClobber | Define | Implicit ++ // flags is used to persuade the register allocator and the machine ++ // verifier to accept the usage of this register. This has to be a real ++ // register which has an UNDEF value but is dead after the instruction which ++ // is unique among the registers chosen for the instruction. ++ ++ // The EarlyClobber flag has the semantic properties that the operand it is ++ // attached to is clobbered before the rest of the inputs are read. 
Hence it ++ // must be unique among the operands to the instruction. ++ // The Define flag is needed to coerce the machine verifier that an Undef ++ // value isn't a problem. ++ // The Dead flag is needed as the value in scratch isn't used by any other ++ // instruction. Kill isn't used as Dead is more precise. ++ // The implicit flag is here due to the interaction between the other flags ++ // and the machine verifier. ++ ++ // For correctness purpose, a new pseudo is introduced here. We need this ++ // new pseudo, so that FastRegisterAllocator does not see an ll/sc sequence ++ // that is spread over >1 basic blocks. A register allocator which ++ // introduces (or any codegen infact) a store, can violate the expectations ++ // of the hardware. ++ // ++ // An atomic read-modify-write sequence starts with a linked load ++ // instruction and ends with a store conditional instruction. The atomic ++ // read-modify-write sequence fails if any of the following conditions ++ // occur between the execution of ll and sc: ++ // * A coherent store is completed by another process or coherent I/O ++ // module into the block of synchronizable physical memory containing ++ // the word. The size and alignment of the block is ++ // implementation-dependent. ++ // * A coherent store is executed between an LL and SC sequence on the ++ // same processor to the block of synchornizable physical memory ++ // containing the word. ++ // ++ ++ unsigned PtrCopy = RegInfo.createVirtualRegister(RegInfo.getRegClass(Ptr)); ++ unsigned IncrCopy = RegInfo.createVirtualRegister(RegInfo.getRegClass(Incr)); ++ ++ BuildMI(*BB, II, DL, TII->get(LoongArch::COPY), IncrCopy).addReg(Incr); ++ BuildMI(*BB, II, DL, TII->get(LoongArch::COPY), PtrCopy).addReg(Ptr); ++ ++ BuildMI(*BB, II, DL, TII->get(AtomicOp)) ++ .addReg(OldVal, RegState::Define | RegState::EarlyClobber) ++ .addReg(PtrCopy) ++ .addReg(IncrCopy) ++ .addReg(Scratch, RegState::Define | RegState::EarlyClobber | ++ RegState::Implicit | RegState::Dead); ++ ++ if(MI.getOpcode() == LoongArch::ATOMIC_LOAD_NAND_I32 ++ || MI.getOpcode() == LoongArch::ATOMIC_LOAD_NAND_I64){ ++ BuildMI(*BB, II, DL, TII->get(LoongArch::DBAR)).addImm(DBAR_HINT); ++ } ++ ++ MI.eraseFromParent(); ++ ++ return BB; ++} ++ ++MachineBasicBlock *LoongArchTargetLowering::emitSignExtendToI32InReg( ++ MachineInstr &MI, MachineBasicBlock *BB, unsigned Size, unsigned DstReg, ++ unsigned SrcReg) const { ++ const TargetInstrInfo *TII = Subtarget.getInstrInfo(); ++ const DebugLoc &DL = MI.getDebugLoc(); ++ if (Size == 1) { ++ BuildMI(BB, DL, TII->get(LoongArch::EXT_W_B32), DstReg).addReg(SrcReg); ++ return BB; ++ } ++ ++ if (Size == 2) { ++ BuildMI(BB, DL, TII->get(LoongArch::EXT_W_H32), DstReg).addReg(SrcReg); ++ return BB; ++ } ++ ++ MachineFunction *MF = BB->getParent(); ++ MachineRegisterInfo &RegInfo = MF->getRegInfo(); ++ const TargetRegisterClass *RC = getRegClassFor(MVT::i32); ++ unsigned ScrReg = RegInfo.createVirtualRegister(RC); ++ ++ assert(Size < 32); ++ int64_t ShiftImm = 32 - (Size * 8); ++ ++ BuildMI(BB, DL, TII->get(LoongArch::SLLI_W), ScrReg).addReg(SrcReg).addImm(ShiftImm); ++ BuildMI(BB, DL, TII->get(LoongArch::SRAI_W), DstReg).addReg(ScrReg).addImm(ShiftImm); ++ ++ return BB; ++} ++ ++MachineBasicBlock *LoongArchTargetLowering::emitAtomicBinaryPartword( ++ MachineInstr &MI, MachineBasicBlock *BB, unsigned Size) const { ++ assert((Size == 1 || Size == 2) && ++ "Unsupported size for EmitAtomicBinaryPartial."); ++ ++ MachineFunction *MF = BB->getParent(); ++ MachineRegisterInfo &RegInfo = 
MF->getRegInfo(); ++ const TargetRegisterClass *RC = getRegClassFor(MVT::i32); ++ const bool ArePtrs64bit = ABI.ArePtrs64bit(); ++ const TargetRegisterClass *RCp = ++ getRegClassFor(ArePtrs64bit ? MVT::i64 : MVT::i32); ++ const TargetInstrInfo *TII = Subtarget.getInstrInfo(); ++ DebugLoc DL = MI.getDebugLoc(); ++ ++ unsigned Dest = MI.getOperand(0).getReg(); ++ unsigned Ptr = MI.getOperand(1).getReg(); ++ unsigned Incr = MI.getOperand(2).getReg(); ++ ++ unsigned AlignedAddr = RegInfo.createVirtualRegister(RCp); ++ unsigned ShiftAmt = RegInfo.createVirtualRegister(RC); ++ unsigned Mask = RegInfo.createVirtualRegister(RC); ++ unsigned Mask2 = RegInfo.createVirtualRegister(RC); ++ unsigned Incr2 = RegInfo.createVirtualRegister(RC); ++ unsigned MaskLSB2 = RegInfo.createVirtualRegister(RCp); ++ unsigned PtrLSB2 = RegInfo.createVirtualRegister(RC); ++ unsigned MaskUpper = RegInfo.createVirtualRegister(RC); ++ unsigned MaskUppest = RegInfo.createVirtualRegister(RC); ++ unsigned Scratch = RegInfo.createVirtualRegister(RC); ++ unsigned Scratch2 = RegInfo.createVirtualRegister(RC); ++ unsigned Scratch3 = RegInfo.createVirtualRegister(RC); ++ unsigned Scratch4 = RegInfo.createVirtualRegister(RC); ++ unsigned Scratch5 = RegInfo.createVirtualRegister(RC); ++ ++ unsigned AtomicOp = 0; ++ switch (MI.getOpcode()) { ++ case LoongArch::ATOMIC_LOAD_NAND_I8: ++ AtomicOp = LoongArch::ATOMIC_LOAD_NAND_I8_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_NAND_I16: ++ AtomicOp = LoongArch::ATOMIC_LOAD_NAND_I16_POSTRA; ++ break; ++ case LoongArch::ATOMIC_SWAP_I8: ++ AtomicOp = LoongArch::ATOMIC_SWAP_I8_POSTRA; ++ break; ++ case LoongArch::ATOMIC_SWAP_I16: ++ AtomicOp = LoongArch::ATOMIC_SWAP_I16_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_MAX_I8: ++ AtomicOp = LoongArch::ATOMIC_LOAD_MAX_I8_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_MAX_I16: ++ AtomicOp = LoongArch::ATOMIC_LOAD_MAX_I16_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_MIN_I8: ++ AtomicOp = LoongArch::ATOMIC_LOAD_MIN_I8_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_MIN_I16: ++ AtomicOp = LoongArch::ATOMIC_LOAD_MIN_I16_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_UMAX_I8: ++ AtomicOp = LoongArch::ATOMIC_LOAD_UMAX_I8_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_UMAX_I16: ++ AtomicOp = LoongArch::ATOMIC_LOAD_UMAX_I16_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_UMIN_I8: ++ AtomicOp = LoongArch::ATOMIC_LOAD_UMIN_I8_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_UMIN_I16: ++ AtomicOp = LoongArch::ATOMIC_LOAD_UMIN_I16_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_ADD_I8: ++ AtomicOp = LoongArch::ATOMIC_LOAD_ADD_I8_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_ADD_I16: ++ AtomicOp = LoongArch::ATOMIC_LOAD_ADD_I16_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_SUB_I8: ++ AtomicOp = LoongArch::ATOMIC_LOAD_SUB_I8_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_SUB_I16: ++ AtomicOp = LoongArch::ATOMIC_LOAD_SUB_I16_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_AND_I8: ++ AtomicOp = LoongArch::ATOMIC_LOAD_AND_I8_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_AND_I16: ++ AtomicOp = LoongArch::ATOMIC_LOAD_AND_I16_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_OR_I8: ++ AtomicOp = LoongArch::ATOMIC_LOAD_OR_I8_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_OR_I16: ++ AtomicOp = LoongArch::ATOMIC_LOAD_OR_I16_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_XOR_I8: ++ AtomicOp = LoongArch::ATOMIC_LOAD_XOR_I8_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_XOR_I16: ++ AtomicOp = LoongArch::ATOMIC_LOAD_XOR_I16_POSTRA; ++ 
break; ++ default: ++ llvm_unreachable("Unknown subword atomic pseudo for expansion!"); ++ } ++ ++ // insert new blocks after the current block ++ const BasicBlock *LLVM_BB = BB->getBasicBlock(); ++ MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineFunction::iterator It = ++BB->getIterator(); ++ MF->insert(It, exitMBB); ++ ++ // Transfer the remainder of BB and its successor edges to exitMBB. ++ exitMBB->splice(exitMBB->begin(), BB, ++ std::next(MachineBasicBlock::iterator(MI)), BB->end()); ++ exitMBB->transferSuccessorsAndUpdatePHIs(BB); ++ ++ BB->addSuccessor(exitMBB, BranchProbability::getOne()); ++ ++ // thisMBB: ++ // addiu masklsb2,$0,-4 # 0xfffffffc ++ // and alignedaddr,ptr,masklsb2 ++ // andi ptrlsb2,ptr,3 ++ // sll shiftamt,ptrlsb2,3 ++ // ori maskupper,$0,255 # 0xff ++ // sll mask,maskupper,shiftamt ++ // nor mask2,$0,mask ++ // sll incr2,incr,shiftamt ++ ++ int64_t MaskImm = (Size == 1) ? 255 : 4095; ++ BuildMI(BB, DL, TII->get(ABI.GetPtrAddiOp()), MaskLSB2) ++ .addReg(ABI.GetNullPtr()).addImm(-4); ++ BuildMI(BB, DL, TII->get(ABI.GetPtrAndOp()), AlignedAddr) ++ .addReg(Ptr).addReg(MaskLSB2); ++ BuildMI(BB, DL, TII->get(LoongArch::ANDI32), PtrLSB2) ++ .addReg(Ptr, 0, ArePtrs64bit ? LoongArch::sub_32 : 0).addImm(3); ++ BuildMI(BB, DL, TII->get(LoongArch::SLLI_W), ShiftAmt).addReg(PtrLSB2).addImm(3); ++ ++ if(MaskImm==4095){ ++ BuildMI(BB, DL, TII->get(LoongArch::LU12I_W32), MaskUppest).addImm(0xf); ++ BuildMI(BB, DL, TII->get(LoongArch::ORI32), MaskUpper) ++ .addReg(MaskUppest).addImm(MaskImm); ++ } ++ else{ ++ BuildMI(BB, DL, TII->get(LoongArch::ORI32), MaskUpper) ++ .addReg(LoongArch::ZERO).addImm(MaskImm); ++ } ++ ++ BuildMI(BB, DL, TII->get(LoongArch::SLL_W), Mask) ++ .addReg(MaskUpper).addReg(ShiftAmt); ++ BuildMI(BB, DL, TII->get(LoongArch::NOR32), Mask2).addReg(LoongArch::ZERO).addReg(Mask); ++ BuildMI(BB, DL, TII->get(LoongArch::SLL_W), Incr2).addReg(Incr).addReg(ShiftAmt); ++ ++ ++ // The purposes of the flags on the scratch registers is explained in ++ // emitAtomicBinary. In summary, we need a scratch register which is going to ++ // be undef, that is unique among registers chosen for the instruction. ++ ++ BuildMI(BB, DL, TII->get(LoongArch::DBAR)).addImm(0); ++ BuildMI(BB, DL, TII->get(AtomicOp)) ++ .addReg(Dest, RegState::Define | RegState::EarlyClobber) ++ .addReg(AlignedAddr) ++ .addReg(Incr2) ++ .addReg(Mask) ++ .addReg(Mask2) ++ .addReg(ShiftAmt) ++ .addReg(Scratch, RegState::EarlyClobber | RegState::Define | ++ RegState::Dead | RegState::Implicit) ++ .addReg(Scratch2, RegState::EarlyClobber | RegState::Define | ++ RegState::Dead | RegState::Implicit) ++ .addReg(Scratch3, RegState::EarlyClobber | RegState::Define | ++ RegState::Dead | RegState::Implicit) ++ .addReg(Scratch4, RegState::EarlyClobber | RegState::Define | ++ RegState::Dead | RegState::Implicit) ++ .addReg(Scratch5, RegState::EarlyClobber | RegState::Define | ++ RegState::Dead | RegState::Implicit); ++ ++ MI.eraseFromParent(); // The instruction is gone now. ++ ++ return exitMBB; ++} ++ ++// Lower atomic compare and swap to a pseudo instruction, taking care to ++// define a scratch register for the pseudo instruction's expansion. The ++// instruction is expanded after the register allocator as to prevent ++// the insertion of stores between the linked load and the store conditional. 
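++// A rough sketch of what the post-RA cmpxchg pseudo expands to (32-bit case
++// shown for illustration; the 64-bit variant uses ll.d/sc.d):
++//
++//   loop:
++//     ll.w   dest, ptr, 0
++//     bne    dest, oldval, done
++//     move   scratch, newval
++//     sc.w   scratch, ptr, 0
++//     beqz   scratch, loop
++//   done: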
++
++MachineBasicBlock *
++LoongArchTargetLowering::emitAtomicCmpSwap(MachineInstr &MI,
++                                           MachineBasicBlock *BB) const {
++  assert((MI.getOpcode() == LoongArch::ATOMIC_CMP_SWAP_I32 ||
++          MI.getOpcode() == LoongArch::ATOMIC_CMP_SWAP_I64) &&
++         "Unsupported atomic pseudo for EmitAtomicCmpSwap.");
++
++  const unsigned Size = MI.getOpcode() == LoongArch::ATOMIC_CMP_SWAP_I32 ? 4 : 8;
++
++  MachineFunction *MF = BB->getParent();
++  MachineRegisterInfo &MRI = MF->getRegInfo();
++  const TargetRegisterClass *RC = getRegClassFor(MVT::getIntegerVT(Size * 8));
++  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
++  DebugLoc DL = MI.getDebugLoc();
++
++  unsigned AtomicOp = MI.getOpcode() == LoongArch::ATOMIC_CMP_SWAP_I32
++                          ? LoongArch::ATOMIC_CMP_SWAP_I32_POSTRA
++                          : LoongArch::ATOMIC_CMP_SWAP_I64_POSTRA;
++  unsigned Dest = MI.getOperand(0).getReg();
++  unsigned Ptr = MI.getOperand(1).getReg();
++  unsigned OldVal = MI.getOperand(2).getReg();
++  unsigned NewVal = MI.getOperand(3).getReg();
++
++  unsigned Scratch = MRI.createVirtualRegister(RC);
++  MachineBasicBlock::iterator II(MI);
++
++  // We need to create copies of the various registers and kill them at the
++  // atomic pseudo. If the copies are not made, when the atomic is expanded
++  // after fast register allocation, the spills will end up outside of the
++  // blocks that their values are defined in, causing livein errors.
++
++  unsigned PtrCopy = MRI.createVirtualRegister(MRI.getRegClass(Ptr));
++  unsigned OldValCopy = MRI.createVirtualRegister(MRI.getRegClass(OldVal));
++  unsigned NewValCopy = MRI.createVirtualRegister(MRI.getRegClass(NewVal));
++
++  BuildMI(*BB, II, DL, TII->get(LoongArch::COPY), PtrCopy).addReg(Ptr);
++  BuildMI(*BB, II, DL, TII->get(LoongArch::COPY), OldValCopy).addReg(OldVal);
++  BuildMI(*BB, II, DL, TII->get(LoongArch::COPY), NewValCopy).addReg(NewVal);
++
++  // The purposes of the flags on the scratch registers are explained in
++  // emitAtomicBinary. In summary, we need a scratch register which is going to
++  // be undef, that is unique among registers chosen for the instruction.
++
++  BuildMI(*BB, II, DL, TII->get(LoongArch::DBAR)).addImm(0);
++  BuildMI(*BB, II, DL, TII->get(AtomicOp))
++      .addReg(Dest, RegState::Define | RegState::EarlyClobber)
++      .addReg(PtrCopy, RegState::Kill)
++      .addReg(OldValCopy, RegState::Kill)
++      .addReg(NewValCopy, RegState::Kill)
++      .addReg(Scratch, RegState::EarlyClobber | RegState::Define |
++                           RegState::Dead | RegState::Implicit);
++
++  BuildMI(*BB, II, DL, TII->get(LoongArch::DBAR)).addImm(DBAR_HINT);
++
++  MI.eraseFromParent(); // The instruction is gone now.
++
++  return BB;
++}
++
++MachineBasicBlock *LoongArchTargetLowering::emitAtomicCmpSwapPartword(
++    MachineInstr &MI, MachineBasicBlock *BB, unsigned Size) const {
++  assert((Size == 1 || Size == 2) &&
++         "Unsupported size for EmitAtomicCmpSwapPartial.");
++
++  MachineFunction *MF = BB->getParent();
++  MachineRegisterInfo &RegInfo = MF->getRegInfo();
++  const TargetRegisterClass *RC = getRegClassFor(MVT::i32);
++  const bool ArePtrs64bit = ABI.ArePtrs64bit();
++  const TargetRegisterClass *RCp =
++      getRegClassFor(ArePtrs64bit ?
MVT::i64 : MVT::i32); ++ const TargetInstrInfo *TII = Subtarget.getInstrInfo(); ++ DebugLoc DL = MI.getDebugLoc(); ++ ++ unsigned Dest = MI.getOperand(0).getReg(); ++ unsigned Ptr = MI.getOperand(1).getReg(); ++ unsigned CmpVal = MI.getOperand(2).getReg(); ++ unsigned NewVal = MI.getOperand(3).getReg(); ++ ++ unsigned AlignedAddr = RegInfo.createVirtualRegister(RCp); ++ unsigned ShiftAmt = RegInfo.createVirtualRegister(RC); ++ unsigned Mask = RegInfo.createVirtualRegister(RC); ++ unsigned Mask2 = RegInfo.createVirtualRegister(RC); ++ unsigned ShiftedCmpVal = RegInfo.createVirtualRegister(RC); ++ unsigned ShiftedNewVal = RegInfo.createVirtualRegister(RC); ++ unsigned MaskLSB2 = RegInfo.createVirtualRegister(RCp); ++ unsigned PtrLSB2 = RegInfo.createVirtualRegister(RC); ++ unsigned MaskUpper = RegInfo.createVirtualRegister(RC); ++ unsigned MaskUppest = RegInfo.createVirtualRegister(RC); ++ unsigned Mask3 = RegInfo.createVirtualRegister(RC); ++ unsigned MaskedCmpVal = RegInfo.createVirtualRegister(RC); ++ unsigned MaskedNewVal = RegInfo.createVirtualRegister(RC); ++ unsigned AtomicOp = MI.getOpcode() == LoongArch::ATOMIC_CMP_SWAP_I8 ++ ? LoongArch::ATOMIC_CMP_SWAP_I8_POSTRA ++ : LoongArch::ATOMIC_CMP_SWAP_I16_POSTRA; ++ ++ // The scratch registers here with the EarlyClobber | Define | Dead | Implicit ++ // flags are used to coerce the register allocator and the machine verifier to ++ // accept the usage of these registers. ++ // The EarlyClobber flag has the semantic properties that the operand it is ++ // attached to is clobbered before the rest of the inputs are read. Hence it ++ // must be unique among the operands to the instruction. ++ // The Define flag is needed to coerce the machine verifier that an Undef ++ // value isn't a problem. ++ // The Dead flag is needed as the value in scratch isn't used by any other ++ // instruction. Kill isn't used as Dead is more precise. ++ unsigned Scratch = RegInfo.createVirtualRegister(RC); ++ unsigned Scratch2 = RegInfo.createVirtualRegister(RC); ++ ++ // insert new blocks after the current block ++ const BasicBlock *LLVM_BB = BB->getBasicBlock(); ++ MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineFunction::iterator It = ++BB->getIterator(); ++ MF->insert(It, exitMBB); ++ ++ // Transfer the remainder of BB and its successor edges to exitMBB. ++ exitMBB->splice(exitMBB->begin(), BB, ++ std::next(MachineBasicBlock::iterator(MI)), BB->end()); ++ exitMBB->transferSuccessorsAndUpdatePHIs(BB); ++ ++ BB->addSuccessor(exitMBB, BranchProbability::getOne()); ++ ++ // thisMBB: ++ // addiu masklsb2,$0,-4 # 0xfffffffc ++ // and alignedaddr,ptr,masklsb2 ++ // andi ptrlsb2,ptr,3 ++ // xori ptrlsb2,ptrlsb2,3 # Only for BE ++ // sll shiftamt,ptrlsb2,3 ++ // ori maskupper,$0,255 # 0xff ++ // sll mask,maskupper,shiftamt ++ // nor mask2,$0,mask ++ // andi maskedcmpval,cmpval,255 ++ // sll shiftedcmpval,maskedcmpval,shiftamt ++ // andi maskednewval,newval,255 ++ // sll shiftednewval,maskednewval,shiftamt ++ ++ int64_t MaskImm = (Size == 1) ? 255 : 4095; ++ BuildMI(BB, DL, TII->get(ArePtrs64bit ? LoongArch::ADDI_D : LoongArch::ADDI_W), MaskLSB2) ++ .addReg(ABI.GetNullPtr()).addImm(-4); ++ BuildMI(BB, DL, TII->get(ArePtrs64bit ? LoongArch::AND : LoongArch::AND32), AlignedAddr) ++ .addReg(Ptr).addReg(MaskLSB2); ++ BuildMI(BB, DL, TII->get(LoongArch::ANDI32), PtrLSB2) ++ .addReg(Ptr, 0, ArePtrs64bit ? 
LoongArch::sub_32 : 0).addImm(3); ++ BuildMI(BB, DL, TII->get(LoongArch::SLLI_W), ShiftAmt).addReg(PtrLSB2).addImm(3); ++ ++ if(MaskImm==4095){ ++ BuildMI(BB, DL, TII->get(LoongArch::LU12I_W32), MaskUppest).addImm(0xf); ++ BuildMI(BB, DL, TII->get(LoongArch::ORI32), MaskUpper) ++ .addReg(MaskUppest).addImm(MaskImm); ++ } ++ else{ ++ BuildMI(BB, DL, TII->get(LoongArch::ORI32), MaskUpper) ++ .addReg(LoongArch::ZERO).addImm(MaskImm); ++ } ++ ++ BuildMI(BB, DL, TII->get(LoongArch::SLL_W), Mask) ++ .addReg(MaskUpper).addReg(ShiftAmt); ++ BuildMI(BB, DL, TII->get(LoongArch::NOR32), Mask2).addReg(LoongArch::ZERO).addReg(Mask); ++ if(MaskImm==4095){ ++ BuildMI(BB, DL, TII->get(LoongArch::ORI32), Mask3) ++ .addReg(MaskUppest).addImm(MaskImm); ++ BuildMI(BB, DL, TII->get(LoongArch::AND32), MaskedCmpVal) ++ .addReg(CmpVal).addReg(Mask3); ++ BuildMI(BB, DL, TII->get(LoongArch::SLL_W), ShiftedCmpVal) ++ .addReg(MaskedCmpVal).addReg(ShiftAmt); ++ BuildMI(BB, DL, TII->get(LoongArch::AND32), MaskedNewVal) ++ .addReg(NewVal).addReg(Mask3); ++ } ++ else{ ++ BuildMI(BB, DL, TII->get(LoongArch::ANDI32), MaskedCmpVal) ++ .addReg(CmpVal).addImm(MaskImm); ++ BuildMI(BB, DL, TII->get(LoongArch::SLL_W), ShiftedCmpVal) ++ .addReg(MaskedCmpVal).addReg(ShiftAmt); ++ BuildMI(BB, DL, TII->get(LoongArch::ANDI32), MaskedNewVal) ++ .addReg(NewVal).addImm(MaskImm); ++ } ++ BuildMI(BB, DL, TII->get(LoongArch::SLL_W), ShiftedNewVal) ++ .addReg(MaskedNewVal).addReg(ShiftAmt); ++ ++ // The purposes of the flags on the scratch registers are explained in ++ // emitAtomicBinary. In summary, we need a scratch register which is going to ++ // be undef, that is unique among the register chosen for the instruction. ++ ++ BuildMI(BB, DL, TII->get(LoongArch::DBAR)).addImm(0); ++ BuildMI(BB, DL, TII->get(AtomicOp)) ++ .addReg(Dest, RegState::Define | RegState::EarlyClobber) ++ .addReg(AlignedAddr) ++ .addReg(Mask) ++ .addReg(ShiftedCmpVal) ++ .addReg(Mask2) ++ .addReg(ShiftedNewVal) ++ .addReg(ShiftAmt) ++ .addReg(Scratch, RegState::EarlyClobber | RegState::Define | ++ RegState::Dead | RegState::Implicit) ++ .addReg(Scratch2, RegState::EarlyClobber | RegState::Define | ++ RegState::Dead | RegState::Implicit); ++ ++ MI.eraseFromParent(); // The instruction is gone now. ++ ++ return exitMBB; ++} ++ ++SDValue LoongArchTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const { ++ // The first operand is the chain, the second is the condition, the third is ++ // the block to branch to if the condition is true. ++ SDValue Chain = Op.getOperand(0); ++ SDValue Dest = Op.getOperand(2); ++ SDLoc DL(Op); ++ ++ SDValue CondRes = createFPCmp(DAG, Op.getOperand(1)); ++ ++ // Return if flag is not set by a floating point comparison. ++ if (CondRes.getOpcode() != LoongArchISD::FPCmp) ++ return Op; ++ ++ SDValue CCNode = CondRes.getOperand(2); ++ LoongArch::CondCode CC = ++ (LoongArch::CondCode)cast(CCNode)->getZExtValue(); ++ unsigned Opc = invertFPCondCodeUser(CC) ? LoongArch::BRANCH_F : LoongArch::BRANCH_T; ++ SDValue BrCode = DAG.getConstant(Opc, DL, MVT::i32); ++ SDValue FCC0 = DAG.getRegister(LoongArch::FCC0, MVT::i32); ++ return DAG.getNode(LoongArchISD::FPBrcond, DL, Op.getValueType(), Chain, BrCode, ++ FCC0, Dest, CondRes); ++} ++ ++SDValue LoongArchTargetLowering::lowerSELECT(SDValue Op, ++ SelectionDAG &DAG) const { ++ SDValue Cond = createFPCmp(DAG, Op.getOperand(0)); ++ ++ // Return if flag is not set by a floating point comparison. 
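++  // (A condition that is not an FP compare is left to the default SELECT
++  // lowering.)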
++ if (Cond.getOpcode() != LoongArchISD::FPCmp) ++ return Op; ++ ++ SDValue N1 = Op.getOperand(1); ++ SDValue N2 = Op.getOperand(2); ++ SDLoc DL(Op); ++ ++ ConstantSDNode *CC = cast(Cond.getOperand(2)); ++ bool invert = invertFPCondCodeUser((LoongArch::CondCode)CC->getSExtValue()); ++ SDValue FCC = DAG.getRegister(LoongArch::FCC0, MVT::i32); ++ ++ if (Op->getSimpleValueType(0).SimpleTy == MVT::f64 || ++ Op->getSimpleValueType(0).SimpleTy == MVT::f32) { ++ if (invert) ++ return DAG.getNode(LoongArchISD::FSEL, DL, N1.getValueType(), N1, FCC, N2, ++ Cond); ++ else ++ return DAG.getNode(LoongArchISD::FSEL, DL, N1.getValueType(), N2, FCC, N1, ++ Cond); ++ ++ } else ++ return Op; ++} ++ ++SDValue LoongArchTargetLowering::lowerSETCC(SDValue Op, SelectionDAG &DAG) const { ++ SDValue Cond = createFPCmp(DAG, Op); ++ ++ assert(Cond.getOpcode() == LoongArchISD::FPCmp && ++ "Floating point operand expected."); ++ ++ SDLoc DL(Op); ++ SDValue True = DAG.getConstant(1, DL, MVT::i32); ++ SDValue False = DAG.getConstant(0, DL, MVT::i32); ++ ++ return createCMovFP(DAG, Cond, True, False, DL); ++} ++ ++SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op, ++ SelectionDAG &DAG) const { ++ GlobalAddressSDNode *N = cast(Op); ++ ++ const GlobalValue *GV = N->getGlobal(); ++ bool IsLocal = getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV); ++ SDValue Addr = getAddr(N, DAG, IsLocal); ++ ++ return Addr; ++} ++ ++SDValue LoongArchTargetLowering::lowerBlockAddress(SDValue Op, ++ SelectionDAG &DAG) const { ++ BlockAddressSDNode *N = cast(Op); ++ ++ return getAddr(N, DAG); ++} ++ ++SDValue LoongArchTargetLowering:: ++lowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const ++{ ++ GlobalAddressSDNode *GA = cast(Op); ++ if (DAG.getTarget().useEmulatedTLS()) ++ return LowerToTLSEmulatedModel(GA, DAG); ++ ++ SDLoc DL(GA); ++ const GlobalValue *GV = GA->getGlobal(); ++ EVT PtrVT = getPointerTy(DAG.getDataLayout()); ++ ++ TLSModel::Model model = getTargetMachine().getTLSModel(GV); ++ ++ if (model == TLSModel::GeneralDynamic || model == TLSModel::LocalDynamic) { ++ // General Dynamic TLS Model && Local Dynamic TLS Model ++ unsigned PtrSize = PtrVT.getSizeInBits(); ++ IntegerType *PtrTy = Type::getIntNTy(*DAG.getContext(), PtrSize); ++ // SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, PtrTy, 0, 0); ++ SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0U); ++ SDValue Load = SDValue(DAG.getMachineNode(LoongArch::LoadAddrTLS_GD , ++ DL, PtrVT, Addr), 0); ++ SDValue TlsGetAddr = DAG.getExternalSymbol("__tls_get_addr", PtrVT); ++ ++ ArgListTy Args; ++ ArgListEntry Entry; ++ Entry.Node = Load; ++ Entry.Ty = PtrTy; ++ Args.push_back(Entry); ++ ++ TargetLowering::CallLoweringInfo CLI(DAG); ++ CLI.setDebugLoc(DL) ++ .setChain(DAG.getEntryNode()) ++ .setLibCallee(CallingConv::C, PtrTy, TlsGetAddr, std::move(Args)); ++ std::pair CallResult = LowerCallTo(CLI); ++ ++ SDValue Ret = CallResult.first; ++ ++ return Ret; ++ } ++ ++ SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0U); ++ SDValue Offset; ++ if (model == TLSModel::InitialExec) { ++ // Initial Exec TLS Model ++ Offset = SDValue(DAG.getMachineNode(LoongArch::LoadAddrTLS_IE, DL, ++ PtrVT, Addr), 0); ++ } else { ++ // Local Exec TLS Model ++ assert(model == TLSModel::LocalExec); ++ Offset = SDValue(DAG.getMachineNode(LoongArch::LoadAddrTLS_LE, DL, ++ PtrVT, Addr), 0); ++ } ++ ++ SDValue ThreadPointer = DAG.getRegister((PtrVT == MVT::i32) ++ ? 
LoongArch::TP ++ : LoongArch::TP_64, PtrVT); ++ return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadPointer, Offset); ++} ++ ++SDValue LoongArchTargetLowering:: ++lowerJumpTable(SDValue Op, SelectionDAG &DAG) const ++{ ++ JumpTableSDNode *N = cast(Op); ++ ++ return getAddr(N, DAG); ++} ++ ++SDValue LoongArchTargetLowering:: ++lowerConstantPool(SDValue Op, SelectionDAG &DAG) const ++{ ++ ConstantPoolSDNode *N = cast(Op); ++ ++ return getAddr(N, DAG); ++} ++ ++SDValue LoongArchTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const { ++ MachineFunction &MF = DAG.getMachineFunction(); ++ LoongArchFunctionInfo *FuncInfo = MF.getInfo(); ++ ++ SDLoc DL(Op); ++ SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), ++ getPointerTy(MF.getDataLayout())); ++ ++ // vastart just stores the address of the VarArgsFrameIndex slot into the ++ // memory location argument. ++ const Value *SV = cast(Op.getOperand(2))->getValue(); ++ return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1), ++ MachinePointerInfo(SV)); ++} ++ ++SDValue LoongArchTargetLowering::lowerVAARG(SDValue Op, SelectionDAG &DAG) const { ++ SDNode *Node = Op.getNode(); ++ EVT VT = Node->getValueType(0); ++ SDValue Chain = Node->getOperand(0); ++ SDValue VAListPtr = Node->getOperand(1); ++ const Align Align = ++ llvm::MaybeAlign(Node->getConstantOperandVal(3)).valueOrOne(); ++ const Value *SV = cast(Node->getOperand(2))->getValue(); ++ SDLoc DL(Node); ++ unsigned ArgSlotSizeInBytes = (ABI.IsLPX32() || ABI.IsLP64()) ? 8 : 4; ++ ++ SDValue VAListLoad = DAG.getLoad(getPointerTy(DAG.getDataLayout()), DL, Chain, ++ VAListPtr, MachinePointerInfo(SV)); ++ SDValue VAList = VAListLoad; ++ ++ // Re-align the pointer if necessary. ++ // It should only ever be necessary for 64-bit types on LP32 since the minimum ++ // argument alignment is the same as the maximum type alignment for LPX32/LP64. ++ // ++ // FIXME: We currently align too often. The code generator doesn't notice ++ // when the pointer is still aligned from the last va_arg (or pair of ++ // va_args for the i64 on LP32 case). ++ if (Align > getMinStackArgumentAlignment()) { ++ VAList = DAG.getNode( ++ ISD::ADD, DL, VAList.getValueType(), VAList, ++ DAG.getConstant(Align.value() - 1, DL, VAList.getValueType())); ++ ++ VAList = DAG.getNode( ++ ISD::AND, DL, VAList.getValueType(), VAList, ++ DAG.getConstant(-(int64_t)Align.value(), DL, VAList.getValueType())); ++ } ++ ++ // Increment the pointer, VAList, to the next vaarg. ++ auto &TD = DAG.getDataLayout(); ++ unsigned ArgSizeInBytes = ++ TD.getTypeAllocSize(VT.getTypeForEVT(*DAG.getContext())); ++ SDValue Tmp3 = ++ DAG.getNode(ISD::ADD, DL, VAList.getValueType(), VAList, ++ DAG.getConstant(alignTo(ArgSizeInBytes, ArgSlotSizeInBytes), ++ DL, VAList.getValueType())); ++ // Store the incremented VAList to the legalized pointer ++ Chain = DAG.getStore(VAListLoad.getValue(1), DL, Tmp3, VAListPtr, ++ MachinePointerInfo(SV)); ++ ++ // Load the actual argument out of the pointer VAList ++ return DAG.getLoad(VT, DL, Chain, VAList, MachinePointerInfo()); ++} ++ ++SDValue LoongArchTargetLowering:: ++lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { ++ // check the depth ++ assert((cast(Op.getOperand(0))->getZExtValue() == 0) && ++ "Frame address can only be determined for current frame."); ++ ++ MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); ++ MFI.setFrameAddressIsTaken(true); ++ EVT VT = Op.getValueType(); ++ SDLoc DL(Op); ++ SDValue FrameAddr = DAG.getCopyFromReg( ++ DAG.getEntryNode(), DL, ABI.IsLP64() ? 
LoongArch::FP_64 : LoongArch::FP, VT); ++ return FrameAddr; ++} ++ ++SDValue LoongArchTargetLowering::lowerRETURNADDR(SDValue Op, ++ SelectionDAG &DAG) const { ++ if (verifyReturnAddressArgumentIsConstant(Op, DAG)) ++ return SDValue(); ++ ++ // check the depth ++ assert((cast(Op.getOperand(0))->getZExtValue() == 0) && ++ "Return address can be determined only for current frame."); ++ ++ MachineFunction &MF = DAG.getMachineFunction(); ++ MachineFrameInfo &MFI = MF.getFrameInfo(); ++ MVT VT = Op.getSimpleValueType(); ++ unsigned RA = ABI.IsLP64() ? LoongArch::RA_64 : LoongArch::RA; ++ MFI.setReturnAddressIsTaken(true); ++ ++ // Return RA, which contains the return address. Mark it an implicit live-in. ++ unsigned Reg = MF.addLiveIn(RA, getRegClassFor(VT)); ++ return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), Reg, VT); ++} ++ ++// An EH_RETURN is the result of lowering llvm.eh.return which in turn is ++// generated from __builtin_eh_return (offset, handler) ++// The effect of this is to adjust the stack pointer by "offset" ++// and then branch to "handler". ++SDValue LoongArchTargetLowering::lowerEH_RETURN(SDValue Op, SelectionDAG &DAG) ++ const { ++ MachineFunction &MF = DAG.getMachineFunction(); ++ LoongArchFunctionInfo *LoongArchFI = MF.getInfo(); ++ ++ LoongArchFI->setCallsEhReturn(); ++ SDValue Chain = Op.getOperand(0); ++ SDValue Offset = Op.getOperand(1); ++ SDValue Handler = Op.getOperand(2); ++ SDLoc DL(Op); ++ EVT Ty = ABI.IsLP64() ? MVT::i64 : MVT::i32; ++ ++ // Store stack offset in A1, store jump target in A0. Glue CopyToReg and ++ // EH_RETURN nodes, so that instructions are emitted back-to-back. ++ unsigned OffsetReg = ABI.IsLP64() ? LoongArch::A1_64 : LoongArch::A1; ++ unsigned AddrReg = ABI.IsLP64() ? LoongArch::A0_64 : LoongArch::A0; ++ Chain = DAG.getCopyToReg(Chain, DL, OffsetReg, Offset, SDValue()); ++ Chain = DAG.getCopyToReg(Chain, DL, AddrReg, Handler, Chain.getValue(1)); ++ return DAG.getNode(LoongArchISD::EH_RETURN, DL, MVT::Other, Chain, ++ DAG.getRegister(OffsetReg, Ty), ++ DAG.getRegister(AddrReg, getPointerTy(MF.getDataLayout())), ++ Chain.getValue(1)); ++} ++ ++SDValue LoongArchTargetLowering::lowerATOMIC_FENCE(SDValue Op, ++ SelectionDAG &DAG) const { ++ // FIXME: Need pseudo-fence for 'singlethread' fences ++ // FIXME: Set SType for weaker fences where supported/appropriate. ++ unsigned SType = 0; ++ SDLoc DL(Op); ++ return DAG.getNode(LoongArchISD::DBAR, DL, MVT::Other, Op.getOperand(0), ++ DAG.getConstant(SType, DL, MVT::i32)); ++} ++ ++SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op, ++ SelectionDAG &DAG) const { ++ SDLoc DL(Op); ++ MVT VT = Subtarget.is64Bit() ? 
MVT::i64 : MVT::i32; ++ ++ SDValue Lo = Op.getOperand(0), Hi = Op.getOperand(1); ++ SDValue Shamt = Op.getOperand(2); ++ // if shamt < (VT.bits): ++ // lo = (shl lo, shamt) ++ // hi = (or (shl hi, shamt) (srl (srl lo, 1), ~shamt)) ++ // else: ++ // lo = 0 ++ // hi = (shl lo, shamt[4:0]) ++ SDValue Not = DAG.getNode(ISD::XOR, DL, MVT::i32, Shamt, ++ DAG.getConstant(-1, DL, MVT::i32)); ++ SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, ++ DAG.getConstant(1, DL, VT)); ++ SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, Not); ++ SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt); ++ SDValue Or = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo); ++ SDValue ShiftLeftLo = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt); ++ SDValue Cond = DAG.getNode(ISD::AND, DL, MVT::i32, Shamt, ++ DAG.getConstant(VT.getSizeInBits(), DL, MVT::i32)); ++ Lo = DAG.getNode(ISD::SELECT, DL, VT, Cond, ++ DAG.getConstant(0, DL, VT), ShiftLeftLo); ++ Hi = DAG.getNode(ISD::SELECT, DL, VT, Cond, ShiftLeftLo, Or); ++ ++ SDValue Ops[2] = {Lo, Hi}; ++ return DAG.getMergeValues(Ops, DL); ++} ++ ++SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG, ++ bool IsSRA) const { ++ SDLoc DL(Op); ++ SDValue Lo = Op.getOperand(0), Hi = Op.getOperand(1); ++ SDValue Shamt = Op.getOperand(2); ++ MVT VT = Subtarget.is64Bit() ? MVT::i64 : MVT::i32; ++ ++ // if shamt < (VT.bits): ++ // lo = (or (shl (shl hi, 1), ~shamt) (srl lo, shamt)) ++ // if isSRA: ++ // hi = (sra hi, shamt) ++ // else: ++ // hi = (srl hi, shamt) ++ // else: ++ // if isSRA: ++ // lo = (sra hi, shamt[4:0]) ++ // hi = (sra hi, 31) ++ // else: ++ // lo = (srl hi, shamt[4:0]) ++ // hi = 0 ++ SDValue Not = DAG.getNode(ISD::XOR, DL, MVT::i32, Shamt, ++ DAG.getConstant(-1, DL, MVT::i32)); ++ SDValue ShiftLeft1Hi = DAG.getNode(ISD::SHL, DL, VT, Hi, ++ DAG.getConstant(1, DL, VT)); ++ SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, ShiftLeft1Hi, Not); ++ SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt); ++ SDValue Or = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo); ++ SDValue ShiftRightHi = DAG.getNode(IsSRA ? ISD::SRA : ISD::SRL, ++ DL, VT, Hi, Shamt); ++ SDValue Cond = DAG.getNode(ISD::AND, DL, MVT::i32, Shamt, ++ DAG.getConstant(VT.getSizeInBits(), DL, MVT::i32)); ++ SDValue Ext = DAG.getNode(ISD::SRA, DL, VT, Hi, ++ DAG.getConstant(VT.getSizeInBits() - 1, DL, VT)); ++ Lo = DAG.getNode(ISD::SELECT, DL, VT, Cond, ShiftRightHi, Or); ++ Hi = DAG.getNode(ISD::SELECT, DL, VT, Cond, ++ IsSRA ? Ext : DAG.getConstant(0, DL, VT), ShiftRightHi); ++ ++ SDValue Ops[2] = {Lo, Hi}; ++ return DAG.getMergeValues(Ops, DL); ++} ++ ++// Lower (store (fp_to_sint $fp) $ptr) to (store (TruncIntFP $fp), $ptr). 
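++// For example, (store (fp_to_sint f64:$x), $ptr) becomes
++// (store (TruncIntFP f64:$x), $ptr), so the converted value can be stored
++// directly from an FPR instead of first being moved into a GPR.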
++static SDValue lowerFP_TO_SINT_STORE(StoreSDNode *SD, SelectionDAG &DAG, ++ bool SingleFloat) { ++ SDValue Val = SD->getValue(); ++ ++ if (Val.getOpcode() != ISD::FP_TO_SINT || ++ (Val.getValueSizeInBits() > 32 && SingleFloat)) ++ return SDValue(); ++ ++ EVT FPTy = EVT::getFloatingPointVT(Val.getValueSizeInBits()); ++ SDValue Tr = DAG.getNode(LoongArchISD::TruncIntFP, SDLoc(Val), FPTy, ++ Val.getOperand(0)); ++ return DAG.getStore(SD->getChain(), SDLoc(SD), Tr, SD->getBasePtr(), ++ SD->getPointerInfo(), SD->getAlignment(), ++ SD->getMemOperand()->getFlags()); ++} ++ ++SDValue LoongArchTargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const { ++ StoreSDNode *SD = cast(Op); ++ return lowerFP_TO_SINT_STORE(SD, DAG, Subtarget.isSingleFloat()); ++} ++ ++SDValue LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, ++ SelectionDAG &DAG) const { ++ SDLoc DL(Op); ++ unsigned Intrinsic = cast(Op->getOperand(0))->getZExtValue(); ++ switch (Intrinsic) { ++ default: ++ return SDValue(); ++ case Intrinsic::loongarch_lsx_vaddi_bu: ++ case Intrinsic::loongarch_lsx_vaddi_hu: ++ case Intrinsic::loongarch_lsx_vaddi_wu: ++ case Intrinsic::loongarch_lsx_vaddi_du: ++ return DAG.getNode(ISD::ADD, DL, Op->getValueType(0), Op->getOperand(1), ++ lowerLSXSplatImm(Op, 2, DAG)); ++ case Intrinsic::loongarch_lsx_vand_v: ++ case Intrinsic::loongarch_lasx_xvand_v: ++ return DAG.getNode(ISD::AND, DL, Op->getValueType(0), Op->getOperand(1), ++ Op->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vbitclr_b: ++ case Intrinsic::loongarch_lsx_vbitclr_h: ++ case Intrinsic::loongarch_lsx_vbitclr_w: ++ case Intrinsic::loongarch_lsx_vbitclr_d: ++ return lowerLSXBitClear(Op, DAG); ++ case Intrinsic::loongarch_lsx_vdiv_b: ++ case Intrinsic::loongarch_lsx_vdiv_h: ++ case Intrinsic::loongarch_lsx_vdiv_w: ++ case Intrinsic::loongarch_lsx_vdiv_d: ++ return DAG.getNode(ISD::SDIV, DL, Op->getValueType(0), Op->getOperand(1), ++ Op->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vdiv_bu: ++ case Intrinsic::loongarch_lsx_vdiv_hu: ++ case Intrinsic::loongarch_lsx_vdiv_wu: ++ case Intrinsic::loongarch_lsx_vdiv_du: ++ return DAG.getNode(ISD::UDIV, DL, Op->getValueType(0), Op->getOperand(1), ++ Op->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vfdiv_s: ++ case Intrinsic::loongarch_lsx_vfdiv_d: ++ return DAG.getNode(ISD::FDIV, DL, Op->getValueType(0), Op->getOperand(1), ++ Op->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vffint_s_wu: ++ case Intrinsic::loongarch_lsx_vffint_d_lu: ++ return DAG.getNode(ISD::UINT_TO_FP, DL, Op->getValueType(0), ++ Op->getOperand(1)); ++ case Intrinsic::loongarch_lsx_vffint_s_w: ++ case Intrinsic::loongarch_lsx_vffint_d_l: ++ return DAG.getNode(ISD::SINT_TO_FP, DL, Op->getValueType(0), ++ Op->getOperand(1)); ++ case Intrinsic::loongarch_lsx_vfmul_s: ++ case Intrinsic::loongarch_lsx_vfmul_d: ++ return DAG.getNode(ISD::FMUL, DL, Op->getValueType(0), Op->getOperand(1), ++ Op->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vfrint_s: ++ case Intrinsic::loongarch_lsx_vfrint_d: ++ return DAG.getNode(ISD::FRINT, DL, Op->getValueType(0), Op->getOperand(1)); ++ case Intrinsic::loongarch_lsx_vfsqrt_s: ++ case Intrinsic::loongarch_lsx_vfsqrt_d: ++ return DAG.getNode(ISD::FSQRT, DL, Op->getValueType(0), Op->getOperand(1)); ++ case Intrinsic::loongarch_lsx_vftintrz_wu_s: ++ case Intrinsic::loongarch_lsx_vftintrz_lu_d: ++ return DAG.getNode(ISD::FP_TO_UINT, DL, Op->getValueType(0), ++ Op->getOperand(1)); ++ case Intrinsic::loongarch_lsx_vpackev_b: ++ case Intrinsic::loongarch_lsx_vpackev_h: ++ case 
Intrinsic::loongarch_lsx_vpackev_w: ++ case Intrinsic::loongarch_lsx_vpackev_d: ++ return DAG.getNode(LoongArchISD::VPACKEV, DL, Op->getValueType(0), ++ Op->getOperand(1), Op->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vilvh_b: ++ case Intrinsic::loongarch_lsx_vilvh_h: ++ case Intrinsic::loongarch_lsx_vilvh_w: ++ case Intrinsic::loongarch_lsx_vilvh_d: ++ return DAG.getNode(LoongArchISD::VILVH, DL, Op->getValueType(0), ++ Op->getOperand(1), Op->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vpackod_b: ++ case Intrinsic::loongarch_lsx_vpackod_h: ++ case Intrinsic::loongarch_lsx_vpackod_w: ++ case Intrinsic::loongarch_lsx_vpackod_d: ++ return DAG.getNode(LoongArchISD::VPACKOD, DL, Op->getValueType(0), ++ Op->getOperand(1), Op->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vilvl_b: ++ case Intrinsic::loongarch_lsx_vilvl_h: ++ case Intrinsic::loongarch_lsx_vilvl_w: ++ case Intrinsic::loongarch_lsx_vilvl_d: ++ return DAG.getNode(LoongArchISD::VILVL, DL, Op->getValueType(0), ++ Op->getOperand(1), Op->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vmadd_b: ++ case Intrinsic::loongarch_lsx_vmadd_h: ++ case Intrinsic::loongarch_lsx_vmadd_w: ++ case Intrinsic::loongarch_lsx_vmadd_d: { ++ EVT ResTy = Op->getValueType(0); ++ return DAG.getNode(ISD::ADD, SDLoc(Op), ResTy, Op->getOperand(1), ++ DAG.getNode(ISD::MUL, SDLoc(Op), ResTy, ++ Op->getOperand(2), Op->getOperand(3))); ++ } ++ case Intrinsic::loongarch_lsx_vmax_b: ++ case Intrinsic::loongarch_lsx_vmax_h: ++ case Intrinsic::loongarch_lsx_vmax_w: ++ case Intrinsic::loongarch_lsx_vmax_d: ++ return DAG.getNode(ISD::SMAX, DL, Op->getValueType(0), Op->getOperand(1), ++ Op->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vmax_bu: ++ case Intrinsic::loongarch_lsx_vmax_hu: ++ case Intrinsic::loongarch_lsx_vmax_wu: ++ case Intrinsic::loongarch_lsx_vmax_du: ++ return DAG.getNode(ISD::UMAX, DL, Op->getValueType(0), Op->getOperand(1), ++ Op->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vmin_b: ++ case Intrinsic::loongarch_lsx_vmin_h: ++ case Intrinsic::loongarch_lsx_vmin_w: ++ case Intrinsic::loongarch_lsx_vmin_d: ++ return DAG.getNode(ISD::SMIN, DL, Op->getValueType(0), Op->getOperand(1), ++ Op->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vmin_bu: ++ case Intrinsic::loongarch_lsx_vmin_hu: ++ case Intrinsic::loongarch_lsx_vmin_wu: ++ case Intrinsic::loongarch_lsx_vmin_du: ++ return DAG.getNode(ISD::UMIN, DL, Op->getValueType(0), Op->getOperand(1), ++ Op->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vmini_bu: ++ case Intrinsic::loongarch_lsx_vmini_hu: ++ case Intrinsic::loongarch_lsx_vmini_wu: ++ case Intrinsic::loongarch_lsx_vmini_du: ++ return DAG.getNode(ISD::UMIN, DL, Op->getValueType(0), Op->getOperand(1), ++ lowerLSXSplatImm(Op, 2, DAG)); ++ case Intrinsic::loongarch_lsx_vmod_b: ++ case Intrinsic::loongarch_lsx_vmod_h: ++ case Intrinsic::loongarch_lsx_vmod_w: ++ case Intrinsic::loongarch_lsx_vmod_d: ++ return DAG.getNode(ISD::SREM, DL, Op->getValueType(0), Op->getOperand(1), ++ Op->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vmod_bu: ++ case Intrinsic::loongarch_lsx_vmod_hu: ++ case Intrinsic::loongarch_lsx_vmod_wu: ++ case Intrinsic::loongarch_lsx_vmod_du: ++ return DAG.getNode(ISD::UREM, DL, Op->getValueType(0), Op->getOperand(1), ++ Op->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vmul_b: ++ case Intrinsic::loongarch_lsx_vmul_h: ++ case Intrinsic::loongarch_lsx_vmul_w: ++ case Intrinsic::loongarch_lsx_vmul_d: ++ return DAG.getNode(ISD::MUL, DL, Op->getValueType(0), Op->getOperand(1), ++ Op->getOperand(2)); ++ case 
Intrinsic::loongarch_lsx_vmsub_b: ++ case Intrinsic::loongarch_lsx_vmsub_h: ++ case Intrinsic::loongarch_lsx_vmsub_w: ++ case Intrinsic::loongarch_lsx_vmsub_d: { ++ EVT ResTy = Op->getValueType(0); ++ return DAG.getNode(ISD::SUB, SDLoc(Op), ResTy, Op->getOperand(1), ++ DAG.getNode(ISD::MUL, SDLoc(Op), ResTy, ++ Op->getOperand(2), Op->getOperand(3))); ++ } ++ case Intrinsic::loongarch_lsx_vclz_b: ++ case Intrinsic::loongarch_lsx_vclz_h: ++ case Intrinsic::loongarch_lsx_vclz_w: ++ case Intrinsic::loongarch_lsx_vclz_d: ++ return DAG.getNode(ISD::CTLZ, DL, Op->getValueType(0), Op->getOperand(1)); ++ case Intrinsic::loongarch_lsx_vnor_v: ++ case Intrinsic::loongarch_lasx_xvnor_v: { ++ SDValue Res = DAG.getNode(ISD::OR, DL, Op->getValueType(0), ++ Op->getOperand(1), Op->getOperand(2)); ++ return DAG.getNOT(DL, Res, Res->getValueType(0)); ++ } ++ case Intrinsic::loongarch_lsx_vor_v: ++ case Intrinsic::loongarch_lasx_xvor_v: ++ return DAG.getNode(ISD::OR, DL, Op->getValueType(0), Op->getOperand(1), ++ Op->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vpickev_b: ++ case Intrinsic::loongarch_lsx_vpickev_h: ++ case Intrinsic::loongarch_lsx_vpickev_w: ++ case Intrinsic::loongarch_lsx_vpickev_d: ++ return DAG.getNode(LoongArchISD::VPICKEV, DL, Op->getValueType(0), ++ Op->getOperand(1), Op->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vpickod_b: ++ case Intrinsic::loongarch_lsx_vpickod_h: ++ case Intrinsic::loongarch_lsx_vpickod_w: ++ case Intrinsic::loongarch_lsx_vpickod_d: ++ return DAG.getNode(LoongArchISD::VPICKOD, DL, Op->getValueType(0), ++ Op->getOperand(1), Op->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vpcnt_b: ++ case Intrinsic::loongarch_lsx_vpcnt_h: ++ case Intrinsic::loongarch_lsx_vpcnt_w: ++ case Intrinsic::loongarch_lsx_vpcnt_d: ++ return DAG.getNode(ISD::CTPOP, DL, Op->getValueType(0), Op->getOperand(1)); ++ case Intrinsic::loongarch_lsx_vsat_b: ++ case Intrinsic::loongarch_lsx_vsat_h: ++ case Intrinsic::loongarch_lsx_vsat_w: ++ case Intrinsic::loongarch_lsx_vsat_d: ++ case Intrinsic::loongarch_lsx_vsat_bu: ++ case Intrinsic::loongarch_lsx_vsat_hu: ++ case Intrinsic::loongarch_lsx_vsat_wu: ++ case Intrinsic::loongarch_lsx_vsat_du: { ++ // Report an error for out of range values. 
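++    // The vsat immediate selects a bit position within the element, so it
++    // must lie in [0, element-bits - 1], i.e. 7/15/31/63 for b/h/w/d.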
++ int64_t Max; ++ switch (Intrinsic) { ++ case Intrinsic::loongarch_lsx_vsat_b: ++ case Intrinsic::loongarch_lsx_vsat_bu: ++ Max = 7; ++ break; ++ case Intrinsic::loongarch_lsx_vsat_h: ++ case Intrinsic::loongarch_lsx_vsat_hu: ++ Max = 15; ++ break; ++ case Intrinsic::loongarch_lsx_vsat_w: ++ case Intrinsic::loongarch_lsx_vsat_wu: ++ Max = 31; ++ break; ++ case Intrinsic::loongarch_lsx_vsat_d: ++ case Intrinsic::loongarch_lsx_vsat_du: ++ Max = 63; ++ break; ++ default: ++ llvm_unreachable("Unmatched intrinsic"); ++ } ++ int64_t Value = cast(Op->getOperand(2))->getSExtValue(); ++ if (Value < 0 || Value > Max) ++ report_fatal_error("Immediate out of range"); ++ return SDValue(); ++ } ++ case Intrinsic::loongarch_lsx_vshuf4i_b: ++ case Intrinsic::loongarch_lsx_vshuf4i_h: ++ case Intrinsic::loongarch_lsx_vshuf4i_w: ++ // case Intrinsic::loongarch_lsx_vshuf4i_d: ++ { ++ int64_t Value = cast(Op->getOperand(2))->getSExtValue(); ++ if (Value < 0 || Value > 255) ++ report_fatal_error("Immediate out of range"); ++ return DAG.getNode(LoongArchISD::SHF, DL, Op->getValueType(0), ++ Op->getOperand(2), Op->getOperand(1)); ++ } ++ case Intrinsic::loongarch_lsx_vsll_b: ++ case Intrinsic::loongarch_lsx_vsll_h: ++ case Intrinsic::loongarch_lsx_vsll_w: ++ case Intrinsic::loongarch_lsx_vsll_d: ++ return DAG.getNode(ISD::SHL, DL, Op->getValueType(0), Op->getOperand(1), ++ truncateVecElts(Op, DAG)); ++ case Intrinsic::loongarch_lsx_vslli_b: ++ case Intrinsic::loongarch_lsx_vslli_h: ++ case Intrinsic::loongarch_lsx_vslli_w: ++ case Intrinsic::loongarch_lsx_vslli_d: ++ return DAG.getNode(ISD::SHL, DL, Op->getValueType(0), Op->getOperand(1), ++ lowerLSXSplatImm(Op, 2, DAG)); ++ case Intrinsic::loongarch_lsx_vreplve_b: ++ case Intrinsic::loongarch_lsx_vreplve_h: ++ case Intrinsic::loongarch_lsx_vreplve_w: ++ case Intrinsic::loongarch_lsx_vreplve_d: ++ // We can't lower via VECTOR_SHUFFLE because it requires constant shuffle ++ // masks, nor can we lower via BUILD_VECTOR & EXTRACT_VECTOR_ELT because ++ // EXTRACT_VECTOR_ELT can't extract i64's on LoongArch32. ++ // Instead we lower to LoongArchISD::VSHF and match from there. ++ return DAG.getNode(LoongArchISD::VSHF, DL, Op->getValueType(0), ++ lowerLSXSplatZExt(Op, 2, DAG), Op->getOperand(1), ++ Op->getOperand(1)); ++ case Intrinsic::loongarch_lsx_vreplvei_b: ++ case Intrinsic::loongarch_lsx_vreplvei_h: ++ case Intrinsic::loongarch_lsx_vreplvei_w: ++ case Intrinsic::loongarch_lsx_vreplvei_d: ++ return DAG.getNode(LoongArchISD::VSHF, DL, Op->getValueType(0), ++ lowerLSXSplatImm(Op, 2, DAG), Op->getOperand(1), ++ Op->getOperand(1)); ++ case Intrinsic::loongarch_lsx_vsra_b: ++ case Intrinsic::loongarch_lsx_vsra_h: ++ case Intrinsic::loongarch_lsx_vsra_w: ++ case Intrinsic::loongarch_lsx_vsra_d: ++ return DAG.getNode(ISD::SRA, DL, Op->getValueType(0), Op->getOperand(1), ++ truncateVecElts(Op, DAG)); ++ case Intrinsic::loongarch_lsx_vsrari_b: ++ case Intrinsic::loongarch_lsx_vsrari_h: ++ case Intrinsic::loongarch_lsx_vsrari_w: ++ case Intrinsic::loongarch_lsx_vsrari_d: { ++ // Report an error for out of range values. 
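++    // An in-range immediate needs no custom node here; the SDValue() returned
++    // below leaves the intrinsic for normal instruction selection.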
++ int64_t Max; ++ switch (Intrinsic) { ++ case Intrinsic::loongarch_lsx_vsrari_b: ++ Max = 7; ++ break; ++ case Intrinsic::loongarch_lsx_vsrari_h: ++ Max = 15; ++ break; ++ case Intrinsic::loongarch_lsx_vsrari_w: ++ Max = 31; ++ break; ++ case Intrinsic::loongarch_lsx_vsrari_d: ++ Max = 63; ++ break; ++ default: ++ llvm_unreachable("Unmatched intrinsic"); ++ } ++ int64_t Value = cast(Op->getOperand(2))->getSExtValue(); ++ if (Value < 0 || Value > Max) ++ report_fatal_error("Immediate out of range"); ++ return SDValue(); ++ } ++ case Intrinsic::loongarch_lsx_vsrl_b: ++ case Intrinsic::loongarch_lsx_vsrl_h: ++ case Intrinsic::loongarch_lsx_vsrl_w: ++ case Intrinsic::loongarch_lsx_vsrl_d: ++ return DAG.getNode(ISD::SRL, DL, Op->getValueType(0), Op->getOperand(1), ++ truncateVecElts(Op, DAG)); ++ case Intrinsic::loongarch_lsx_vsrli_b: ++ case Intrinsic::loongarch_lsx_vsrli_h: ++ case Intrinsic::loongarch_lsx_vsrli_w: ++ case Intrinsic::loongarch_lsx_vsrli_d: ++ return DAG.getNode(ISD::SRL, DL, Op->getValueType(0), Op->getOperand(1), ++ lowerLSXSplatImm(Op, 2, DAG)); ++ case Intrinsic::loongarch_lsx_vsrlri_b: ++ case Intrinsic::loongarch_lsx_vsrlri_h: ++ case Intrinsic::loongarch_lsx_vsrlri_w: ++ case Intrinsic::loongarch_lsx_vsrlri_d: { ++ // Report an error for out of range values. ++ int64_t Max; ++ switch (Intrinsic) { ++ case Intrinsic::loongarch_lsx_vsrlri_b: ++ Max = 7; ++ break; ++ case Intrinsic::loongarch_lsx_vsrlri_h: ++ Max = 15; ++ break; ++ case Intrinsic::loongarch_lsx_vsrlri_w: ++ Max = 31; ++ break; ++ case Intrinsic::loongarch_lsx_vsrlri_d: ++ Max = 63; ++ break; ++ default: ++ llvm_unreachable("Unmatched intrinsic"); ++ } ++ int64_t Value = cast(Op->getOperand(2))->getSExtValue(); ++ if (Value < 0 || Value > Max) ++ report_fatal_error("Immediate out of range"); ++ return SDValue(); ++ } ++ case Intrinsic::loongarch_lsx_vsubi_bu: ++ case Intrinsic::loongarch_lsx_vsubi_hu: ++ case Intrinsic::loongarch_lsx_vsubi_wu: ++ case Intrinsic::loongarch_lsx_vsubi_du: ++ return DAG.getNode(ISD::SUB, DL, Op->getValueType(0), Op->getOperand(1), ++ lowerLSXSplatImm(Op, 2, DAG)); ++ case Intrinsic::loongarch_lsx_vshuf_h: ++ case Intrinsic::loongarch_lsx_vshuf_w: ++ case Intrinsic::loongarch_lsx_vshuf_d: ++ case Intrinsic::loongarch_lasx_xvshuf_h: ++ case Intrinsic::loongarch_lasx_xvshuf_w: ++ case Intrinsic::loongarch_lasx_xvshuf_d: ++ return DAG.getNode(LoongArchISD::VSHF, DL, Op->getValueType(0), ++ Op->getOperand(1), Op->getOperand(2), Op->getOperand(3)); ++ case Intrinsic::loongarch_lsx_vxor_v: ++ case Intrinsic::loongarch_lasx_xvxor_v: ++ return DAG.getNode(ISD::XOR, DL, Op->getValueType(0), Op->getOperand(1), ++ Op->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vrotr_b: ++ case Intrinsic::loongarch_lsx_vrotr_h: ++ case Intrinsic::loongarch_lsx_vrotr_w: ++ case Intrinsic::loongarch_lsx_vrotr_d: ++ return DAG.getNode(LoongArchISD::VROR, DL, Op->getValueType(0), ++ Op->getOperand(1), Op->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vrotri_b: ++ case Intrinsic::loongarch_lsx_vrotri_h: ++ case Intrinsic::loongarch_lsx_vrotri_w: ++ case Intrinsic::loongarch_lsx_vrotri_d: ++ return DAG.getNode(LoongArchISD::VRORI, DL, Op->getValueType(0), ++ Op->getOperand(1), Op->getOperand(2)); ++ case Intrinsic::thread_pointer: { ++ EVT PtrVT = getPointerTy(DAG.getDataLayout()); ++ if (PtrVT == MVT::i64) ++ return DAG.getRegister(LoongArch::TP_64, MVT::i64); ++ return DAG.getRegister(LoongArch::TP, MVT::i32); ++ } ++ } ++} ++ ++SDValue ++LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op, ++ 
SelectionDAG &DAG) const { ++ unsigned Intr = cast(Op->getOperand(1))->getZExtValue(); ++ switch (Intr) { ++ default: ++ return SDValue(); ++ case Intrinsic::loongarch_lsx_vld: ++ return lowerLSXLoadIntr(Op, DAG, Intr, Subtarget); ++ case Intrinsic::loongarch_lasx_xvld: ++ return lowerLASXLoadIntr(Op, DAG, Intr, Subtarget); ++ case Intrinsic::loongarch_lasx_xvldrepl_b: ++ case Intrinsic::loongarch_lasx_xvldrepl_h: ++ case Intrinsic::loongarch_lasx_xvldrepl_w: ++ case Intrinsic::loongarch_lasx_xvldrepl_d: ++ return lowerLASXVLDRIntr(Op, DAG, Intr, Subtarget); ++ case Intrinsic::loongarch_lsx_vldrepl_b: ++ case Intrinsic::loongarch_lsx_vldrepl_h: ++ case Intrinsic::loongarch_lsx_vldrepl_w: ++ case Intrinsic::loongarch_lsx_vldrepl_d: ++ return lowerLSXVLDRIntr(Op, DAG, Intr, Subtarget); ++ } ++} ++ ++SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op, ++ SelectionDAG &DAG) const { ++ unsigned Intr = cast(Op->getOperand(1))->getZExtValue(); ++ switch (Intr) { ++ default: ++ return SDValue(); ++ case Intrinsic::loongarch_lsx_vst: ++ return lowerLSXStoreIntr(Op, DAG, Intr, Subtarget); ++ case Intrinsic::loongarch_lasx_xvst: ++ return lowerLASXStoreIntr(Op, DAG, Intr, Subtarget); ++ } ++} ++ ++// Lower ISD::EXTRACT_VECTOR_ELT into LoongArchISD::VEXTRACT_SEXT_ELT. ++// ++// The non-value bits resulting from ISD::EXTRACT_VECTOR_ELT are undefined. We ++// choose to sign-extend but we could have equally chosen zero-extend. The ++// DAGCombiner will fold any sign/zero extension of the ISD::EXTRACT_VECTOR_ELT ++// result into this node later (possibly changing it to a zero-extend in the ++// process). ++SDValue ++LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op, ++ SelectionDAG &DAG) const { ++ SDLoc DL(Op); ++ EVT ResTy = Op->getValueType(0); ++ SDValue Op0 = Op->getOperand(0); ++ EVT VecTy = Op0->getValueType(0); ++ ++ if (!VecTy.is128BitVector() && !VecTy.is256BitVector()) ++ return SDValue(); ++ ++ if (ResTy.isInteger()) { ++ SDValue Op1 = Op->getOperand(1); ++ EVT EltTy = VecTy.getVectorElementType(); ++ if (VecTy.is128BitVector()) ++ return DAG.getNode(LoongArchISD::VEXTRACT_SEXT_ELT, DL, ResTy, Op0, Op1, ++ DAG.getValueType(EltTy)); ++ ++ ConstantSDNode *cn = dyn_cast(Op1); ++ if (!cn) ++ return SDValue(); ++ ++ if (EltTy == MVT::i32 || EltTy == MVT::i64) ++ return DAG.getNode(LoongArchISD::VEXTRACT_SEXT_ELT, DL, ResTy, Op0, Op1, ++ DAG.getValueType(EltTy)); ++ } ++ ++ return SDValue(); ++} ++ ++SDValue ++LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op, ++ SelectionDAG &DAG) const { ++ ++ MVT VT = Op.getSimpleValueType(); ++ MVT EltVT = VT.getVectorElementType(); ++ ++ SDLoc DL(Op); ++ SDValue Op0 = Op.getOperand(0); ++ SDValue Op1 = Op.getOperand(1); ++ SDValue Op2 = Op.getOperand(2); ++ ++ if (!EltVT.isInteger()) ++ return Op; ++ ++ if (!isa(Op2)) { ++ if (EltVT == MVT::i8 || EltVT == MVT::i16) { ++ return Op; // ==> pseudo ++ // use stack ++ return SDValue(); ++ } else { ++ return Op; ++ } ++ } ++ ++ if (VT.is128BitVector()) ++ return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Op0, Op1, Op2); ++ ++ if (VT.is256BitVector()) { ++ ++ if (EltVT == MVT::i32 || EltVT == MVT::i64) ++ return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Op0, Op1, Op2); ++ ++ return Op; ++ } ++ ++ return SDValue(); ++} ++ ++// Lowers ISD::BUILD_VECTOR into appropriate SelectionDAG nodes for the ++// backend. 
++// ++// Lowers according to the following rules: ++// - Constant splats are legal as-is as long as the SplatBitSize is a power of ++// 2 less than or equal to 64 and the value fits into a signed 10-bit ++// immediate ++// - Constant splats are lowered to bitconverted BUILD_VECTORs if SplatBitSize ++// is a power of 2 less than or equal to 64 and the value does not fit into a ++// signed 10-bit immediate ++// - Non-constant splats are legal as-is. ++// - Non-constant non-splats are lowered to sequences of INSERT_VECTOR_ELT. ++// - All others are illegal and must be expanded. ++SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op, ++ SelectionDAG &DAG) const { ++ BuildVectorSDNode *Node = cast(Op); ++ EVT ResTy = Op->getValueType(0); ++ SDLoc DL(Op); ++ APInt SplatValue, SplatUndef; ++ unsigned SplatBitSize; ++ bool HasAnyUndefs; ++ ++ if ((!Subtarget.hasLSX() || !ResTy.is128BitVector()) && ++ (!Subtarget.hasLASX() || !ResTy.is256BitVector())) ++ return SDValue(); ++ ++ if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs, ++ 8) && ++ SplatBitSize <= 64) { ++ // We can only cope with 8, 16, 32, or 64-bit elements ++ if ((ResTy.is128BitVector() && SplatBitSize != 8 && SplatBitSize != 16 && ++ SplatBitSize != 32 && SplatBitSize != 64) || ++ (ResTy.is256BitVector() && SplatBitSize != 8 && SplatBitSize != 16 && ++ SplatBitSize != 32 && SplatBitSize != 64)) ++ return SDValue(); ++ ++ // If the value isn't an integer type we will have to bitcast ++ // from an integer type first. Also, if there are any undefs, we must ++ // lower them to defined values first. ++ if (ResTy.isInteger() && !HasAnyUndefs) ++ return Op; ++ ++ EVT ViaVecTy; ++ ++ if ((ResTy.is128BitVector() && ++ !isLSXBySplatBitSize(SplatBitSize, ViaVecTy)) || ++ (ResTy.is256BitVector() && ++ !isLASXBySplatBitSize(SplatBitSize, ViaVecTy))) ++ return SDValue(); ++ ++ // SelectionDAG::getConstant will promote SplatValue appropriately. ++ SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy); ++ ++ // Bitcast to the type we originally wanted ++ if (ViaVecTy != ResTy) ++ Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result); ++ ++ return Result; ++ } else if (DAG.isSplatValue(Op, /* AllowUndefs */ false)) ++ return Op; ++ else if (!isConstantOrUndefBUILD_VECTOR(Node)) { ++ // Use INSERT_VECTOR_ELT operations rather than expand to stores. 
++ // The resulting code is the same length as the expansion, but it doesn't ++ // use memory operations ++ EVT ResTy = Node->getValueType(0); ++ ++ assert(ResTy.isVector()); ++ ++ unsigned NumElts = ResTy.getVectorNumElements(); ++ SDValue Vector = DAG.getUNDEF(ResTy); ++ for (unsigned i = 0; i < NumElts; ++i) { ++ Vector = ++ DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector, ++ Node->getOperand(i), DAG.getConstant(i, DL, MVT::i32)); ++ } ++ return Vector; ++ } ++ ++ return SDValue(); ++} ++ ++SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op, ++ SelectionDAG &DAG) const { ++ SDLoc DL(Op); ++ EVT ResTy = Op->getValueType(0); ++ Op = LowerSUINT_TO_FP(ISD::ZERO_EXTEND_VECTOR_INREG, Op, DAG); ++ if (!ResTy.isVector()) ++ return Op; ++ return DAG.getNode(ISD::UINT_TO_FP, DL, ResTy, Op); ++} ++ ++SDValue LoongArchTargetLowering::lowerSINT_TO_FP(SDValue Op, ++ SelectionDAG &DAG) const { ++ SDLoc DL(Op); ++ EVT ResTy = Op->getValueType(0); ++ Op = LowerSUINT_TO_FP(ISD::SIGN_EXTEND_VECTOR_INREG, Op, DAG); ++ if (!ResTy.isVector()) ++ return Op; ++ return DAG.getNode(ISD::SINT_TO_FP, DL, ResTy, Op); ++} ++ ++SDValue LoongArchTargetLowering::lowerFP_TO_UINT(SDValue Op, ++ SelectionDAG &DAG) const { ++ if (!Op->getValueType(0).isVector()) ++ return SDValue(); ++ return LowerFP_TO_SUINT(ISD::FP_TO_UINT, ISD::ZERO_EXTEND_VECTOR_INREG, Op, ++ DAG); ++} ++ ++SDValue LoongArchTargetLowering::lowerFP_TO_SINT(SDValue Op, ++ SelectionDAG &DAG) const { ++ if (Op->getValueType(0).isVector()) ++ return LowerFP_TO_SUINT(ISD::FP_TO_SINT, ISD::SIGN_EXTEND_VECTOR_INREG, Op, ++ DAG); ++ ++ if (Op.getValueSizeInBits() > 32 && Subtarget.isSingleFloat()) ++ return SDValue(); ++ ++ EVT FPTy = EVT::getFloatingPointVT(Op.getValueSizeInBits()); ++ SDValue Trunc = ++ DAG.getNode(LoongArchISD::TruncIntFP, SDLoc(Op), FPTy, Op.getOperand(0)); ++ return DAG.getNode(ISD::BITCAST, SDLoc(Op), Op.getValueType(), Trunc); ++} ++ ++static bool checkUndef(ArrayRef Mask, int Lo, int Hi) { ++ ++ for (int i = Lo, end = Hi; i != end; i++, Hi++) ++ if (!((Mask[i] == -1) || (Mask[i] == Hi))) ++ return false; ++ return true; ++} ++ ++static bool CheckRev(ArrayRef Mask) { ++ ++ int Num = Mask.size() - 1; ++ for (long unsigned int i = 0; i < Mask.size(); i++, Num--) ++ if (Mask[i] != Num) ++ return false; ++ return true; ++} ++ ++static bool checkHalf(ArrayRef Mask, int Lo, int Hi, int base) { ++ ++ for (int i = Lo; i < Hi; i++) ++ if (Mask[i] != (base + i)) ++ return false; ++ return true; ++} ++ ++static SDValue lowerHalfHalf(const SDLoc &DL, MVT VT, SDValue Op1, SDValue Op2, ++ ArrayRef Mask, SelectionDAG &DAG) { ++ ++ int Num = VT.getVectorNumElements(); ++ int HalfNum = Num / 2; ++ ++ if (Op1->isUndef() || Op2->isUndef() || Mask.size() > (long unsigned int)Num) ++ return SDValue(); ++ ++ if (checkHalf(Mask, HalfNum, Num, Num) && checkHalf(Mask, 0, HalfNum, 0)) { ++ return SDValue(DAG.getMachineNode(LoongArch::XVPERMI_Q, DL, VT, Op2, Op1, ++ DAG.getTargetConstant(48, DL, MVT::i32)), ++ 0); ++ } ++ ++ return SDValue(); ++} ++ ++static bool checkHalfUndef(ArrayRef Mask, int Lo, int Hi) { ++ ++ for (int i = Lo; i < Hi; i++) ++ if (Mask[i] != -1) ++ return false; ++ return true; ++} ++ ++// Lowering vectors with half undef data, ++// use EXTRACT_SUBVECTOR and INSERT_SUBVECTOR instead of VECTOR_SHUFFLE ++static SDValue lowerHalfUndef(const SDLoc &DL, MVT VT, SDValue Op1, SDValue Op2, ++ ArrayRef Mask, SelectionDAG &DAG) { ++ ++ int Num = VT.getVectorNumElements(); ++ int HalfNum = Num / 2; ++ MVT HalfVT = 
MVT::getVectorVT(VT.getVectorElementType(), HalfNum); ++ MVT VT1 = Op1.getSimpleValueType(); ++ SDValue Op; ++ ++ bool check1 = Op1->isUndef() && (!Op2->isUndef()); ++ bool check2 = Op2->isUndef() && (!Op1->isUndef()); ++ ++ if ((check1 || check2) && (VT1 == VT)) { ++ if (check1) { ++ Op = DAG.getNode(ISD::BITCAST, DL, MVT::v4i64, Op2); ++ } else if (check2) { ++ Op = DAG.getNode(ISD::BITCAST, DL, MVT::v4i64, Op1); ++ } ++ ++ if (VT == MVT::v32i8 && CheckRev(Mask)) { ++ SDValue Vector; ++ SDValue Rev[4]; ++ SDValue Ext[4]; ++ for (int i = 0; i < 4; i++) { ++ Ext[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, Op, ++ DAG.getConstant(i, DL, MVT::i32)); ++ Rev[i] = DAG.getNode(LoongArchISD::REVBD, DL, MVT::i64, Ext[i]); ++ } ++ ++ Vector = ++ DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i64, DAG.getUNDEF(VT), ++ Rev[3], DAG.getConstant(3, DL, MVT::i32)); ++ Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i64, Vector, ++ Rev[2], DAG.getConstant(2, DL, MVT::i32)); ++ Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i64, Vector, ++ Rev[1], DAG.getConstant(1, DL, MVT::i32)); ++ Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i64, Vector, ++ Rev[0], DAG.getConstant(0, DL, MVT::i32)); ++ ++ Vector = DAG.getNode(ISD::BITCAST, DL, MVT::v32i8, Vector); ++ ++ return Vector; ++ } ++ } ++ ++ if (checkHalfUndef(Mask, HalfNum, Num) && checkUndef(Mask, 0, HalfNum)) { ++ SDValue High = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, Op1, ++ DAG.getConstant(HalfNum, DL, MVT::i64)); ++ return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), High, ++ DAG.getConstant(0, DL, MVT::i64)); ++ } ++ ++ if (checkHalfUndef(Mask, HalfNum, Num) && (VT == MVT::v8i32) && ++ (Mask[0] == 0) && (Mask[1] == 1) && (Mask[2] == (Num + 2)) && ++ (Mask[3] == (Num + 3))) { ++ ++ SDValue Val1 = ++ SDValue(DAG.getMachineNode(LoongArch::XVPERMI_Q, DL, VT, Op2, Op1, ++ DAG.getTargetConstant(32, DL, MVT::i32)), ++ 0); ++ ++ SDValue Val2 = ++ SDValue(DAG.getMachineNode(LoongArch::XVPERMI_D, DL, VT, Val1, ++ DAG.getTargetConstant(12, DL, MVT::i32)), ++ 0); ++ ++ SDValue Val3 = SDValue( ++ DAG.getMachineNode(LoongArch::XVPERMI_Q, DL, VT, Val2, DAG.getUNDEF(VT), ++ DAG.getTargetConstant(2, DL, MVT::i32)), ++ 0); ++ return Val3; ++ } ++ ++ if (checkHalfUndef(Mask, 0, HalfNum) && checkUndef(Mask, HalfNum, Num)) { ++ SDValue Low = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, Op1, ++ DAG.getConstant(0, DL, MVT::i32)); ++ return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), Low, ++ DAG.getConstant(HalfNum, DL, MVT::i32)); ++ } ++ ++ if (checkHalfUndef(Mask, 0, HalfNum) && (VT == MVT::v8i32) && ++ (Mask[HalfNum] == HalfNum) && (Mask[HalfNum + 1] == (HalfNum + 1)) && ++ (Mask[HalfNum + 2] == (2 * Num - 2)) && ++ (Mask[HalfNum + 3] == (2 * Num - 1))) { ++ ++ SDValue Val1 = ++ SDValue(DAG.getMachineNode(LoongArch::XVPERMI_Q, DL, VT, Op2, Op1, ++ DAG.getTargetConstant(49, DL, MVT::i32)), ++ 0); ++ ++ SDValue Val2 = ++ SDValue(DAG.getMachineNode(LoongArch::XVPERMI_D, DL, VT, Val1, ++ DAG.getTargetConstant(12, DL, MVT::i32)), ++ 0); ++ ++ SDValue Val3 = SDValue( ++ DAG.getMachineNode(LoongArch::XVPERMI_Q, DL, VT, Val2, DAG.getUNDEF(VT), ++ DAG.getTargetConstant(32, DL, MVT::i32)), ++ 0); ++ return Val3; ++ } ++ ++ if ((VT == MVT::v8i32) || (VT == MVT::v4i64)) { ++ int def = 0; ++ int j = 0; ++ int ext[3]; ++ int ins[3]; ++ bool useOp1[3] = {true, true, true}; ++ bool checkdef = true; ++ ++ for (int i = 0; i < Num; i++) { ++ if (def > 2) { ++ checkdef = false; ++ break; ++ } ++ if (Mask[i] != 
-1) { ++ def++; ++ ins[j] = i; ++ if (Mask[i] >= Num) { ++ ext[j] = Mask[i] - Num; ++ useOp1[j] = false; ++ } else { ++ ext[j] = Mask[i]; ++ } ++ j++; ++ } ++ } ++ ++ if (checkdef) { ++ SDValue Vector = DAG.getUNDEF(VT); ++ EVT EltTy = VT.getVectorElementType(); ++ SDValue Ext[2]; ++ ++ if (check1 || check2) { ++ for (int i = 0; i < def; i++) { ++ if (check1) { ++ Ext[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltTy, Op2, ++ DAG.getConstant(ext[i], DL, MVT::i32)); ++ Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vector, Ext[i], ++ DAG.getConstant(ins[i], DL, MVT::i32)); ++ } else if (check2) { ++ Ext[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltTy, Op1, ++ DAG.getConstant(ext[i], DL, MVT::i32)); ++ Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vector, Ext[i], ++ DAG.getConstant(ins[i], DL, MVT::i32)); ++ } ++ } ++ return Vector; ++ } else { ++ for (int i = 0; i < def; i++) { ++ if (!useOp1[i]) { ++ Ext[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltTy, Op2, ++ DAG.getConstant(ext[i], DL, MVT::i32)); ++ Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vector, Ext[i], ++ DAG.getConstant(ins[i], DL, MVT::i32)); ++ } else { ++ Ext[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltTy, Op1, ++ DAG.getConstant(ext[i], DL, MVT::i32)); ++ Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vector, Ext[i], ++ DAG.getConstant(ins[i], DL, MVT::i32)); ++ } ++ } ++ return Vector; ++ } ++ } ++ } ++ ++ return SDValue(); ++} ++ ++static SDValue lowerHalfUndef_LSX(const SDLoc &DL, EVT ResTy, MVT VT, ++ SDValue Op1, SDValue Op2, ArrayRef Mask, ++ SelectionDAG &DAG) { ++ ++ MVT VT1 = Op1.getSimpleValueType(); ++ ++ bool check1 = Op1->isUndef() && (!Op2->isUndef()); ++ bool check2 = Op2->isUndef() && (!Op1->isUndef()); ++ ++ if ((check1 || check2) && (VT1 == VT)) { ++ SDValue Op; ++ ++ if (VT == MVT::v16i8 && CheckRev(Mask)) { ++ ++ if (check1) { ++ Op = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op2); ++ } else if (check2) { ++ Op = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op1); ++ } ++ ++ SDValue Vector; ++ SDValue Rev[2]; ++ SDValue Ext[2]; ++ for (int i = 0; i < 2; i++) { ++ Ext[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, Op, ++ DAG.getConstant(i, DL, MVT::i32)); ++ Rev[i] = DAG.getNode(LoongArchISD::REVBD, DL, MVT::i64, Ext[i]); ++ } ++ ++ Vector = ++ DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v2i64, DAG.getUNDEF(VT), ++ Rev[1], DAG.getConstant(1, DL, MVT::i32)); ++ Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v2i64, Vector, ++ Rev[0], DAG.getConstant(0, DL, MVT::i32)); ++ ++ Vector = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Vector); ++ ++ return Vector; ++ } ++ } ++ ++ return SDValue(); ++} ++ ++// Use SDNode of LoongArchINSVE instead of ++// a series of EXTRACT_VECTOR_ELT and INSERT_VECTOR_ELT ++static SDValue lowerVECTOR_SHUFFLE_INSVE(const SDLoc &DL, MVT VT, EVT ResTy, ++ SDValue Op1, SDValue Op2, ++ ArrayRef Mask, ++ SelectionDAG &DAG) { ++ ++ int Num = VT.getVectorNumElements(); ++ if (ResTy == MVT::v16i16 || ResTy == MVT::v32i8) ++ return SDValue(); ++ ++ int CheckOne = 0; ++ int CheckOther = 0; ++ int Idx; ++ ++ for (int i = 0; i < Num; i++) { ++ if ((Mask[i] == i) || (Mask[i] == -1)) { ++ CheckOther++; ++ } else if (Mask[i] == Num) { ++ CheckOne++; ++ Idx = i; ++ } else ++ return SDValue(); ++ } ++ ++ if ((CheckOne != 1) || (CheckOther != (Num - 1))) ++ return SDValue(); ++ else { ++ return DAG.getNode(LoongArchISD::INSVE, DL, ResTy, Op1, Op2, ++ DAG.getConstant(Idx, DL, MVT::i32)); ++ } ++ ++ return SDValue(); ++} ++ ++static SDValue 
lowerVECTOR_SHUFFLE_XVPICKVE(const SDLoc &DL, MVT VT, EVT ResTy, ++ SDValue Op1, SDValue Op2, ++ ArrayRef Mask, ++ SelectionDAG &DAG) { ++ ++ int Num = VT.getVectorNumElements(); ++ if (ResTy == MVT::v16i16 || ResTy == MVT::v32i8 || ++ (!ISD::isBuildVectorAllZeros(Op1.getNode()))) ++ return SDValue(); ++ ++ bool CheckV = true; ++ ++ if ((Mask[0] < Num) || (Mask[0] > (2 * Num - 1))) ++ CheckV = false; ++ ++ for (int i = 1; i < Num; i++) { ++ if (Mask[i] != 0) { ++ CheckV = false; ++ break; ++ } ++ } ++ ++ if (!CheckV) ++ return SDValue(); ++ else { ++ return DAG.getNode(LoongArchISD::XVPICKVE, DL, ResTy, Op1, Op2, ++ DAG.getConstant(Mask[0] - Num, DL, MVT::i32)); ++ } ++ ++ return SDValue(); ++} ++ ++static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, MVT VT, EVT ResTy, ++ SDValue Op1, SDValue Op2, ++ ArrayRef Mask, ++ SelectionDAG &DAG) { ++ ++ if (VT == MVT::v4i64) { ++ int Num = VT.getVectorNumElements(); ++ ++ bool CheckV = true; ++ for (int i = 0; i < Num; i++) { ++ if (Mask[i] != (i * 2)) { ++ CheckV = false; ++ break; ++ } ++ } ++ ++ if (!CheckV) ++ return SDValue(); ++ else { ++ SDValue Res = DAG.getNode(LoongArchISD::XVSHUF4I, DL, ResTy, Op1, Op2, ++ DAG.getConstant(8, DL, MVT::i32)); ++ return DAG.getNode(LoongArchISD::XVPERMI, DL, ResTy, Res, ++ DAG.getConstant(0xD8, DL, MVT::i32)); ++ } ++ } else ++ return SDValue(); ++} ++ ++// Lower VECTOR_SHUFFLE into one of a number of instructions depending on the ++// indices in the shuffle. ++SDValue LoongArchTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op, ++ SelectionDAG &DAG) const { ++ ShuffleVectorSDNode *Node = cast(Op); ++ EVT ResTy = Op->getValueType(0); ++ ArrayRef Mask = Node->getMask(); ++ SDValue Op1 = Op.getOperand(0); ++ SDValue Op2 = Op.getOperand(1); ++ MVT VT = Op.getSimpleValueType(); ++ SDLoc DL(Op); ++ ++ if (ResTy.is128BitVector()) { ++ ++ int ResTyNumElts = ResTy.getVectorNumElements(); ++ SmallVector Indices; ++ ++ for (int i = 0; i < ResTyNumElts; ++i) ++ Indices.push_back(Node->getMaskElt(i)); ++ ++ SDValue Result; ++ if (isVECTOR_SHUFFLE_VREPLVEI(Op, ResTy, Indices, DAG)) ++ return lowerVECTOR_SHUFFLE_VSHF(Op, ResTy, Indices, DAG); ++ if ((Result = lowerVECTOR_SHUFFLE_VPACKEV(Op, ResTy, Indices, DAG))) ++ return Result; ++ if ((Result = lowerVECTOR_SHUFFLE_VPACKOD(Op, ResTy, Indices, DAG))) ++ return Result; ++ if ((Result = lowerVECTOR_SHUFFLE_VILVH(Op, ResTy, Indices, DAG))) ++ return Result; ++ if ((Result = lowerVECTOR_SHUFFLE_VILVL(Op, ResTy, Indices, DAG))) ++ return Result; ++ if ((Result = lowerVECTOR_SHUFFLE_VPICKEV(Op, ResTy, Indices, DAG))) ++ return Result; ++ if ((Result = lowerVECTOR_SHUFFLE_VPICKOD(Op, ResTy, Indices, DAG))) ++ return Result; ++ if ((Result = lowerVECTOR_SHUFFLE_SHF(Op, ResTy, Indices, DAG))) ++ return Result; ++ if ((Result = lowerHalfUndef_LSX(DL, ResTy, VT, Op1, Op2, Mask, DAG))) ++ return Result; ++ return lowerVECTOR_SHUFFLE_VSHF(Op, ResTy, Indices, DAG); ++ ++ } else if (ResTy.is256BitVector()) { ++ int ResTyNumElts = ResTy.getVectorNumElements(); ++ SmallVector Indices; ++ ++ for (int i = 0; i < ResTyNumElts; ++i) ++ Indices.push_back(Node->getMaskElt(i)); ++ ++ SDValue Result; ++ if ((Result = lowerHalfHalf(DL, VT, Op1, Op2, Mask, DAG))) ++ return Result; ++ if ((Result = lowerHalfUndef(DL, VT, Op1, Op2, Mask, DAG))) ++ return Result; ++ if (isVECTOR_SHUFFLE_XVREPLVEI(Op, ResTy, Indices, DAG)) ++ return SDValue(); ++ if ((Result = lowerVECTOR_SHUFFLE_XVPACKEV(Op, ResTy, Indices, DAG))) ++ return Result; ++ if ((Result = lowerVECTOR_SHUFFLE_XVPACKOD(Op, ResTy, Indices, 
DAG))) ++ return Result; ++ if ((Result = lowerVECTOR_SHUFFLE_XVILVH(Op, ResTy, Indices, DAG))) ++ return Result; ++ if ((Result = lowerVECTOR_SHUFFLE_XVILVL(Op, ResTy, Indices, DAG))) ++ return Result; ++ if ((Result = lowerVECTOR_SHUFFLE_XVPICKEV(Op, ResTy, Indices, DAG))) ++ return Result; ++ if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD(Op, ResTy, Indices, DAG))) ++ return Result; ++ if ((Result = lowerVECTOR_SHUFFLE_XSHF(Op, ResTy, Indices, DAG))) ++ return Result; ++ if ((Result = ++ lowerVECTOR_SHUFFLE_INSVE(DL, VT, ResTy, Op1, Op2, Mask, DAG))) ++ return Result; ++ if ((Result = ++ lowerVECTOR_SHUFFLE_XVPICKVE(DL, VT, ResTy, Op1, Op2, Mask, DAG))) ++ return Result; ++ if ((Result = ++ lowerVECTOR_SHUFFLE_XVSHUF(DL, VT, ResTy, Op1, Op2, Mask, DAG))) ++ return Result; ++ } ++ ++ return SDValue(); ++} ++ ++SDValue LoongArchTargetLowering::lowerEH_DWARF_CFA(SDValue Op, ++ SelectionDAG &DAG) const { ++ ++ // Return a fixed StackObject with offset 0 which points to the old stack ++ // pointer. ++ MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); ++ EVT ValTy = Op->getValueType(0); ++ int FI = MFI.CreateFixedObject(Op.getValueSizeInBits() / 8, 0, false); ++ return DAG.getFrameIndex(FI, ValTy); ++} ++ ++// Check whether the tail call optimization conditions are met ++bool LoongArchTargetLowering::isEligibleForTailCallOptimization( ++ const CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF, ++ unsigned NextStackOffset, const LoongArchFunctionInfo &FI) const { ++ ++ auto CalleeCC = CLI.CallConv; ++ auto IsVarArg = CLI.IsVarArg; ++ auto &Outs = CLI.Outs; ++ auto &Caller = MF.getFunction(); ++ auto CallerCC = Caller.getCallingConv(); ++ ++ if (Caller.getFnAttribute("disable-tail-calls").getValueAsString() == "true") ++ return false; ++ ++ if (Caller.hasFnAttribute("interrupt")) ++ return false; ++ ++ if (IsVarArg) ++ return false; ++ ++ if (getTargetMachine().getCodeModel() == CodeModel::Large) ++ return false; ++ ++ if (getTargetMachine().getRelocationModel() == Reloc::Static) ++ return false; ++ ++ // Do not tail call optimize if the stack is used to pass parameters. ++ if (CCInfo.getNextStackOffset() != 0) ++ return false; ++ ++ // Do not tail call optimize functions with byval parameters. ++ for (auto &Arg : Outs) ++ if (Arg.Flags.isByVal()) ++ return false; ++ ++ // Do not tail call optimize if either caller or callee uses structret ++ // semantics. ++ auto IsCallerStructRet = Caller.hasStructRetAttr(); ++ auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet(); ++ if (IsCallerStructRet || IsCalleeStructRet) ++ return false; ++ ++ // The callee has to preserve all registers the caller needs to preserve. ++ const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo(); ++ const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC); ++ if (CalleeCC != CallerCC) { ++ const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC); ++ if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved)) ++ return false; ++ } ++ ++ // Return false if either the callee or caller has a byval argument. ++ if (CCInfo.getInRegsParamsCount() > 0 || FI.hasByvalArg()) ++ return false; ++ ++ // Return true if the callee's argument area is no larger than the ++ // caller's. 
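++  // The outgoing arguments of the tail call must fit in the stack area the
++  // caller already reserved for its own incoming arguments, so the frame
++  // never has to grow.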
++ return NextStackOffset <= FI.getIncomingArgSize(); ++} ++ ++//===----------------------------------------------------------------------===// ++// Calling Convention Implementation ++//===----------------------------------------------------------------------===// ++ ++//===----------------------------------------------------------------------===// ++// TODO: Implement a generic logic using tblgen that can support this. ++// LoongArch LP32 ABI rules: ++// --- ++// i32 - Passed in A0, A1, A2, A3 and stack ++// f32 - Only passed in f32 registers if no int reg has been used yet to hold ++// an argument. Otherwise, passed in A1, A2, A3 and stack. ++// f64 - Only passed in two aliased f32 registers if no int reg has been used ++// yet to hold an argument. Otherwise, use A2, A3 and stack. If A1 is ++// not used, it must be shadowed. If only A3 is available, shadow it and ++// go to stack. ++// vXiX - Received as scalarized i32s, passed in A0 - A3 and the stack. ++// vXf32 - Passed in either a pair of registers {A0, A1}, {A2, A3} or {A0 - A3} ++// with the remainder spilled to the stack. ++// vXf64 - Passed in either {A0, A1, A2, A3} or {A2, A3} and in both cases ++// spilling the remainder to the stack. ++// ++// For vararg functions, all arguments are passed in A0, A1, A2, A3 and stack. ++//===----------------------------------------------------------------------===// ++ ++static bool CC_LoongArchLP32(unsigned ValNo, MVT ValVT, MVT LocVT, ++ CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, ++ CCState &State, ArrayRef F64Regs) { ++ static const MCPhysReg IntRegs[] = { LoongArch::A0, LoongArch::A1, LoongArch::A2, LoongArch::A3 }; ++ ++ const LoongArchCCState * LoongArchState = static_cast(&State); ++ ++ static const MCPhysReg F32Regs[] = { LoongArch::F12, LoongArch::F14 }; ++ ++ static const MCPhysReg FloatVectorIntRegs[] = { LoongArch::A0, LoongArch::A2 }; ++ ++ // Do not process byval args here. ++ if (ArgFlags.isByVal()) ++ return true; ++ ++ ++ // Promote i8 and i16 ++ if (LocVT == MVT::i8 || LocVT == MVT::i16) { ++ LocVT = MVT::i32; ++ if (ArgFlags.isSExt()) ++ LocInfo = CCValAssign::SExt; ++ else if (ArgFlags.isZExt()) ++ LocInfo = CCValAssign::ZExt; ++ else ++ LocInfo = CCValAssign::AExt; ++ } ++ ++ unsigned Reg; ++ ++ // f32 and f64 are allocated in A0, A1, A2, A3 when either of the following ++ // is true: function is vararg, argument is 3rd or higher, there is previous ++ // argument which is not f32 or f64. ++ bool AllocateFloatsInIntReg = State.isVarArg() || ValNo > 1 || ++ State.getFirstUnallocated(F32Regs) != ValNo; ++ Align OrigAlign = ArgFlags.getNonZeroOrigAlign(); ++ bool isI64 = (ValVT == MVT::i32 && OrigAlign == Align(8)); ++ bool isVectorFloat = LoongArchState->WasOriginalArgVectorFloat(ValNo); ++ ++ // The LoongArch vector ABI for floats passes them in a pair of registers ++ if (ValVT == MVT::i32 && isVectorFloat) { ++ // This is the start of an vector that was scalarized into an unknown number ++ // of components. It doesn't matter how many there are. Allocate one of the ++ // notional 8 byte aligned registers which map onto the argument stack, and ++ // shadow the register lost to alignment requirements. ++ if (ArgFlags.isSplit()) { ++ Reg = State.AllocateReg(FloatVectorIntRegs); ++ if (Reg == LoongArch::A2) ++ State.AllocateReg(LoongArch::A1); ++ else if (Reg == 0) ++ State.AllocateReg(LoongArch::A3); ++ } else { ++ // If we're an intermediate component of the split, we can just attempt to ++ // allocate a register directly. 
++ Reg = State.AllocateReg(IntRegs); ++ } ++ } else if (ValVT == MVT::i32 || (ValVT == MVT::f32 && AllocateFloatsInIntReg)) { ++ Reg = State.AllocateReg(IntRegs); ++ // If this is the first part of an i64 arg, ++ // the allocated register must be either A0 or A2. ++ if (isI64 && (Reg == LoongArch::A1 || Reg == LoongArch::A3)) ++ Reg = State.AllocateReg(IntRegs); ++ LocVT = MVT::i32; ++ } else if (ValVT == MVT::f64 && AllocateFloatsInIntReg) { ++ // Allocate int register and shadow next int register. If first ++ // available register is LoongArch::A1 or LoongArch::A3, shadow it too. ++ Reg = State.AllocateReg(IntRegs); ++ if (Reg == LoongArch::A1 || Reg == LoongArch::A3) ++ Reg = State.AllocateReg(IntRegs); ++ State.AllocateReg(IntRegs); ++ LocVT = MVT::i32; ++ } else if (ValVT.isFloatingPoint() && !AllocateFloatsInIntReg) { ++ // we are guaranteed to find an available float register ++ if (ValVT == MVT::f32) { ++ Reg = State.AllocateReg(F32Regs); ++ // Shadow int register ++ State.AllocateReg(IntRegs); ++ } else { ++ Reg = State.AllocateReg(F64Regs); ++ // Shadow int registers ++ unsigned Reg2 = State.AllocateReg(IntRegs); ++ if (Reg2 == LoongArch::A1 || Reg2 == LoongArch::A3) ++ State.AllocateReg(IntRegs); ++ State.AllocateReg(IntRegs); ++ } ++ } else ++ llvm_unreachable("Cannot handle this ValVT."); ++ ++ if (!Reg) { ++ unsigned Offset = State.AllocateStack(ValVT.getStoreSize(), OrigAlign); ++ State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); ++ } else ++ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); ++ ++ return false; ++} ++ ++static bool CC_LoongArchLP32_FP32(unsigned ValNo, MVT ValVT, ++ MVT LocVT, CCValAssign::LocInfo LocInfo, ++ ISD::ArgFlagsTy ArgFlags, CCState &State) { ++ static const MCPhysReg F64Regs[] = {LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, \ ++ LoongArch::F3_64, LoongArch::F4_64, LoongArch::F5_64, \ ++ LoongArch::F6_64, LoongArch::F7_64 }; ++ ++ return CC_LoongArchLP32(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State, F64Regs); ++} ++ ++static bool CC_LoongArchLP32_FP64(unsigned ValNo, MVT ValVT, ++ MVT LocVT, CCValAssign::LocInfo LocInfo, ++ ISD::ArgFlagsTy ArgFlags, CCState &State) { ++ static const MCPhysReg F64Regs[] = {LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, \ ++ LoongArch::F3_64, LoongArch::F4_64, LoongArch::F5_64, \ ++ LoongArch::F6_64, LoongArch::F7_64 }; ++ ++ return CC_LoongArchLP32(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State, F64Regs); ++} ++ ++static bool CC_LoongArch_F128(unsigned ValNo, MVT ValVT, ++ MVT LocVT, CCValAssign::LocInfo LocInfo, ++ ISD::ArgFlagsTy ArgFlags, CCState &State) LLVM_ATTRIBUTE_UNUSED; ++ ++static bool CC_LoongArch_F128(unsigned ValNo, MVT ValVT, ++ MVT LocVT, CCValAssign::LocInfo LocInfo, ++ ISD::ArgFlagsTy ArgFlags, CCState &State) { ++ ++ static const MCPhysReg ArgRegs[8] = { ++ LoongArch::A0_64, LoongArch::A1_64, LoongArch::A2_64, LoongArch::A3_64, ++ LoongArch::A4_64, LoongArch::A5_64, LoongArch::A6_64, LoongArch::A7_64}; ++ ++ unsigned Idx = State.getFirstUnallocated(ArgRegs); ++ // Skip 'odd' register if necessary. 
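++  // An f128 value occupies an aligned (even, odd) pair of GPRs, so if the next
++  // free register has an odd index it is allocated here and left unused.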
++ if (!ArgFlags.isSplitEnd() && Idx != array_lengthof(ArgRegs) && Idx % 2 == 1) ++ State.AllocateReg(ArgRegs); ++ return true; ++} ++ ++static bool CC_LoongArchLP32(unsigned ValNo, MVT ValVT, MVT LocVT, ++ CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, ++ CCState &State) LLVM_ATTRIBUTE_UNUSED; ++ ++#include "LoongArchGenCallingConv.inc" ++ ++ CCAssignFn *LoongArchTargetLowering::CCAssignFnForCall() const{ ++ return CC_LoongArch; ++ } ++ ++ CCAssignFn *LoongArchTargetLowering::CCAssignFnForReturn() const{ ++ return RetCC_LoongArch; ++ } ++ ++//===----------------------------------------------------------------------===// ++// Call Calling Convention Implementation ++//===----------------------------------------------------------------------===// ++SDValue LoongArchTargetLowering::passArgOnStack(SDValue StackPtr, unsigned Offset, ++ SDValue Chain, SDValue Arg, ++ const SDLoc &DL, bool IsTailCall, ++ SelectionDAG &DAG) const { ++ if (!IsTailCall) { ++ SDValue PtrOff = ++ DAG.getNode(ISD::ADD, DL, getPointerTy(DAG.getDataLayout()), StackPtr, ++ DAG.getIntPtrConstant(Offset, DL)); ++ return DAG.getStore(Chain, DL, Arg, PtrOff, MachinePointerInfo()); ++ } ++ ++ MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); ++ int FI = MFI.CreateFixedObject(Arg.getValueSizeInBits() / 8, Offset, false); ++ SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); ++ return DAG.getStore(Chain, DL, Arg, FIN, MachinePointerInfo(), ++ /* Alignment = */ 0, MachineMemOperand::MOVolatile); ++} ++ ++void LoongArchTargetLowering::getOpndList( ++ SmallVectorImpl &Ops, ++ std::deque> &RegsToPass, bool IsPICCall, ++ bool GlobalOrExternal, bool IsCallReloc, CallLoweringInfo &CLI, ++ SDValue Callee, SDValue Chain, bool IsTailCall) const { ++ // Build a sequence of copy-to-reg nodes chained together with token ++ // chain and flag operands which copy the outgoing args into registers. ++ // The InFlag in necessary since all emitted instructions must be ++ // stuck together. ++ SDValue InFlag; ++ ++ Ops.push_back(Callee); ++ ++ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { ++ Chain = CLI.DAG.getCopyToReg(Chain, CLI.DL, RegsToPass[i].first, ++ RegsToPass[i].second, InFlag); ++ InFlag = Chain.getValue(1); ++ } ++ ++ // Add argument registers to the end of the list so that they are ++ // known live into the call. ++ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) ++ Ops.push_back(CLI.DAG.getRegister(RegsToPass[i].first, ++ RegsToPass[i].second.getValueType())); ++ ++ if (!IsTailCall) { ++ // Add a register mask operand representing the call-preserved registers. ++ const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); ++ const uint32_t *Mask = ++ TRI->getCallPreservedMask(CLI.DAG.getMachineFunction(), CLI.CallConv); ++ assert(Mask && "Missing call preserved mask for calling convention"); ++ Ops.push_back(CLI.DAG.getRegisterMask(Mask)); ++ } ++ ++ if (InFlag.getNode()) ++ Ops.push_back(InFlag); ++} ++ ++void LoongArchTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI, ++ SDNode *Node) const { ++ switch (MI.getOpcode()) { ++ default: ++ return; ++ } ++} ++ ++/// LowerCall - functions arguments are copied from virtual regs to ++/// (physical regs)/(stack frame), CALLSEQ_START and CALLSEQ_END are emitted. 
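++/// Byval arguments are copied with passByValArg, and eligible calls are
++/// emitted as LoongArchISD::TailCall instead of LoongArchISD::JmpLink.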
++SDValue ++LoongArchTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, ++ SmallVectorImpl &InVals) const { ++ SelectionDAG &DAG = CLI.DAG; ++ SDLoc DL = CLI.DL; ++ SmallVectorImpl &Outs = CLI.Outs; ++ SmallVectorImpl &OutVals = CLI.OutVals; ++ SmallVectorImpl &Ins = CLI.Ins; ++ SDValue Chain = CLI.Chain; ++ SDValue Callee = CLI.Callee; ++ bool &IsTailCall = CLI.IsTailCall; ++ CallingConv::ID CallConv = CLI.CallConv; ++ bool IsVarArg = CLI.IsVarArg; ++ ++ MachineFunction &MF = DAG.getMachineFunction(); ++ MachineFrameInfo &MFI = MF.getFrameInfo(); ++ const TargetFrameLowering *TFL = Subtarget.getFrameLowering(); ++ bool IsPIC = isPositionIndependent(); ++ ++ // Analyze operands of the call, assigning locations to each operand. ++ SmallVector ArgLocs; ++ LoongArchCCState CCInfo( ++ CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs, *DAG.getContext(), ++ LoongArchCCState::getSpecialCallingConvForCallee(Callee.getNode(), Subtarget)); ++ ++ const ExternalSymbolSDNode *ES = ++ dyn_cast_or_null(Callee.getNode()); ++ ++ // There is one case where CALLSEQ_START..CALLSEQ_END can be nested, which ++ // is during the lowering of a call with a byval argument which produces ++ // a call to memcpy. For the LP32 case, this causes the caller to allocate ++ // stack space for the reserved argument area for the callee, then recursively ++ // again for the memcpy call. In the NEWABI case, this doesn't occur as those ++ // ABIs mandate that the callee allocates the reserved argument area. We do ++ // still produce nested CALLSEQ_START..CALLSEQ_END with zero space though. ++ // ++ // If the callee has a byval argument and memcpy is used, we are mandated ++ // to already have produced a reserved argument area for the callee for LP32. ++ // Therefore, the reserved argument area can be reused for both calls. ++ // ++ // Other cases of calling memcpy cannot have a chain with a CALLSEQ_START ++ // present, as we have yet to hook that node onto the chain. ++ // ++ // Hence, the CALLSEQ_START and CALLSEQ_END nodes can be eliminated in this ++ // case. GCC does a similar trick, in that wherever possible, it calculates ++ // the maximum out going argument area (including the reserved area), and ++ // preallocates the stack space on entrance to the caller. ++ // ++ // FIXME: We should do the same for efficiency and space. ++ ++ bool MemcpyInByVal = ES && ++ StringRef(ES->getSymbol()) == StringRef("memcpy") && ++ Chain.getOpcode() == ISD::CALLSEQ_START; ++ ++ CCInfo.AnalyzeCallOperands(Outs, CC_LoongArch, CLI.getArgs(), ++ ES ? ES->getSymbol() : nullptr); ++ ++ // Get a count of how many bytes are to be pushed on the stack. ++ unsigned NextStackOffset = CCInfo.getNextStackOffset(); ++ ++ // Check if it's really possible to do a tail call. Restrict it to functions ++ // that are part of this compilation unit. ++ if (IsTailCall) { ++ IsTailCall = isEligibleForTailCallOptimization( ++ CCInfo, CLI, MF, NextStackOffset, *MF.getInfo()); ++ if (GlobalAddressSDNode *G = dyn_cast(Callee)) { ++ if (G->getGlobal()->hasExternalWeakLinkage()) ++ IsTailCall = false; ++ } ++ } ++ if (!IsTailCall && CLI.CB && CLI.CB->isMustTailCall()) ++ report_fatal_error("failed to perform tail call elimination on a call " ++ "site marked musttail"); ++ ++ if (IsTailCall) ++ ++NumTailCalls; ++ ++ // Chain is the output chain of the last Load/Store or CopyToReg node. ++ // ByValChain is the output chain of the last Memcpy node created for copying ++ // byval arguments to the stack. 
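++  // Round the outgoing argument area up to the stack alignment before emitting
++  // CALLSEQ_START so the stack pointer stays aligned across the call.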
++ unsigned StackAlignment = TFL->getStackAlignment(); ++ NextStackOffset = alignTo(NextStackOffset, StackAlignment); ++ SDValue NextStackOffsetVal = DAG.getIntPtrConstant(NextStackOffset, DL, true); ++ ++ if (!(IsTailCall || MemcpyInByVal)) ++ Chain = DAG.getCALLSEQ_START(Chain, NextStackOffset, 0, DL); ++ ++ SDValue StackPtr = ++ DAG.getCopyFromReg(Chain, DL, ABI.IsLP64() ? LoongArch::SP_64 : LoongArch::SP, ++ getPointerTy(DAG.getDataLayout())); ++ ++ std::deque> RegsToPass; ++ SmallVector MemOpChains; ++ ++ CCInfo.rewindByValRegsInfo(); ++ ++ // Walk the register/memloc assignments, inserting copies/loads. ++ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { ++ SDValue Arg = OutVals[i]; ++ CCValAssign &VA = ArgLocs[i]; ++ MVT ValVT = VA.getValVT(), LocVT = VA.getLocVT(); ++ ISD::ArgFlagsTy Flags = Outs[i].Flags; ++ bool UseUpperBits = false; ++ ++ // ByVal Arg. ++ if (Flags.isByVal()) { ++ unsigned FirstByValReg, LastByValReg; ++ unsigned ByValIdx = CCInfo.getInRegsParamsProcessed(); ++ CCInfo.getInRegsParamInfo(ByValIdx, FirstByValReg, LastByValReg); ++ ++ assert(Flags.getByValSize() && ++ "ByVal args of size 0 should have been ignored by front-end."); ++ assert(ByValIdx < CCInfo.getInRegsParamsCount()); ++ assert(!IsTailCall && ++ "Do not tail-call optimize if there is a byval argument."); ++ passByValArg(Chain, DL, RegsToPass, MemOpChains, StackPtr, MFI, DAG, Arg, ++ FirstByValReg, LastByValReg, Flags, ++ VA); ++ CCInfo.nextInRegsParam(); ++ continue; ++ } ++ ++ // Promote the value if needed. ++ switch (VA.getLocInfo()) { ++ default: ++ llvm_unreachable("Unknown loc info!"); ++ case CCValAssign::Full: ++ if (VA.isRegLoc()) { ++ if ((ValVT == MVT::f32 && LocVT == MVT::i32) || ++ (ValVT == MVT::f64 && LocVT == MVT::i64) || ++ (ValVT == MVT::i64 && LocVT == MVT::f64)) ++ Arg = DAG.getNode(ISD::BITCAST, DL, LocVT, Arg); ++ } ++ break; ++ case CCValAssign::BCvt: ++ Arg = DAG.getNode(ISD::BITCAST, DL, LocVT, Arg); ++ break; ++ case CCValAssign::SExtUpper: ++ UseUpperBits = true; ++ LLVM_FALLTHROUGH; ++ case CCValAssign::SExt: ++ Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, LocVT, Arg); ++ break; ++ case CCValAssign::ZExtUpper: ++ UseUpperBits = true; ++ LLVM_FALLTHROUGH; ++ case CCValAssign::ZExt: ++ Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, LocVT, Arg); ++ break; ++ case CCValAssign::AExtUpper: ++ UseUpperBits = true; ++ LLVM_FALLTHROUGH; ++ case CCValAssign::AExt: ++ Arg = DAG.getNode(ISD::ANY_EXTEND, DL, LocVT, Arg); ++ break; ++ } ++ ++ if (UseUpperBits) { ++ unsigned ValSizeInBits = Outs[i].ArgVT.getSizeInBits(); ++ unsigned LocSizeInBits = VA.getLocVT().getSizeInBits(); ++ Arg = DAG.getNode( ++ ISD::SHL, DL, VA.getLocVT(), Arg, ++ DAG.getConstant(LocSizeInBits - ValSizeInBits, DL, VA.getLocVT())); ++ } ++ ++ // Arguments that can be passed on register must be kept at ++ // RegsToPass vector ++ if (VA.isRegLoc()) { ++ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); ++ continue; ++ } ++ ++ // Register can't get to this point... ++ assert(VA.isMemLoc()); ++ ++ // emit ISD::STORE whichs stores the ++ // parameter value to a stack Location ++ MemOpChains.push_back(passArgOnStack(StackPtr, VA.getLocMemOffset(), ++ Chain, Arg, DL, IsTailCall, DAG)); ++ } ++ ++ // Transform all store nodes into one single node because all store ++ // nodes are independent of each other. 
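++  // A TokenFactor joins the independent store chains into the single chain
++  // operand consumed by the call node.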
++ if (!MemOpChains.empty()) ++ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains); ++ ++ // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every ++ // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol ++ // node so that legalize doesn't hack it. ++ ++ bool GlobalOrExternal = false, IsCallReloc = false; ++ ++ if (GlobalAddressSDNode *G = dyn_cast(Callee)) { ++ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, ++ getPointerTy(DAG.getDataLayout()), 0, ++ LoongArchII::MO_NO_FLAG); ++ GlobalOrExternal = true; ++ } ++ else if (ExternalSymbolSDNode *S = dyn_cast(Callee)) { ++ const char *Sym = S->getSymbol(); ++ Callee = DAG.getTargetExternalSymbol( ++ Sym, getPointerTy(DAG.getDataLayout()), LoongArchII::MO_NO_FLAG); ++ ++ GlobalOrExternal = true; ++ } ++ ++ SmallVector Ops(1, Chain); ++ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); ++ ++ getOpndList(Ops, RegsToPass, IsPIC, GlobalOrExternal, IsCallReloc, CLI, ++ Callee, Chain, IsTailCall); ++ ++ if (IsTailCall) { ++ MF.getFrameInfo().setHasTailCall(); ++ return DAG.getNode(LoongArchISD::TailCall, DL, MVT::Other, Ops); ++ } ++ ++ Chain = DAG.getNode(LoongArchISD::JmpLink, DL, NodeTys, Ops); ++ DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge); ++ SDValue InFlag = Chain.getValue(1); ++ ++ // Create the CALLSEQ_END node in the case of where it is not a call to ++ // memcpy. ++ if (!(MemcpyInByVal)) { ++ Chain = DAG.getCALLSEQ_END(Chain, NextStackOffsetVal, ++ DAG.getIntPtrConstant(0, DL, true), InFlag, DL); ++ InFlag = Chain.getValue(1); ++ } ++ ++ // Handle result values, copying them out of physregs into vregs that we ++ // return. ++ return LowerCallResult(Chain, InFlag, CallConv, IsVarArg, Ins, DL, DAG, ++ InVals, CLI); ++} ++ ++/// LowerCallResult - Lower the result values of a call into the ++/// appropriate copies out of appropriate physical registers. ++SDValue LoongArchTargetLowering::LowerCallResult( ++ SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool IsVarArg, ++ const SmallVectorImpl &Ins, const SDLoc &DL, ++ SelectionDAG &DAG, SmallVectorImpl &InVals, ++ TargetLowering::CallLoweringInfo &CLI) const { ++ // Assign locations to each value returned by this call. ++ SmallVector RVLocs; ++ LoongArchCCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs, ++ *DAG.getContext()); ++ ++ const ExternalSymbolSDNode *ES = ++ dyn_cast_or_null(CLI.Callee.getNode()); ++ CCInfo.AnalyzeCallResult(Ins, RetCC_LoongArch, CLI.RetTy, ++ ES ? ES->getSymbol() : nullptr); ++ ++ // Copy all of the result registers out of their specified physreg. ++ for (unsigned i = 0; i != RVLocs.size(); ++i) { ++ CCValAssign &VA = RVLocs[i]; ++ assert(VA.isRegLoc() && "Can only return in registers!"); ++ ++ SDValue Val = DAG.getCopyFromReg(Chain, DL, RVLocs[i].getLocReg(), ++ RVLocs[i].getLocVT(), InFlag); ++ Chain = Val.getValue(1); ++ InFlag = Val.getValue(2); ++ ++ if (VA.isUpperBitsInLoc()) { ++ unsigned ValSizeInBits = Ins[i].ArgVT.getSizeInBits(); ++ unsigned LocSizeInBits = VA.getLocVT().getSizeInBits(); ++ unsigned Shift = ++ VA.getLocInfo() == CCValAssign::ZExtUpper ? 
ISD::SRL : ISD::SRA; ++ Val = DAG.getNode( ++ Shift, DL, VA.getLocVT(), Val, ++ DAG.getConstant(LocSizeInBits - ValSizeInBits, DL, VA.getLocVT())); ++ } ++ ++ switch (VA.getLocInfo()) { ++ default: ++ llvm_unreachable("Unknown loc info!"); ++ case CCValAssign::Full: ++ break; ++ case CCValAssign::BCvt: ++ Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val); ++ break; ++ case CCValAssign::AExt: ++ case CCValAssign::AExtUpper: ++ Val = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Val); ++ break; ++ case CCValAssign::ZExt: ++ case CCValAssign::ZExtUpper: ++ Val = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Val, ++ DAG.getValueType(VA.getValVT())); ++ Val = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Val); ++ break; ++ case CCValAssign::SExt: ++ case CCValAssign::SExtUpper: ++ Val = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Val, ++ DAG.getValueType(VA.getValVT())); ++ Val = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Val); ++ break; ++ } ++ ++ InVals.push_back(Val); ++ } ++ ++ return Chain; ++} ++ ++static SDValue UnpackFromArgumentSlot(SDValue Val, const CCValAssign &VA, ++ EVT ArgVT, const SDLoc &DL, ++ SelectionDAG &DAG) { ++ MVT LocVT = VA.getLocVT(); ++ EVT ValVT = VA.getValVT(); ++ ++ // Shift into the upper bits if necessary. ++ switch (VA.getLocInfo()) { ++ default: ++ break; ++ case CCValAssign::AExtUpper: ++ case CCValAssign::SExtUpper: ++ case CCValAssign::ZExtUpper: { ++ unsigned ValSizeInBits = ArgVT.getSizeInBits(); ++ unsigned LocSizeInBits = VA.getLocVT().getSizeInBits(); ++ unsigned Opcode = ++ VA.getLocInfo() == CCValAssign::ZExtUpper ? ISD::SRL : ISD::SRA; ++ Val = DAG.getNode( ++ Opcode, DL, VA.getLocVT(), Val, ++ DAG.getConstant(LocSizeInBits - ValSizeInBits, DL, VA.getLocVT())); ++ break; ++ } ++ } ++ ++ // If this is an value smaller than the argument slot size (32-bit for LP32, ++ // 64-bit for LPX32/LP64), it has been promoted in some way to the argument slot ++ // size. Extract the value and insert any appropriate assertions regarding ++ // sign/zero extension. ++ switch (VA.getLocInfo()) { ++ default: ++ llvm_unreachable("Unknown loc info!"); ++ case CCValAssign::Full: ++ break; ++ case CCValAssign::AExtUpper: ++ case CCValAssign::AExt: ++ Val = DAG.getNode(ISD::TRUNCATE, DL, ValVT, Val); ++ break; ++ case CCValAssign::SExtUpper: ++ case CCValAssign::SExt: { ++ if ((ArgVT == MVT::i1) || (ArgVT == MVT::i8) || (ArgVT == MVT::i16)) { ++ SDValue SubReg = DAG.getTargetConstant(LoongArch::sub_32, DL, MVT::i32); ++ Val = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, ValVT, ++ Val, SubReg), ++ 0); ++ } else { ++ Val = ++ DAG.getNode(ISD::AssertSext, DL, LocVT, Val, DAG.getValueType(ValVT)); ++ Val = DAG.getNode(ISD::TRUNCATE, DL, ValVT, Val); ++ } ++ break; ++ } ++ case CCValAssign::ZExtUpper: ++ case CCValAssign::ZExt: ++ Val = DAG.getNode(ISD::AssertZext, DL, LocVT, Val, DAG.getValueType(ValVT)); ++ Val = DAG.getNode(ISD::TRUNCATE, DL, ValVT, Val); ++ break; ++ case CCValAssign::BCvt: ++ Val = DAG.getNode(ISD::BITCAST, DL, ValVT, Val); ++ break; ++ } ++ ++ return Val; ++} ++ ++//===----------------------------------------------------------------------===// ++// Formal Arguments Calling Convention Implementation ++//===----------------------------------------------------------------------===// ++/// LowerFormalArguments - transform physical registers into virtual registers ++/// and generate load operations for arguments places on the stack. 
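++/// Register arguments are copied into fresh virtual registers, while stack
++/// arguments are loaded from fixed frame objects created at their incoming
++/// offsets.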
++SDValue LoongArchTargetLowering::LowerFormalArguments( ++ SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, ++ const SmallVectorImpl &Ins, const SDLoc &DL, ++ SelectionDAG &DAG, SmallVectorImpl &InVals) const { ++ MachineFunction &MF = DAG.getMachineFunction(); ++ MachineFrameInfo &MFI = MF.getFrameInfo(); ++ LoongArchFunctionInfo *LoongArchFI = MF.getInfo(); ++ ++ LoongArchFI->setVarArgsFrameIndex(0); ++ ++ // Used with vargs to acumulate store chains. ++ std::vector OutChains; ++ ++ // Assign locations to all of the incoming arguments. ++ SmallVector ArgLocs; ++ LoongArchCCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs, ++ *DAG.getContext()); ++ const Function &Func = DAG.getMachineFunction().getFunction(); ++ Function::const_arg_iterator FuncArg = Func.arg_begin(); ++ ++ CCInfo.AnalyzeFormalArguments(Ins, CC_LoongArch_FixedArg); ++ LoongArchFI->setFormalArgInfo(CCInfo.getNextStackOffset(), ++ CCInfo.getInRegsParamsCount() > 0); ++ ++ unsigned CurArgIdx = 0; ++ CCInfo.rewindByValRegsInfo(); ++ ++ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { ++ CCValAssign &VA = ArgLocs[i]; ++ if (Ins[i].isOrigArg()) { ++ std::advance(FuncArg, Ins[i].getOrigArgIndex() - CurArgIdx); ++ CurArgIdx = Ins[i].getOrigArgIndex(); ++ } ++ EVT ValVT = VA.getValVT(); ++ ISD::ArgFlagsTy Flags = Ins[i].Flags; ++ bool IsRegLoc = VA.isRegLoc(); ++ ++ if (Flags.isByVal()) { ++ assert(Ins[i].isOrigArg() && "Byval arguments cannot be implicit"); ++ unsigned FirstByValReg, LastByValReg; ++ unsigned ByValIdx = CCInfo.getInRegsParamsProcessed(); ++ CCInfo.getInRegsParamInfo(ByValIdx, FirstByValReg, LastByValReg); ++ ++ assert(Flags.getByValSize() && ++ "ByVal args of size 0 should have been ignored by front-end."); ++ assert(ByValIdx < CCInfo.getInRegsParamsCount()); ++ copyByValRegs(Chain, DL, OutChains, DAG, Flags, InVals, &*FuncArg, ++ FirstByValReg, LastByValReg, VA, CCInfo); ++ CCInfo.nextInRegsParam(); ++ continue; ++ } ++ ++ // Arguments stored on registers ++ if (IsRegLoc) { ++ MVT RegVT = VA.getLocVT(); ++ unsigned ArgReg = VA.getLocReg(); ++ const TargetRegisterClass *RC = getRegClassFor(RegVT); ++ ++ // Transform the arguments stored on ++ // physical registers into virtual ones ++ unsigned Reg = addLiveIn(DAG.getMachineFunction(), ArgReg, RC); ++ SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegVT); ++ ++ ArgValue = UnpackFromArgumentSlot(ArgValue, VA, Ins[i].ArgVT, DL, DAG); ++ ++ // Handle floating point arguments passed in integer registers and ++ // long double arguments passed in floating point registers. ++ if ((RegVT == MVT::i32 && ValVT == MVT::f32) || ++ (RegVT == MVT::i64 && ValVT == MVT::f64) || ++ (RegVT == MVT::f64 && ValVT == MVT::i64)) ++ ArgValue = DAG.getNode(ISD::BITCAST, DL, ValVT, ArgValue); ++ else if (ABI.IsLP32() && RegVT == MVT::i32 && ++ ValVT == MVT::f64) { ++ // TODO: lp32 ++ } ++ ++ InVals.push_back(ArgValue); ++ } else { // VA.isRegLoc() ++ MVT LocVT = VA.getLocVT(); ++ ++ if (ABI.IsLP32()) { ++ // We ought to be able to use LocVT directly but LP32 sets it to i32 ++ // when allocating floating point values to integer registers. ++ // This shouldn't influence how we load the value into registers unless ++ // we are targeting softfloat. ++ if (VA.getValVT().isFloatingPoint() && !Subtarget.useSoftFloat()) ++ LocVT = VA.getValVT(); ++ } ++ ++ // sanity check ++ assert(VA.isMemLoc()); ++ ++ // The stack pointer offset is relative to the caller stack frame. 
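++      // Materialize the incoming slot as a fixed frame object so the value can
++      // be loaded through a frame index below.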
++ int FI = MFI.CreateFixedObject(LocVT.getSizeInBits() / 8, ++ VA.getLocMemOffset(), true); ++ ++ // Create load nodes to retrieve arguments from the stack ++ SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); ++ SDValue ArgValue = DAG.getLoad( ++ LocVT, DL, Chain, FIN, ++ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)); ++ OutChains.push_back(ArgValue.getValue(1)); ++ ++ ArgValue = UnpackFromArgumentSlot(ArgValue, VA, Ins[i].ArgVT, DL, DAG); ++ ++ InVals.push_back(ArgValue); ++ } ++ } ++ ++ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { ++ // The loongarch ABIs for returning structs by value requires that we copy ++ // the sret argument into $v0 for the return. Save the argument into ++ // a virtual register so that we can access it from the return points. ++ if (Ins[i].Flags.isSRet()) { ++ unsigned Reg = LoongArchFI->getSRetReturnReg(); ++ if (!Reg) { ++ Reg = MF.getRegInfo().createVirtualRegister( ++ getRegClassFor(ABI.IsLP64() ? MVT::i64 : MVT::i32)); ++ LoongArchFI->setSRetReturnReg(Reg); ++ } ++ SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), DL, Reg, InVals[i]); ++ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Copy, Chain); ++ break; ++ } ++ } ++ ++ if (IsVarArg) ++ writeVarArgRegs(OutChains, Chain, DL, DAG, CCInfo); ++ ++ // All stores are grouped in one node to allow the matching between ++ // the size of Ins and InVals. This only happens when on varg functions ++ if (!OutChains.empty()) { ++ OutChains.push_back(Chain); ++ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains); ++ } ++ ++ return Chain; ++} ++ ++//===----------------------------------------------------------------------===// ++// Return Value Calling Convention Implementation ++//===----------------------------------------------------------------------===// ++ ++bool ++LoongArchTargetLowering::CanLowerReturn(CallingConv::ID CallConv, ++ MachineFunction &MF, bool IsVarArg, ++ const SmallVectorImpl &Outs, ++ LLVMContext &Context) const { ++ SmallVector RVLocs; ++ LoongArchCCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context); ++ return CCInfo.CheckReturn(Outs, RetCC_LoongArch); ++} ++ ++bool ++LoongArchTargetLowering::shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const { ++ if ((ABI.IsLPX32() || ABI.IsLP64()) && Type == MVT::i32) ++ return true; ++ ++ return IsSigned; ++} ++ ++SDValue ++LoongArchTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, ++ bool IsVarArg, ++ const SmallVectorImpl &Outs, ++ const SmallVectorImpl &OutVals, ++ const SDLoc &DL, SelectionDAG &DAG) const { ++ // CCValAssign - represent the assignment of ++ // the return value to a location ++ SmallVector RVLocs; ++ MachineFunction &MF = DAG.getMachineFunction(); ++ ++ // CCState - Info about the registers and stack slot. ++ LoongArchCCState CCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext()); ++ ++ // Analyze return values. ++ CCInfo.AnalyzeReturn(Outs, RetCC_LoongArch); ++ ++ SDValue Flag; ++ SmallVector RetOps(1, Chain); ++ ++ // Copy the result values into the output registers. 
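++  // RVLocs was filled in by RetCC_LoongArch above; each value is extended or
++  // bitcast as its LocInfo requires before being copied to its return register.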
++ for (unsigned i = 0; i != RVLocs.size(); ++i) { ++ SDValue Val = OutVals[i]; ++ CCValAssign &VA = RVLocs[i]; ++ assert(VA.isRegLoc() && "Can only return in registers!"); ++ bool UseUpperBits = false; ++ ++ switch (VA.getLocInfo()) { ++ default: ++ llvm_unreachable("Unknown loc info!"); ++ case CCValAssign::Full: ++ break; ++ case CCValAssign::BCvt: ++ Val = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Val); ++ break; ++ case CCValAssign::AExtUpper: ++ UseUpperBits = true; ++ LLVM_FALLTHROUGH; ++ case CCValAssign::AExt: ++ Val = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Val); ++ break; ++ case CCValAssign::ZExtUpper: ++ UseUpperBits = true; ++ LLVM_FALLTHROUGH; ++ case CCValAssign::ZExt: ++ Val = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Val); ++ break; ++ case CCValAssign::SExtUpper: ++ UseUpperBits = true; ++ LLVM_FALLTHROUGH; ++ case CCValAssign::SExt: ++ Val = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Val); ++ break; ++ } ++ ++ if (UseUpperBits) { ++ unsigned ValSizeInBits = Outs[i].ArgVT.getSizeInBits(); ++ unsigned LocSizeInBits = VA.getLocVT().getSizeInBits(); ++ Val = DAG.getNode( ++ ISD::SHL, DL, VA.getLocVT(), Val, ++ DAG.getConstant(LocSizeInBits - ValSizeInBits, DL, VA.getLocVT())); ++ } ++ ++ Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Flag); ++ ++ // Guarantee that all emitted copies are stuck together with flags. ++ Flag = Chain.getValue(1); ++ RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); + } + +- return SDValue(); ++ // The loongarch ABIs for returning structs by value requires that we copy ++ // the sret argument into $v0 for the return. We saved the argument into ++ // a virtual register in the entry block, so now we copy the value out ++ // and into $v0. ++ if (MF.getFunction().hasStructRetAttr()) { ++ LoongArchFunctionInfo *LoongArchFI = MF.getInfo(); ++ unsigned Reg = LoongArchFI->getSRetReturnReg(); ++ ++ if (!Reg) ++ llvm_unreachable("sret virtual register not created in the entry block"); ++ SDValue Val = ++ DAG.getCopyFromReg(Chain, DL, Reg, getPointerTy(DAG.getDataLayout())); ++ unsigned A0 = ABI.IsLP64() ? LoongArch::A0_64 : LoongArch::A0; ++ ++ Chain = DAG.getCopyToReg(Chain, DL, A0, Val, Flag); ++ Flag = Chain.getValue(1); ++ RetOps.push_back(DAG.getRegister(A0, getPointerTy(DAG.getDataLayout()))); ++ } ++ ++ RetOps[0] = Chain; // Update chain. ++ ++ // Add the flag if we have it. ++ if (Flag.getNode()) ++ RetOps.push_back(Flag); ++ ++ // Standard return on LoongArch is a "jr $ra" ++ return DAG.getNode(LoongArchISD::Ret, DL, MVT::Other, RetOps); + } + +-SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N, +- DAGCombinerInfo &DCI) const { +- SelectionDAG &DAG = DCI.DAG; +- switch (N->getOpcode()) { ++//===----------------------------------------------------------------------===// ++// LoongArch Inline Assembly Support ++//===----------------------------------------------------------------------===// ++ ++/// getConstraintType - Given a constraint letter, return the type of ++/// constraint it is for this target. 
++LoongArchTargetLowering::ConstraintType ++LoongArchTargetLowering::getConstraintType(StringRef Constraint) const { ++ // LoongArch specific constraints ++ // GCC config/loongarch/constraints.md ++ // ++ // 'f': Floating Point register ++ // 'G': Floating-point 0 ++ // 'l': Signed 16-bit constant ++ // 'R': Memory address that can be used in a non-macro load or store ++ // "ZC" Memory address with 16-bit and 4 bytes aligned offset ++ // "ZB" Memory address with 0 offset ++ ++ if (Constraint.size() == 1) { ++ switch (Constraint[0]) { ++ default : break; ++ case 'f': ++ return C_RegisterClass; ++ case 'l': ++ case 'G': ++ return C_Other; ++ case 'R': ++ return C_Memory; ++ } ++ } ++ ++ if (Constraint == "ZC" || Constraint == "ZB") ++ return C_Memory; ++ ++ return TargetLowering::getConstraintType(Constraint); ++} ++ ++/// Examine constraint type and operand type and determine a weight value. ++/// This object must already have been set up with the operand type ++/// and the current alternative constraint selected. ++TargetLowering::ConstraintWeight ++LoongArchTargetLowering::getSingleConstraintMatchWeight( ++ AsmOperandInfo &info, const char *constraint) const { ++ ConstraintWeight weight = CW_Invalid; ++ Value *CallOperandVal = info.CallOperandVal; ++ // If we don't have a value, we can't do a match, ++ // but allow it at the lowest weight. ++ if (!CallOperandVal) ++ return CW_Default; ++ Type *type = CallOperandVal->getType(); ++ // Look at the constraint type. ++ switch (*constraint) { + default: ++ weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint); ++ break; ++ case 'f': // FPU ++ if (Subtarget.hasLSX() && type->isVectorTy() && ++ type->getPrimitiveSizeInBits() == 128) ++ weight = CW_Register; ++ else if (Subtarget.hasLASX() && type->isVectorTy() && ++ type->getPrimitiveSizeInBits() == 256) ++ weight = CW_Register; ++ else if (type->isFloatTy()) ++ weight = CW_Register; ++ break; ++ case 'l': // signed 16 bit immediate ++ case 'I': // signed 12 bit immediate ++ case 'J': // integer zero ++ case 'G': // floating-point zero ++ case 'K': // unsigned 12 bit immediate ++ if (isa(CallOperandVal)) ++ weight = CW_Constant; ++ break; ++ case 'm': ++ case 'R': ++ weight = CW_Memory; + break; +- case ISD::AND: +- return performANDCombine(N, DAG, DCI, Subtarget); +- case ISD::OR: +- return performORCombine(N, DAG, DCI, Subtarget); +- case ISD::SRL: +- return performSRLCombine(N, DAG, DCI, Subtarget); + } +- return SDValue(); ++ return weight; + } + +-static MachineBasicBlock *insertDivByZeroTrap(MachineInstr &MI, +- MachineBasicBlock &MBB, +- const TargetInstrInfo &TII) { +- if (!ZeroDivCheck) +- return &MBB; ++/// This is a helper function to parse a physical register string and split it ++/// into non-numeric and numeric parts (Prefix and Reg). The first boolean flag ++/// that is returned indicates whether parsing was successful. The second flag ++/// is true if the numeric part exists. ++static std::pair parsePhysicalReg(StringRef C, StringRef &Prefix, ++ unsigned long long &Reg) { ++ if (C.empty() || C.front() != '{' || C.back() != '}') ++ return std::make_pair(false, false); + +- // Build instructions: +- // div(or mod) $dst, $dividend, $divisor +- // bnez $divisor, 8 +- // break 7 +- // fallthrough +- MachineOperand &Divisor = MI.getOperand(2); +- auto FallThrough = std::next(MI.getIterator()); ++ // Search for the first numeric character. 
++ StringRef::const_iterator I, B = C.begin() + 1, E = C.end() - 1; ++ I = std::find_if(B, E, isdigit); + +- BuildMI(MBB, FallThrough, MI.getDebugLoc(), TII.get(LoongArch::BNEZ)) +- .addReg(Divisor.getReg(), getKillRegState(Divisor.isKill())) +- .addImm(8); ++ Prefix = StringRef(B, I - B); + +- // See linux header file arch/loongarch/include/uapi/asm/break.h for the +- // definition of BRK_DIVZERO. +- BuildMI(MBB, FallThrough, MI.getDebugLoc(), TII.get(LoongArch::BREAK)) +- .addImm(7/*BRK_DIVZERO*/); ++ // The second flag is set to false if no numeric characters were found. ++ if (I == E) ++ return std::make_pair(true, false); + +- // Clear Divisor's kill flag. +- Divisor.setIsKill(false); ++ // Parse the numeric characters. ++ return std::make_pair(!getAsUnsignedInteger(StringRef(I, E - I), 10, Reg), ++ true); ++} + +- return &MBB; ++EVT LoongArchTargetLowering::getTypeForExtReturn(LLVMContext &Context, EVT VT, ++ ISD::NodeType) const { ++ bool Cond = !Subtarget.isABI_LP32() && VT.getSizeInBits() == 32; ++ EVT MinVT = getRegisterType(Context, Cond ? MVT::i64 : MVT::i32); ++ return VT.bitsLT(MinVT) ? MinVT : VT; + } + +-MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter( +- MachineInstr &MI, MachineBasicBlock *BB) const { ++static const TargetRegisterClass *getRegisterClassForVT(MVT VT, bool Is64Bit) { ++ // Newer llvm versions (>= 12) do not require simple VTs for constraints and ++ // they use MVT::Other for constraints with complex VTs. For more details, ++ // please see https://reviews.llvm.org/D91710. ++ if (VT == MVT::Other || VT.getSizeInBits() <= 32) ++ return &LoongArch::GPR32RegClass; ++ if (VT.getSizeInBits() <= 64) ++ return Is64Bit ? &LoongArch::GPR64RegClass : &LoongArch::GPR32RegClass; ++ return nullptr; ++} + +- switch (MI.getOpcode()) { +- default: +- llvm_unreachable("Unexpected instr type to insert"); +- case LoongArch::DIV_W: +- case LoongArch::DIV_WU: +- case LoongArch::MOD_W: +- case LoongArch::MOD_WU: +- case LoongArch::DIV_D: +- case LoongArch::DIV_DU: +- case LoongArch::MOD_D: +- case LoongArch::MOD_DU: +- return insertDivByZeroTrap(MI, *BB, *Subtarget.getInstrInfo()); +- break; ++std::pair LoongArchTargetLowering:: ++parseRegForInlineAsmConstraint(StringRef C, MVT VT) const { ++ const TargetRegisterInfo *TRI = ++ Subtarget.getRegisterInfo(); ++ const TargetRegisterClass *RC; ++ StringRef Prefix; ++ unsigned long long Reg; ++ ++ std::pair R = parsePhysicalReg(C, Prefix, Reg); ++ ++ if (!R.first) ++ return std::make_pair(0U, nullptr); ++ ++ if (!R.second) ++ return std::make_pair(0U, nullptr); ++ ++ if (Prefix == "$f") { // Parse $f0-$f31. ++ // If the size of FP registers is 64-bit or Reg is an even number, select ++ // the 64-bit register class. Otherwise, select the 32-bit register class. ++ if (VT == MVT::Other) ++ VT = (Subtarget.isFP64bit() || !(Reg % 2)) ? MVT::f64 : MVT::f32; ++ ++ RC = getRegClassFor(VT); ++ } ++ else if (Prefix == "$vr") { // Parse $vr0-$vr31. ++ RC = getRegClassFor((VT == MVT::Other) ? MVT::v16i8 : VT); ++ } ++ else if (Prefix == "$xr") { // Parse $xr0-$xr31. ++ RC = getRegClassFor((VT == MVT::Other) ? MVT::v16i8 : VT); ++ } ++ else if (Prefix == "$fcc") // Parse $fcc0-$fcc7. ++ RC = TRI->getRegClass(LoongArch::FCFRRegClassID); ++ else { // Parse $r0-$r31. ++ assert(Prefix == "$r"); ++ if ((RC = getRegisterClassForVT(VT, Subtarget.is64Bit())) == nullptr) { ++ // This will generate an error message. 
++ return std::make_pair(0U, nullptr); ++ } + } ++ ++ assert(Reg < RC->getNumRegs()); ++ ++ if (RC == &LoongArch::GPR64RegClass || RC == &LoongArch::GPR32RegClass) { ++ // Sync with the GPR32/GPR64 RegisterClass in LoongArchRegisterInfo.td ++ // that just like LoongArchAsmParser.cpp ++ switch (Reg) { ++ case 0: return std::make_pair(*(RC->begin() + 0), RC); // r0 ++ case 1: return std::make_pair(*(RC->begin() + 27), RC); // r1 ++ case 2: return std::make_pair(*(RC->begin() + 28), RC); // r2 ++ case 3: return std::make_pair(*(RC->begin() + 29), RC); // r3 ++ case 4: return std::make_pair(*(RC->begin() + 1), RC); // r4 ++ case 5: return std::make_pair(*(RC->begin() + 2), RC); // r5 ++ case 6: return std::make_pair(*(RC->begin() + 3), RC); // r6 ++ case 7: return std::make_pair(*(RC->begin() + 4), RC); // r7 ++ case 8: return std::make_pair(*(RC->begin() + 5), RC); // r8 ++ case 9: return std::make_pair(*(RC->begin() + 6), RC); // r9 ++ case 10: return std::make_pair(*(RC->begin() + 7), RC); // r10 ++ case 11: return std::make_pair(*(RC->begin() + 8), RC); // r11 ++ case 12: return std::make_pair(*(RC->begin() + 9), RC); // r12 ++ case 13: return std::make_pair(*(RC->begin() + 10), RC); // r13 ++ case 14: return std::make_pair(*(RC->begin() + 11), RC); // r14 ++ case 15: return std::make_pair(*(RC->begin() + 12), RC); // r15 ++ case 16: return std::make_pair(*(RC->begin() + 13), RC); // r16 ++ case 17: return std::make_pair(*(RC->begin() + 14), RC); // r17 ++ case 18: return std::make_pair(*(RC->begin() + 15), RC); // r18 ++ case 19: return std::make_pair(*(RC->begin() + 16), RC); // r19 ++ case 20: return std::make_pair(*(RC->begin() + 17), RC); // r20 ++ case 21: return std::make_pair(*(RC->begin() + 30), RC); // r21 ++ case 22: return std::make_pair(*(RC->begin() + 31), RC); // r22 ++ case 23: return std::make_pair(*(RC->begin() + 18), RC); // r23 ++ case 24: return std::make_pair(*(RC->begin() + 19), RC); // r24 ++ case 25: return std::make_pair(*(RC->begin() + 20), RC); // r25 ++ case 26: return std::make_pair(*(RC->begin() + 21), RC); // r26 ++ case 27: return std::make_pair(*(RC->begin() + 22), RC); // r27 ++ case 28: return std::make_pair(*(RC->begin() + 23), RC); // r28 ++ case 29: return std::make_pair(*(RC->begin() + 24), RC); // r29 ++ case 30: return std::make_pair(*(RC->begin() + 25), RC); // r30 ++ case 31: return std::make_pair(*(RC->begin() + 26), RC); // r31 ++ } ++ } ++ return std::make_pair(*(RC->begin() + Reg), RC); + } + +-const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const { +- switch ((LoongArchISD::NodeType)Opcode) { +- case LoongArchISD::FIRST_NUMBER: +- break; +- +-#define NODE_NAME_CASE(node) \ +- case LoongArchISD::node: \ +- return "LoongArchISD::" #node; +- +- // TODO: Add more target-dependent nodes later. +- NODE_NAME_CASE(CALL) +- NODE_NAME_CASE(RET) +- NODE_NAME_CASE(SLL_W) +- NODE_NAME_CASE(SRA_W) +- NODE_NAME_CASE(SRL_W) +- NODE_NAME_CASE(BSTRINS) +- NODE_NAME_CASE(BSTRPICK) +- NODE_NAME_CASE(MOVGR2FR_W_LA64) +- NODE_NAME_CASE(MOVFR2GR_S_LA64) +- NODE_NAME_CASE(FTINT) +- } +-#undef NODE_NAME_CASE +- return nullptr; ++/// Given a register class constraint, like 'r', if this corresponds directly ++/// to an LLVM register class, return a register of 0 and the register class ++/// pointer. 
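++/// For the 'f' constraint the register class is chosen from the scalar FP,
++/// LSX or LASX register files according to the requested value type.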
++std::pair ++LoongArchTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, ++ StringRef Constraint, ++ MVT VT) const { ++ if (Constraint.size() == 1) { ++ switch (Constraint[0]) { ++ case 'r': ++ return std::make_pair(0U, getRegisterClassForVT(VT, Subtarget.is64Bit())); ++ case 'f': // FPU or LSX register ++ if (VT == MVT::v16i8) ++ return std::make_pair(0U, &LoongArch::LSX128BRegClass); ++ else if (VT == MVT::v8i16) ++ return std::make_pair(0U, &LoongArch::LSX128HRegClass); ++ else if (VT == MVT::v4i32 || VT == MVT::v4f32) ++ return std::make_pair(0U, &LoongArch::LSX128WRegClass); ++ else if (VT == MVT::v2i64 || VT == MVT::v2f64) ++ return std::make_pair(0U, &LoongArch::LSX128DRegClass); ++ else if (VT == MVT::v32i8) ++ return std::make_pair(0U, &LoongArch::LASX256BRegClass); ++ else if (VT == MVT::v16i16) ++ return std::make_pair(0U, &LoongArch::LASX256HRegClass); ++ else if (VT == MVT::v8i32 || VT == MVT::v8f32) ++ return std::make_pair(0U, &LoongArch::LASX256WRegClass); ++ else if (VT == MVT::v4i64 || VT == MVT::v4f64) ++ return std::make_pair(0U, &LoongArch::LASX256DRegClass); ++ else if (VT == MVT::f32) ++ return std::make_pair(0U, &LoongArch::FGR32RegClass); ++ else if (VT == MVT::f64) ++ return std::make_pair(0U, &LoongArch::FGR64RegClass); ++ break; ++ } ++ } ++ ++ std::pair R; ++ R = parseRegForInlineAsmConstraint(Constraint, VT); ++ ++ if (R.second) ++ return R; ++ ++ return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); + } + +-//===----------------------------------------------------------------------===// +-// Calling Convention Implementation +-//===----------------------------------------------------------------------===// +-// FIXME: Now, we only support CallingConv::C with fixed arguments which are +-// passed with integer or floating-point registers. +-const MCPhysReg ArgGPRs[] = {LoongArch::R4, LoongArch::R5, LoongArch::R6, +- LoongArch::R7, LoongArch::R8, LoongArch::R9, +- LoongArch::R10, LoongArch::R11}; +-const MCPhysReg ArgFPR32s[] = {LoongArch::F0, LoongArch::F1, LoongArch::F2, +- LoongArch::F3, LoongArch::F4, LoongArch::F5, +- LoongArch::F6, LoongArch::F7}; +-const MCPhysReg ArgFPR64s[] = { +- LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64, +- LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64}; +- +-// Implements the LoongArch calling convention. Returns true upon failure. +-static bool CC_LoongArch(unsigned ValNo, MVT ValVT, +- CCValAssign::LocInfo LocInfo, CCState &State) { +- // Allocate to a register if possible. +- Register Reg; +- +- if (ValVT == MVT::f32) +- Reg = State.AllocateReg(ArgFPR32s); +- else if (ValVT == MVT::f64) +- Reg = State.AllocateReg(ArgFPR64s); +- else +- Reg = State.AllocateReg(ArgGPRs); +- if (Reg) { +- State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, ValVT, LocInfo)); ++/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops ++/// vector. If it is invalid, don't add anything to Ops. ++void LoongArchTargetLowering::LowerAsmOperandForConstraint(SDValue Op, ++ std::string &Constraint, ++ std::vector&Ops, ++ SelectionDAG &DAG) const { ++ SDLoc DL(Op); ++ SDValue Result; ++ ++ // Only support length 1 constraints for now. 
++ if (Constraint.length() > 1) return; ++ ++ char ConstraintLetter = Constraint[0]; ++ switch (ConstraintLetter) { ++ default: break; // This will fall through to the generic implementation ++ case 'l': // Signed 16 bit constant ++ // If this fails, the parent routine will give an error ++ if (ConstantSDNode *C = dyn_cast(Op)) { ++ EVT Type = Op.getValueType(); ++ int64_t Val = C->getSExtValue(); ++ if (isInt<16>(Val)) { ++ Result = DAG.getTargetConstant(Val, DL, Type); ++ break; ++ } ++ } ++ return; ++ case 'I': // Signed 12 bit constant ++ // If this fails, the parent routine will give an error ++ if (ConstantSDNode *C = dyn_cast(Op)) { ++ EVT Type = Op.getValueType(); ++ int64_t Val = C->getSExtValue(); ++ if (isInt<12>(Val)) { ++ Result = DAG.getTargetConstant(Val, DL, Type); ++ break; ++ } ++ } ++ return; ++ case 'J': // integer zero ++ if (ConstantSDNode *C = dyn_cast(Op)) { ++ EVT Type = Op.getValueType(); ++ int64_t Val = C->getZExtValue(); ++ if (Val == 0) { ++ Result = DAG.getTargetConstant(0, DL, Type); ++ break; ++ } ++ } ++ return; ++ case 'G': // floating-point zero ++ if (ConstantFPSDNode *C = dyn_cast(Op)) { ++ if (C->isZero()) { ++ EVT Type = Op.getValueType(); ++ Result = DAG.getTargetConstantFP(0, DL, Type); ++ break; ++ } ++ } ++ return; ++ case 'K': // unsigned 12 bit immediate ++ if (ConstantSDNode *C = dyn_cast(Op)) { ++ EVT Type = Op.getValueType(); ++ uint64_t Val = (uint64_t)C->getZExtValue(); ++ if (isUInt<12>(Val)) { ++ Result = DAG.getTargetConstant(Val, DL, Type); ++ break; ++ } ++ } ++ return; ++ } ++ ++ if (Result.getNode()) { ++ Ops.push_back(Result); ++ return; ++ } ++ ++ TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); ++} ++ ++bool LoongArchTargetLowering::isLegalAddressingMode(const DataLayout &DL, ++ const AddrMode &AM, Type *Ty, ++ unsigned AS, Instruction *I) const { ++ // No global is ever allowed as a base. ++ if (AM.BaseGV) ++ return false; ++ ++ switch (AM.Scale) { ++ case 0: // "r+i" or just "i", depending on HasBaseReg. ++ break; ++ case 1: ++ if (!AM.HasBaseReg) // allow "r+i". ++ break; ++ return false; // disallow "r+r" or "r+r+i". ++ default: + return false; + } + +- // TODO: Handle arguments passed without register. + return true; + } + +-void LoongArchTargetLowering::analyzeInputArgs( +- CCState &CCInfo, const SmallVectorImpl &Ins, +- LoongArchCCAssignFn Fn) const { +- for (unsigned i = 0, e = Ins.size(); i != e; ++i) { +- MVT ArgVT = Ins[i].VT; ++bool ++LoongArchTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { ++ // The LoongArch target isn't yet aware of offsets. ++ return false; ++} + +- if (Fn(i, ArgVT, CCValAssign::Full, CCInfo)) { +- LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " +- << EVT(ArgVT).getEVTString() << '\n'); +- llvm_unreachable(""); ++EVT LoongArchTargetLowering::getOptimalMemOpType( ++ const MemOp &Op, const AttributeList &FuncAttributes) const { ++ if (!FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat)) { ++ if (Op.size() >= 16) { ++ if (Op.size() >= 32 && Subtarget.hasLASX()) { ++ return MVT::v32i8; ++ } ++ if (Subtarget.hasLSX()) ++ return MVT::v16i8; + } + } ++ ++ if (Subtarget.is64Bit()) ++ return MVT::i64; ++ ++ return MVT::i32; ++} ++ ++/// isFPImmLegal - Returns true if the target can instruction select the ++/// specified FP immediate natively. If false, the legalizer will ++/// materialize the FP immediate as a load from a constant pool. 
++bool LoongArchTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT, ++ bool ForCodeSize) const { ++ if (VT != MVT::f32 && VT != MVT::f64) ++ return false; ++ if (Imm.isNegZero()) ++ return false; ++ return (Imm.isZero() || Imm.isExactlyValue(+1.0)); ++} ++ ++bool LoongArchTargetLowering::useSoftFloat() const { ++ return Subtarget.useSoftFloat(); ++} ++ ++// Return whether the an instruction can potentially be optimized to a tail ++// call. This will cause the optimizers to attempt to move, or duplicate, ++// return instructions to help enable tail call optimizations for this ++// instruction. ++bool LoongArchTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const { ++ return CI->isTailCall(); ++} ++ ++void LoongArchTargetLowering::copyByValRegs( ++ SDValue Chain, const SDLoc &DL, std::vector &OutChains, ++ SelectionDAG &DAG, const ISD::ArgFlagsTy &Flags, ++ SmallVectorImpl &InVals, const Argument *FuncArg, ++ unsigned FirstReg, unsigned LastReg, const CCValAssign &VA, ++ LoongArchCCState &State) const { ++ MachineFunction &MF = DAG.getMachineFunction(); ++ MachineFrameInfo &MFI = MF.getFrameInfo(); ++ unsigned GPRSizeInBytes = Subtarget.getGPRSizeInBytes(); ++ unsigned NumRegs = LastReg - FirstReg; ++ unsigned RegAreaSize = NumRegs * GPRSizeInBytes; ++ unsigned FrameObjSize = std::max(Flags.getByValSize(), RegAreaSize); ++ int FrameObjOffset; ++ ArrayRef ByValArgRegs = ABI.GetByValArgRegs(); ++ ++ if (RegAreaSize) ++ FrameObjOffset = -(int)((ByValArgRegs.size() - FirstReg) * GPRSizeInBytes); ++ else ++ FrameObjOffset = VA.getLocMemOffset(); ++ ++ // Create frame object. ++ EVT PtrTy = getPointerTy(DAG.getDataLayout()); ++ // Make the fixed object stored to mutable so that the load instructions ++ // referencing it have their memory dependencies added. ++ // Set the frame object as isAliased which clears the underlying objects ++ // vector in ScheduleDAGInstrs::buildSchedGraph() resulting in addition of all ++ // stores as dependencies for loads referencing this fixed object. ++ int FI = MFI.CreateFixedObject(FrameObjSize, FrameObjOffset, false, true); ++ SDValue FIN = DAG.getFrameIndex(FI, PtrTy); ++ InVals.push_back(FIN); ++ ++ if (!NumRegs) ++ return; ++ ++ // Copy arg registers. ++ MVT RegTy = MVT::getIntegerVT(GPRSizeInBytes * 8); ++ const TargetRegisterClass *RC = getRegClassFor(RegTy); ++ ++ for (unsigned I = 0; I < NumRegs; ++I) { ++ unsigned ArgReg = ByValArgRegs[FirstReg + I]; ++ unsigned VReg = addLiveIn(MF, ArgReg, RC); ++ unsigned Offset = I * GPRSizeInBytes; ++ SDValue StorePtr = DAG.getNode(ISD::ADD, DL, PtrTy, FIN, ++ DAG.getConstant(Offset, DL, PtrTy)); ++ SDValue Store = DAG.getStore(Chain, DL, DAG.getRegister(VReg, RegTy), ++ StorePtr, MachinePointerInfo(FuncArg, Offset)); ++ OutChains.push_back(Store); ++ } + } + +-void LoongArchTargetLowering::analyzeOutputArgs( +- CCState &CCInfo, const SmallVectorImpl &Outs, +- LoongArchCCAssignFn Fn) const { +- for (unsigned i = 0, e = Outs.size(); i != e; ++i) { +- MVT ArgVT = Outs[i].VT; ++// Copy byVal arg to registers and stack. 
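++// Whole registers are filled first, any leftover bytes are assembled with
++// sub-word loads and shifts, and the remainder is copied with a memcpy.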
++void LoongArchTargetLowering::passByValArg( ++ SDValue Chain, const SDLoc &DL, ++ std::deque> &RegsToPass, ++ SmallVectorImpl &MemOpChains, SDValue StackPtr, ++ MachineFrameInfo &MFI, SelectionDAG &DAG, SDValue Arg, unsigned FirstReg, ++ unsigned LastReg, const ISD::ArgFlagsTy &Flags, ++ const CCValAssign &VA) const { ++ unsigned ByValSizeInBytes = Flags.getByValSize(); ++ unsigned OffsetInBytes = 0; // From beginning of struct ++ unsigned RegSizeInBytes = Subtarget.getGPRSizeInBytes(); ++ Align Alignment = ++ std::min(Flags.getNonZeroByValAlign(), Align(RegSizeInBytes)); ++ EVT PtrTy = getPointerTy(DAG.getDataLayout()), ++ RegTy = MVT::getIntegerVT(RegSizeInBytes * 8); ++ unsigned NumRegs = LastReg - FirstReg; ++ ++ if (NumRegs) { ++ ArrayRef ArgRegs = ABI.GetByValArgRegs(); ++ bool LeftoverBytes = (NumRegs * RegSizeInBytes > ByValSizeInBytes); ++ unsigned I = 0; ++ ++ // Copy words to registers. ++ for (; I < NumRegs - LeftoverBytes; ++I, OffsetInBytes += RegSizeInBytes) { ++ SDValue LoadPtr = DAG.getNode(ISD::ADD, DL, PtrTy, Arg, ++ DAG.getConstant(OffsetInBytes, DL, PtrTy)); ++ SDValue LoadVal = DAG.getLoad(RegTy, DL, Chain, LoadPtr, ++ MachinePointerInfo(), Alignment); ++ MemOpChains.push_back(LoadVal.getValue(1)); ++ unsigned ArgReg = ArgRegs[FirstReg + I]; ++ RegsToPass.push_back(std::make_pair(ArgReg, LoadVal)); ++ } ++ ++ // Return if the struct has been fully copied. ++ if (ByValSizeInBytes == OffsetInBytes) ++ return; ++ ++ // Copy the remainder of the byval argument with sub-word loads and shifts. ++ if (LeftoverBytes) { ++ SDValue Val; ++ ++ for (unsigned LoadSizeInBytes = RegSizeInBytes / 2, TotalBytesLoaded = 0; ++ OffsetInBytes < ByValSizeInBytes; LoadSizeInBytes /= 2) { ++ unsigned RemainingSizeInBytes = ByValSizeInBytes - OffsetInBytes; + +- if (Fn(i, ArgVT, CCValAssign::Full, CCInfo)) { +- LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " +- << EVT(ArgVT).getEVTString() << "\n"); +- llvm_unreachable(""); ++ if (RemainingSizeInBytes < LoadSizeInBytes) ++ continue; ++ ++ // Load subword. ++ SDValue LoadPtr = DAG.getNode(ISD::ADD, DL, PtrTy, Arg, ++ DAG.getConstant(OffsetInBytes, DL, ++ PtrTy)); ++ SDValue LoadVal = DAG.getExtLoad( ++ ISD::ZEXTLOAD, DL, RegTy, Chain, LoadPtr, MachinePointerInfo(), ++ MVT::getIntegerVT(LoadSizeInBytes * 8), Alignment); ++ MemOpChains.push_back(LoadVal.getValue(1)); ++ ++ // Shift the loaded value. ++ unsigned Shamt; ++ ++ Shamt = TotalBytesLoaded * 8; ++ ++ SDValue Shift = DAG.getNode(ISD::SHL, DL, RegTy, LoadVal, ++ DAG.getConstant(Shamt, DL, MVT::i32)); ++ ++ if (Val.getNode()) ++ Val = DAG.getNode(ISD::OR, DL, RegTy, Val, Shift); ++ else ++ Val = Shift; ++ ++ OffsetInBytes += LoadSizeInBytes; ++ TotalBytesLoaded += LoadSizeInBytes; ++ Alignment = std::min(Alignment, Align(LoadSizeInBytes)); ++ } ++ ++ unsigned ArgReg = ArgRegs[FirstReg + I]; ++ RegsToPass.push_back(std::make_pair(ArgReg, Val)); ++ return; + } + } ++ ++ // Copy remainder of byval arg to it with memcpy. 
++ unsigned MemCpySize = ByValSizeInBytes - OffsetInBytes; ++ SDValue Src = DAG.getNode(ISD::ADD, DL, PtrTy, Arg, ++ DAG.getConstant(OffsetInBytes, DL, PtrTy)); ++ SDValue Dst = DAG.getNode(ISD::ADD, DL, PtrTy, StackPtr, ++ DAG.getIntPtrConstant(VA.getLocMemOffset(), DL)); ++ Chain = DAG.getMemcpy( ++ Chain, DL, Dst, Src, DAG.getConstant(MemCpySize, DL, PtrTy), ++ Align(Alignment), /*isVolatile=*/false, /*AlwaysInline=*/false, ++ /*isTailCall=*/false, MachinePointerInfo(), MachinePointerInfo()); ++ MemOpChains.push_back(Chain); + } + +-static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain, +- const CCValAssign &VA, const SDLoc &DL, +- const LoongArchTargetLowering &TLI) { ++void LoongArchTargetLowering::writeVarArgRegs(std::vector &OutChains, ++ SDValue Chain, const SDLoc &DL, ++ SelectionDAG &DAG, ++ CCState &State) const { ++ ArrayRef ArgRegs = ABI.GetVarArgRegs(); ++ unsigned Idx = State.getFirstUnallocated(ArgRegs); ++ unsigned RegSizeInBytes = Subtarget.getGPRSizeInBytes(); ++ MVT RegTy = MVT::getIntegerVT(RegSizeInBytes * 8); ++ const TargetRegisterClass *RC = getRegClassFor(RegTy); + MachineFunction &MF = DAG.getMachineFunction(); +- MachineRegisterInfo &RegInfo = MF.getRegInfo(); +- EVT LocVT = VA.getLocVT(); +- const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT()); +- Register VReg = RegInfo.createVirtualRegister(RC); +- RegInfo.addLiveIn(VA.getLocReg(), VReg); ++ MachineFrameInfo &MFI = MF.getFrameInfo(); ++ LoongArchFunctionInfo *LoongArchFI = MF.getInfo(); + +- return DAG.getCopyFromReg(Chain, DL, VReg, LocVT); +-} +- +-// Transform physical registers into virtual registers. +-SDValue LoongArchTargetLowering::LowerFormalArguments( +- SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, +- const SmallVectorImpl &Ins, const SDLoc &DL, +- SelectionDAG &DAG, SmallVectorImpl &InVals) const { ++ // Offset of the first variable argument from stack pointer. ++ int VaArgOffset, VarArgsSaveSize; + +- MachineFunction &MF = DAG.getMachineFunction(); ++ if (ArgRegs.size() == Idx) { ++ VaArgOffset = alignTo(State.getNextStackOffset(), RegSizeInBytes); ++ VarArgsSaveSize = 0; ++ } else { ++ VarArgsSaveSize = (int)(RegSizeInBytes * (ArgRegs.size() - Idx)); ++ VaArgOffset = -VarArgsSaveSize; ++ } + +- switch (CallConv) { +- default: +- llvm_unreachable("Unsupported calling convention"); +- case CallingConv::C: +- break; ++ // Record the frame index of the first variable argument ++ // which is a value necessary to VASTART. ++ int FI = MFI.CreateFixedObject(RegSizeInBytes, VaArgOffset, true); ++ LoongArchFI->setVarArgsFrameIndex(FI); ++ ++ // If saving an odd number of registers then create an extra stack slot to ++ // ensure that the frame pointer is 2*GRLEN-aligned, which in turn ensures ++ // offsets to even-numbered registered remain 2*GRLEN-aligned. ++ if (Idx % 2) { ++ MFI.CreateFixedObject(RegSizeInBytes, VaArgOffset - (int)RegSizeInBytes, ++ true); ++ VarArgsSaveSize += RegSizeInBytes; + } + +- // Assign locations to all of the incoming arguments. +- SmallVector ArgLocs; +- CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); ++ // Copy the integer registers that have not been used for argument passing ++ // to the argument register save area. For LP32, the save area is allocated ++ // in the caller's stack frame, while for LPX32/LP64, it is allocated in the ++ // callee's stack frame. 
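++  // Each remaining register is stored to its own fixed object so the later
++  // va_arg lowering can find the saved values at consecutive offsets.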
++ for (unsigned I = Idx; I < ArgRegs.size(); ++ ++I, VaArgOffset += RegSizeInBytes) { ++ unsigned Reg = addLiveIn(MF, ArgRegs[I], RC); ++ SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegTy); ++ FI = MFI.CreateFixedObject(RegSizeInBytes, VaArgOffset, true); ++ SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); ++ SDValue Store = ++ DAG.getStore(Chain, DL, ArgValue, PtrOff, MachinePointerInfo()); ++ cast(Store.getNode())->getMemOperand()->setValue( ++ (Value *)nullptr); ++ OutChains.push_back(Store); ++ } ++ LoongArchFI->setVarArgsSaveSize(VarArgsSaveSize); ++} + +- analyzeInputArgs(CCInfo, Ins, CC_LoongArch); ++void LoongArchTargetLowering::HandleByVal(CCState *State, unsigned &Size, ++ Align Alignment) const { ++ const TargetFrameLowering *TFL = Subtarget.getFrameLowering(); ++ ++ assert(Size && "Byval argument's size shouldn't be 0."); ++ ++ Alignment = std::min(Alignment, TFL->getStackAlign()); ++ ++ unsigned FirstReg = 0; ++ unsigned NumRegs = 0; ++ unsigned RegSizeInBytes = Subtarget.getGPRSizeInBytes(); ++ ArrayRef IntArgRegs = ABI.GetByValArgRegs(); ++ // FIXME: The LP32 case actually describes no shadow registers. ++ const MCPhysReg *ShadowRegs = ++ ABI.IsLP32() ? IntArgRegs.data() : LoongArch64DPRegs; ++ ++ // We used to check the size as well but we can't do that anymore since ++ // CCState::HandleByVal() rounds up the size after calling this function. ++ assert(Alignment >= Align(RegSizeInBytes) && ++ "Byval argument's alignment should be a multiple of RegSizeInBytes."); ++ ++ FirstReg = State->getFirstUnallocated(IntArgRegs); ++ ++ // If Alignment > RegSizeInBytes, the first arg register must be even. ++ // FIXME: This condition happens to do the right thing but it's not the ++ // right way to test it. We want to check that the stack frame offset ++ // of the register is aligned. ++ if ((Alignment > RegSizeInBytes) && (FirstReg % 2)) { ++ State->AllocateReg(IntArgRegs[FirstReg], ShadowRegs[FirstReg]); ++ ++FirstReg; ++ // assert(true && "debug#######################################"); ++ } + +- for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) +- InVals.push_back(unpackFromRegLoc(DAG, Chain, ArgLocs[i], DL, *this)); ++ // Mark the registers allocated. ++ // Size = alignTo(Size, RegSizeInBytes); ++ // for (unsigned I = FirstReg; Size > 0 && (I < IntArgRegs.size()); ++ // Size -= RegSizeInBytes, ++I, ++NumRegs) ++ // State->AllocateReg(IntArgRegs[I], ShadowRegs[I]); + +- return Chain; ++ State->addInRegsParamInfo(FirstReg, FirstReg + NumRegs); + } + +-// Lower a call to a callseq_start + CALL + callseq_end chain, and add input +-// and output parameter nodes. +-SDValue +-LoongArchTargetLowering::LowerCall(CallLoweringInfo &CLI, +- SmallVectorImpl &InVals) const { +- SelectionDAG &DAG = CLI.DAG; +- SDLoc &DL = CLI.DL; +- SmallVectorImpl &Outs = CLI.Outs; +- SmallVectorImpl &OutVals = CLI.OutVals; +- SmallVectorImpl &Ins = CLI.Ins; +- SDValue Chain = CLI.Chain; +- SDValue Callee = CLI.Callee; +- CallingConv::ID CallConv = CLI.CallConv; +- bool IsVarArg = CLI.IsVarArg; +- EVT PtrVT = getPointerTy(DAG.getDataLayout()); +- CLI.IsTailCall = false; ++MachineBasicBlock *LoongArchTargetLowering::emitPseudoSELECT(MachineInstr &MI, ++ MachineBasicBlock *BB, ++ bool isFPCmp, ++ unsigned Opc) const { ++ const TargetInstrInfo *TII = ++ Subtarget.getInstrInfo(); ++ DebugLoc DL = MI.getDebugLoc(); ++ ++ // To "insert" a SELECT instruction, we actually have to insert the ++ // diamond control-flow pattern. 
The incoming instruction knows the ++ // destination vreg to set, the condition code register to branch on, the ++ // true/false values to select between, and a branch opcode to use. ++ const BasicBlock *LLVM_BB = BB->getBasicBlock(); ++ MachineFunction::iterator It = ++BB->getIterator(); ++ ++ // thisMBB: ++ // ... ++ // TrueVal = ... ++ // setcc r1, r2, r3 ++ // bNE r1, r0, copy1MBB ++ // fallthrough --> copy0MBB ++ MachineBasicBlock *thisMBB = BB; ++ MachineFunction *F = BB->getParent(); ++ MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); ++ F->insert(It, copy0MBB); ++ F->insert(It, sinkMBB); ++ ++ // Transfer the remainder of BB and its successor edges to sinkMBB. ++ sinkMBB->splice(sinkMBB->begin(), BB, ++ std::next(MachineBasicBlock::iterator(MI)), BB->end()); ++ sinkMBB->transferSuccessorsAndUpdatePHIs(BB); ++ ++ // Next, add the true and fallthrough blocks as its successors. ++ BB->addSuccessor(copy0MBB); ++ BB->addSuccessor(sinkMBB); ++ ++ if (isFPCmp) { ++ // bc1[tf] cc, sinkMBB ++ BuildMI(BB, DL, TII->get(Opc)) ++ .addReg(MI.getOperand(1).getReg()) ++ .addMBB(sinkMBB); ++ } else { ++ BuildMI(BB, DL, TII->get(Opc)) ++ .addReg(MI.getOperand(1).getReg()) ++ .addReg(LoongArch::ZERO) ++ .addMBB(sinkMBB); ++ } + +- if (IsVarArg) +- report_fatal_error("LowerCall with varargs not implemented"); ++ // copy0MBB: ++ // %FalseValue = ... ++ // # fallthrough to sinkMBB ++ BB = copy0MBB; + +- MachineFunction &MF = DAG.getMachineFunction(); ++ // Update machine-CFG edges ++ BB->addSuccessor(sinkMBB); + +- // Analyze the operands of the call, assigning locations to each operand. +- SmallVector ArgLocs; +- CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); ++ // sinkMBB: ++ // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ] ++ // ... ++ BB = sinkMBB; + +- analyzeOutputArgs(ArgCCInfo, Outs, CC_LoongArch); ++ BuildMI(*BB, BB->begin(), DL, TII->get(LoongArch::PHI), MI.getOperand(0).getReg()) ++ .addReg(MI.getOperand(2).getReg()) ++ .addMBB(thisMBB) ++ .addReg(MI.getOperand(3).getReg()) ++ .addMBB(copy0MBB); + +- // Get a count of how many bytes are to be pushed on the stack. +- unsigned NumBytes = ArgCCInfo.getNextStackOffset(); ++ MI.eraseFromParent(); // The pseudo instruction is gone now. + +- for (auto &Arg : Outs) { +- if (!Arg.Flags.isByVal()) +- continue; +- report_fatal_error("Passing arguments byval not implemented"); ++ return BB; ++} ++ ++MachineBasicBlock *LoongArchTargetLowering::emitLSXCBranchPseudo( ++ MachineInstr &MI, MachineBasicBlock *BB, unsigned BranchOp) const { ++ ++ MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); ++ const TargetInstrInfo *TII = Subtarget.getInstrInfo(); ++ const TargetRegisterClass *RC = &LoongArch::GPR32RegClass; ++ DebugLoc DL = MI.getDebugLoc(); ++ const BasicBlock *LLVM_BB = BB->getBasicBlock(); ++ MachineFunction::iterator It = std::next(MachineFunction::iterator(BB)); ++ MachineFunction *F = BB->getParent(); ++ MachineBasicBlock *FBB = F->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *TBB = F->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *Sink = F->CreateMachineBasicBlock(LLVM_BB); ++ F->insert(It, FBB); ++ F->insert(It, TBB); ++ F->insert(It, Sink); ++ ++ // Transfer the remainder of BB and its successor edges to Sink. ++ Sink->splice(Sink->begin(), BB, std::next(MachineBasicBlock::iterator(MI)), ++ BB->end()); ++ Sink->transferSuccessorsAndUpdatePHIs(BB); ++ ++ // Add successors. 
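++  // $BB branches to $TBB when $fcc0 is nonzero and otherwise falls through to
++  // $FBB; $FBB produces 0, $TBB produces 1, and a PHI in $Sink merges them.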
++ BB->addSuccessor(FBB); ++ BB->addSuccessor(TBB); ++ FBB->addSuccessor(Sink); ++ TBB->addSuccessor(Sink); ++ // Insert the real bnz.b instruction to $BB. ++ BuildMI(BB, DL, TII->get(BranchOp)) ++ .addReg(LoongArch::FCC0) ++ .addReg(MI.getOperand(1).getReg()); ++ ++ BuildMI(BB, DL, TII->get(LoongArch::BCNEZ)) ++ .addReg(LoongArch::FCC0) ++ .addMBB(TBB); ++ ++ // Fill $FBB. ++ unsigned RD1 = RegInfo.createVirtualRegister(RC); ++ BuildMI(*FBB, FBB->end(), DL, TII->get(LoongArch::ADDI_W), RD1) ++ .addReg(LoongArch::ZERO) ++ .addImm(0); ++ BuildMI(*FBB, FBB->end(), DL, TII->get(LoongArch::B32)).addMBB(Sink); ++ ++ // Fill $TBB. ++ unsigned RD2 = RegInfo.createVirtualRegister(RC); ++ BuildMI(*TBB, TBB->end(), DL, TII->get(LoongArch::ADDI_W), RD2) ++ .addReg(LoongArch::ZERO) ++ .addImm(1); ++ ++ // Insert phi function to $Sink. ++ BuildMI(*Sink, Sink->begin(), DL, TII->get(LoongArch::PHI), ++ MI.getOperand(0).getReg()) ++ .addReg(RD1) ++ .addMBB(FBB) ++ .addReg(RD2) ++ .addMBB(TBB); ++ ++ MI.eraseFromParent(); // The pseudo instruction is gone now. ++ return Sink; ++} ++ ++// Emit the COPY_FW pseudo instruction. ++// ++// copy_fw_pseudo $fd, $vk, n ++// => ++// vreplvei.w $rt, $vk, $n ++// copy $rt, $fd ++// ++// When n is zero, the equivalent operation can be performed with (potentially) ++// zero instructions due to register overlaps. ++MachineBasicBlock * ++LoongArchTargetLowering::emitCOPY_FW(MachineInstr &MI, ++ MachineBasicBlock *BB) const { ++ const TargetInstrInfo *TII = Subtarget.getInstrInfo(); ++ MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); ++ DebugLoc DL = MI.getDebugLoc(); ++ unsigned Fd = MI.getOperand(0).getReg(); ++ unsigned Vk = MI.getOperand(1).getReg(); ++ unsigned Lane = MI.getOperand(2).getImm(); ++ ++ if (Lane == 0) { ++ unsigned Vj = Vk; ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), Fd) ++ .addReg(Vj, 0, LoongArch::sub_lo); ++ } else { ++ unsigned Vj = RegInfo.createVirtualRegister(&LoongArch::LSX128WRegClass); ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::VREPLVEI_W), Vj) ++ .addReg(Vk) ++ .addImm(Lane); ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), Fd) ++ .addReg(Vj, 0, LoongArch::sub_lo); + } + +- Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL); ++ MI.eraseFromParent(); // The pseudo instruction is gone now. ++ return BB; ++} + +- // Copy argument values to their designated locations. +- SmallVector> RegsToPass; +- for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { +- CCValAssign &VA = ArgLocs[i]; +- SDValue ArgValue = OutVals[i]; ++// Emit the COPY_FD pseudo instruction. ++// ++// copy_fd_pseudo $fd, $vj, n ++// => ++// vreplvei.d $vd, $vj, $n ++// copy $fd, $vd:sub_64 ++// ++// When n is zero, the equivalent operation can be performed with (potentially) ++// zero instructions due to register overlaps. 
++MachineBasicBlock * ++LoongArchTargetLowering::emitCOPY_FD(MachineInstr &MI, ++ MachineBasicBlock *BB) const { ++ assert(Subtarget.isFP64bit()); ++ ++ const TargetInstrInfo *TII = Subtarget.getInstrInfo(); ++ MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); ++ unsigned Fd = MI.getOperand(0).getReg(); ++ unsigned Vk = MI.getOperand(1).getReg(); ++ unsigned Lane = MI.getOperand(2).getImm(); ++ DebugLoc DL = MI.getDebugLoc(); ++ ++ if (Lane == 0) ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), Fd) ++ .addReg(Vk, 0, LoongArch::sub_64); ++ else { ++ unsigned Vj = RegInfo.createVirtualRegister(&LoongArch::LSX128DRegClass); ++ assert(Lane == 1); ++ ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::VREPLVEI_D), Vj) ++ .addReg(Vk) ++ .addImm(Lane); ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), Fd) ++ .addReg(Vj, 0, LoongArch::sub_64); ++ } + +- // Promote the value if needed. +- // For now, only handle fully promoted arguments. +- if (VA.getLocInfo() != CCValAssign::Full) +- report_fatal_error("Unknown loc info"); ++ MI.eraseFromParent(); // The pseudo instruction is gone now. ++ return BB; ++} + +- if (VA.isRegLoc()) { +- // Queue up the argument copies and emit them at the end. +- RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue)); +- } else { +- report_fatal_error("Passing arguments via the stack not implemented"); +- } ++MachineBasicBlock * ++LoongArchTargetLowering::emitXCOPY_FW(MachineInstr &MI, ++ MachineBasicBlock *BB) const { ++ const TargetInstrInfo *TII = Subtarget.getInstrInfo(); ++ MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); ++ DebugLoc DL = MI.getDebugLoc(); ++ unsigned Fd = MI.getOperand(0).getReg(); ++ unsigned Xk = MI.getOperand(1).getReg(); ++ unsigned Lane = MI.getOperand(2).getImm(); ++ unsigned Rj = RegInfo.createVirtualRegister(&LoongArch::GPR32RegClass); ++ unsigned Xj = Xk; ++ ++ if (Lane == 0) { ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), Fd) ++ .addReg(Xj, 0, LoongArch::sub_lo); ++ } else { ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPICKVE2GR_WU), Rj) ++ .addReg(Xk) ++ .addImm(Lane); ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), Fd).addReg(Rj); + } + +- SDValue Glue; ++ MI.eraseFromParent(); // The pseudo instruction is gone now. ++ return BB; ++} + +- // Build a sequence of copy-to-reg nodes, chained and glued together. +- for (auto &Reg : RegsToPass) { +- Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue); +- Glue = Chain.getValue(1); ++MachineBasicBlock * ++LoongArchTargetLowering::emitXCOPY_FD(MachineInstr &MI, ++ MachineBasicBlock *BB) const { ++ assert(Subtarget.isFP64bit()); ++ ++ const TargetInstrInfo *TII = Subtarget.getInstrInfo(); ++ MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); ++ unsigned Fd = MI.getOperand(0).getReg(); ++ unsigned Xk = MI.getOperand(1).getReg(); ++ unsigned Lane = MI.getOperand(2).getImm(); ++ DebugLoc DL = MI.getDebugLoc(); ++ ++ unsigned Rj = RegInfo.createVirtualRegister(&LoongArch::GPR64RegClass); ++ if (Lane == 0) { ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), Fd) ++ .addReg(Xk, 0, LoongArch::sub_64); ++ } else { ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPICKVE2GR_DU), Rj) ++ .addReg(Xk) ++ .addImm(Lane); ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), Fd).addReg(Rj); + } + +- // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a +- // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't +- // split it and then direct call can be matched by PseudoCALL. +- // FIXME: Add target flags for relocation. 
+- if (GlobalAddressSDNode *S = dyn_cast(Callee)) +- Callee = DAG.getTargetGlobalAddress(S->getGlobal(), DL, PtrVT); +- else if (ExternalSymbolSDNode *S = dyn_cast(Callee)) +- Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT); ++ MI.eraseFromParent(); // The pseudo instruction is gone now. ++ return BB; ++} + +- // The first call operand is the chain and the second is the target address. +- SmallVector Ops; +- Ops.push_back(Chain); +- Ops.push_back(Callee); ++MachineBasicBlock *LoongArchTargetLowering::emitCONCAT_VECTORS( ++ MachineInstr &MI, MachineBasicBlock *BB, unsigned Bytes) const { + +- // Add argument registers to the end of the list so that they are +- // known live into the call. +- for (auto &Reg : RegsToPass) +- Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType())); ++ MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); ++ const TargetInstrInfo *TII = Subtarget.getInstrInfo(); ++ DebugLoc DL = MI.getDebugLoc(); ++ unsigned Xd = MI.getOperand(0).getReg(); ++ unsigned SubReg1 = MI.getOperand(1).getReg(); ++ unsigned SubReg2 = MI.getOperand(2).getReg(); ++ const TargetRegisterClass *RC = nullptr; + +- // Add a register mask operand representing the call-preserved registers. +- const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); +- const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv); +- assert(Mask && "Missing call preserved mask for calling convention"); +- Ops.push_back(DAG.getRegisterMask(Mask)); ++ switch (Bytes) { ++ default: ++ llvm_unreachable("Unexpected size"); ++ case 1: ++ RC = &LoongArch::LASX256BRegClass; ++ break; ++ case 2: ++ RC = &LoongArch::LASX256HRegClass; ++ break; ++ case 4: ++ RC = &LoongArch::LASX256WRegClass; ++ break; ++ case 8: ++ RC = &LoongArch::LASX256DRegClass; ++ break; ++ } + +- // Glue the call to the argument copies, if any. +- if (Glue.getNode()) +- Ops.push_back(Glue); ++ unsigned X0 = RegInfo.createVirtualRegister(RC); ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), X0) ++ .addImm(0) ++ .addReg(SubReg1) ++ .addImm(LoongArch::sub_128); ++ unsigned X1 = RegInfo.createVirtualRegister(RC); ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), X1) ++ .addImm(0) ++ .addReg(SubReg2) ++ .addImm(LoongArch::sub_128); ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), Xd) ++ .addReg(X0) ++ .addReg(X1) ++ .addImm(2); ++ ++ MI.eraseFromParent(); // The pseudo instruction is gone now. ++ return BB; ++} + +- // Emit the call. 
+- SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); ++// xcopy_fw_gpr_pseudo $fd, $xs, $rk ++// => ++// bb: addi.d $rt1, zero, 4 ++// bge $lane, $rt1 hbb ++// lbb:xvreplve.w $xt1, $xs, $lane ++// copy $rf0, $xt1 ++// b sink ++// hbb: addi.d $rt2, $lane, -4 ++// xvpermi.q $xt2 $xs, 1 ++// xvreplve.w $xt3, $xt2, $rt2 ++// copy $rf1, $xt3 ++// sink:phi ++MachineBasicBlock * ++LoongArchTargetLowering::emitXCOPY_FW_GPR(MachineInstr &MI, ++ MachineBasicBlock *BB) const { ++ const TargetInstrInfo *TII = Subtarget.getInstrInfo(); ++ MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); ++ DebugLoc DL = MI.getDebugLoc(); ++ unsigned Xs = MI.getOperand(1).getReg(); ++ unsigned Lane = MI.getOperand(2).getReg(); ++ ++ const TargetRegisterClass *RC = &LoongArch::GPR64RegClass; ++ const BasicBlock *LLVM_BB = BB->getBasicBlock(); ++ MachineFunction::iterator It = std::next(MachineFunction::iterator(BB)); ++ MachineFunction *F = BB->getParent(); ++ MachineBasicBlock *HBB = F->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *LBB = F->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *Sink = F->CreateMachineBasicBlock(LLVM_BB); ++ F->insert(It, LBB); ++ F->insert(It, HBB); ++ F->insert(It, Sink); ++ ++ Sink->splice(Sink->begin(), BB, std::next(MachineBasicBlock::iterator(MI)), ++ BB->end()); ++ Sink->transferSuccessorsAndUpdatePHIs(BB); ++ ++ BB->addSuccessor(LBB); ++ BB->addSuccessor(HBB); ++ HBB->addSuccessor(Sink); ++ LBB->addSuccessor(Sink); ++ ++ unsigned Rt1 = RegInfo.createVirtualRegister(RC); ++ BuildMI(BB, DL, TII->get(LoongArch::ADDI_D), Rt1) ++ .addReg(LoongArch::ZERO_64) ++ .addImm(4); ++ BuildMI(BB, DL, TII->get(LoongArch::BGE)) ++ .addReg(Lane) ++ .addReg(Rt1) ++ .addMBB(HBB); ++ ++ unsigned Xt1 = RegInfo.createVirtualRegister(&LoongArch::LASX256WRegClass); ++ unsigned Rf0 = RegInfo.createVirtualRegister(&LoongArch::FGR32RegClass); ++ BuildMI(*LBB, LBB->end(), DL, TII->get(LoongArch::XVREPLVE_W_N), Xt1) ++ .addReg(Xs) ++ .addReg(Lane); ++ BuildMI(*LBB, LBB->end(), DL, TII->get(LoongArch::COPY), Rf0) ++ .addReg(Xt1, 0, LoongArch::sub_lo); ++ BuildMI(*LBB, LBB->end(), DL, TII->get(LoongArch::B)).addMBB(Sink); ++ ++ unsigned Xt2 = RegInfo.createVirtualRegister(&LoongArch::LASX256WRegClass); ++ unsigned Xt3 = RegInfo.createVirtualRegister(&LoongArch::LASX256WRegClass); ++ unsigned Rt2 = RegInfo.createVirtualRegister(RC); ++ unsigned Rf1 = RegInfo.createVirtualRegister(&LoongArch::FGR32RegClass); ++ BuildMI(*HBB, HBB->end(), DL, TII->get(LoongArch::ADDI_D), Rt2) ++ .addReg(Lane) ++ .addImm(-4); ++ BuildMI(*HBB, HBB->end(), DL, TII->get(LoongArch::XVPERMI_Q), Xt2) ++ .addReg(Xs) ++ .addReg(Xs) ++ .addImm(1); ++ BuildMI(*HBB, HBB->end(), DL, TII->get(LoongArch::XVREPLVE_W_N), Xt3) ++ .addReg(Xt2) ++ .addReg(Rt2); ++ BuildMI(*HBB, HBB->end(), DL, TII->get(LoongArch::COPY), Rf1) ++ .addReg(Xt3, 0, LoongArch::sub_lo); ++ ++ BuildMI(*Sink, Sink->begin(), DL, TII->get(LoongArch::PHI), ++ MI.getOperand(0).getReg()) ++ .addReg(Rf0) ++ .addMBB(LBB) ++ .addReg(Rf1) ++ .addMBB(HBB); ++ ++ MI.eraseFromParent(); // The pseudo instruction is gone now. 
++ return Sink; ++} + +- Chain = DAG.getNode(LoongArchISD::CALL, DL, NodeTys, Ops); +- DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge); +- Glue = Chain.getValue(1); ++MachineBasicBlock * ++LoongArchTargetLowering::emitXINSERT_BH(MachineInstr &MI, MachineBasicBlock *BB, ++ unsigned Size) const { ++ const TargetInstrInfo *TII = Subtarget.getInstrInfo(); ++ MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); ++ DebugLoc DL = MI.getDebugLoc(); ++ unsigned Xd = MI.getOperand(0).getReg(); ++ unsigned Xd_in = MI.getOperand(1).getReg(); ++ unsigned Lane = MI.getOperand(2).getImm(); ++ unsigned Fs = MI.getOperand(3).getReg(); ++ const TargetRegisterClass *VecRC = nullptr; ++ const TargetRegisterClass *SubVecRC = nullptr; ++ unsigned HalfSize = 0; ++ unsigned InsertOp = 0; ++ ++ if (Size == 1) { ++ VecRC = &LoongArch::LASX256BRegClass; ++ SubVecRC = &LoongArch::LSX128BRegClass; ++ HalfSize = 16; ++ InsertOp = LoongArch::VINSGR2VR_B; ++ } else if (Size == 2) { ++ VecRC = &LoongArch::LASX256HRegClass; ++ SubVecRC = &LoongArch::LSX128HRegClass; ++ HalfSize = 8; ++ InsertOp = LoongArch::VINSGR2VR_H; ++ } else { ++ llvm_unreachable("Unexpected type"); ++ } + +- // Mark the end of the call, which is glued to the call itself. +- Chain = DAG.getCALLSEQ_END(Chain, DAG.getConstant(NumBytes, DL, PtrVT, true), +- DAG.getConstant(0, DL, PtrVT, true), Glue, DL); +- Glue = Chain.getValue(1); ++ unsigned Xk = Xd_in; ++ unsigned Imm = Lane; ++ if (Lane >= HalfSize) { ++ Xk = RegInfo.createVirtualRegister(VecRC); ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), Xk) ++ .addReg(Xd_in) ++ .addReg(Xd_in) ++ .addImm(1); ++ Imm = Lane - HalfSize; ++ } + +- // Assign locations to each value returned by this call. +- SmallVector RVLocs; +- CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext()); +- analyzeInputArgs(RetCCInfo, Ins, CC_LoongArch); ++ unsigned Xk128 = RegInfo.createVirtualRegister(SubVecRC); ++ unsigned Xd128 = RegInfo.createVirtualRegister(SubVecRC); ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), Xk128) ++ .addReg(Xk, 0, LoongArch::sub_128); ++ BuildMI(*BB, MI, DL, TII->get(InsertOp), Xd128) ++ .addReg(Xk128) ++ .addReg(Fs) ++ .addImm(Imm); ++ ++ unsigned Xd256 = Xd; ++ if (Lane >= HalfSize) { ++ Xd256 = RegInfo.createVirtualRegister(VecRC); ++ } + +- // Copy all of the result registers out of their specified physreg. +- for (auto &VA : RVLocs) { +- // Copy the value out. +- SDValue RetValue = +- DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue); +- Chain = RetValue.getValue(1); +- Glue = RetValue.getValue(2); ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), Xd256) ++ .addImm(0) ++ .addReg(Xd128) ++ .addImm(LoongArch::sub_128); + +- InVals.push_back(Chain.getValue(0)); ++ if (Lane >= HalfSize) { ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), Xd) ++ .addReg(Xd_in) ++ .addReg(Xd256) ++ .addImm(2); + } + +- return Chain; ++ MI.eraseFromParent(); // The pseudo instruction is gone now. ++ return BB; + } + +-bool LoongArchTargetLowering::CanLowerReturn( +- CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, +- const SmallVectorImpl &Outs, LLVMContext &Context) const { +- // Any return value split in to more than two values can't be returned +- // directly. 
+- return Outs.size() <= 2; ++MachineBasicBlock * ++LoongArchTargetLowering::emitXINSERT_FW(MachineInstr &MI, ++ MachineBasicBlock *BB) const { ++ const TargetInstrInfo *TII = Subtarget.getInstrInfo(); ++ MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); ++ DebugLoc DL = MI.getDebugLoc(); ++ unsigned Xd = MI.getOperand(0).getReg(); ++ unsigned Xd_in = MI.getOperand(1).getReg(); ++ unsigned Lane = MI.getOperand(2).getImm(); ++ unsigned Fs = MI.getOperand(3).getReg(); ++ unsigned Xj = RegInfo.createVirtualRegister(&LoongArch::LASX256WRegClass); ++ unsigned Rj = RegInfo.createVirtualRegister(&LoongArch::GPR32RegClass); ++ ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), Xj) ++ .addImm(0) ++ .addReg(Fs) ++ .addImm(LoongArch::sub_lo); ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPICKVE2GR_WU), Rj) ++ .addReg(Xj) ++ .addImm(0); ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::XVINSGR2VR_W), Xd) ++ .addReg(Xd_in) ++ .addReg(Rj) ++ .addImm(Lane); ++ ++ MI.eraseFromParent(); // The pseudo instruction is gone now. ++ return BB; + } + +-SDValue LoongArchTargetLowering::LowerReturn( +- SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, +- const SmallVectorImpl &Outs, +- const SmallVectorImpl &OutVals, const SDLoc &DL, +- SelectionDAG &DAG) const { +- // Stores the assignment of the return value to a location. +- SmallVector RVLocs; ++// Emit the INSERT_FW pseudo instruction. ++// ++// insert_fw_pseudo $vd, $vd_in, $n, $fs ++// => ++// subreg_to_reg $vj:sub_lo, $fs ++// vpickve2gr.w rj, vj, 0 ++// vinsgr2vr.w, vd, rj, lane ++MachineBasicBlock * ++LoongArchTargetLowering::emitINSERT_FW(MachineInstr &MI, ++ MachineBasicBlock *BB) const { ++ const TargetInstrInfo *TII = Subtarget.getInstrInfo(); ++ MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); ++ DebugLoc DL = MI.getDebugLoc(); ++ unsigned Vd = MI.getOperand(0).getReg(); ++ unsigned Vd_in = MI.getOperand(1).getReg(); ++ unsigned Lane = MI.getOperand(2).getImm(); ++ unsigned Fs = MI.getOperand(3).getReg(); ++ unsigned Rj = RegInfo.createVirtualRegister(&LoongArch::GPR32RegClass); ++ unsigned Vj = RegInfo.createVirtualRegister(&LoongArch::LSX128WRegClass); ++ ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), Vj) ++ .addImm(0) ++ .addReg(Fs) ++ .addImm(LoongArch::sub_lo); ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::VPICKVE2GR_W), Rj) ++ .addReg(Vj) ++ .addImm(0); ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::VINSGR2VR_W), Vd) ++ .addReg(Vd_in) ++ .addReg(Rj) ++ .addImm(Lane); ++ ++ MI.eraseFromParent(); // The pseudo instruction is gone now. ++ return BB; ++} ++ ++// Emit the INSERT_FD pseudo instruction. 
++// insert_fd_pseudo $vd, $fs, n ++// => ++// subreg_to_reg $vk:sub_64, $fs ++// vpickve2gr.d rj, vk, 0 ++// vinsgr2vr.d vd, rj, lane ++MachineBasicBlock * ++LoongArchTargetLowering::emitINSERT_FD(MachineInstr &MI, ++ MachineBasicBlock *BB) const { ++ assert(Subtarget.isFP64bit()); ++ ++ const TargetInstrInfo *TII = Subtarget.getInstrInfo(); ++ MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); ++ DebugLoc DL = MI.getDebugLoc(); ++ unsigned Vd = MI.getOperand(0).getReg(); ++ unsigned Vd_in = MI.getOperand(1).getReg(); ++ unsigned Lane = MI.getOperand(2).getImm(); ++ unsigned Fs = MI.getOperand(3).getReg(); ++ unsigned Vj = RegInfo.createVirtualRegister(&LoongArch::LSX128DRegClass); ++ unsigned Rj = RegInfo.createVirtualRegister(&LoongArch::GPR64RegClass); ++ ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), Vj) ++ .addImm(0) ++ .addReg(Fs) ++ .addImm(LoongArch::sub_64); ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::VPICKVE2GR_D), Rj) ++ .addReg(Vj) ++ .addImm(0); ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::VINSGR2VR_D), Vd) ++ .addReg(Vd_in) ++ .addReg(Rj) ++ .addImm(Lane); ++ ++ MI.eraseFromParent(); // The pseudo instruction is gone now. ++ return BB; ++} + +- // Info about the registers and stack slot. +- CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs, +- *DAG.getContext()); ++MachineBasicBlock * ++LoongArchTargetLowering::emitXINSERT_FD(MachineInstr &MI, ++ MachineBasicBlock *BB) const { ++ assert(Subtarget.isFP64bit()); ++ ++ const TargetInstrInfo *TII = Subtarget.getInstrInfo(); ++ MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); ++ DebugLoc DL = MI.getDebugLoc(); ++ unsigned Xd = MI.getOperand(0).getReg(); ++ unsigned Xd_in = MI.getOperand(1).getReg(); ++ unsigned Lane = MI.getOperand(2).getImm(); ++ unsigned Fs = MI.getOperand(3).getReg(); ++ unsigned Xj = RegInfo.createVirtualRegister(&LoongArch::LASX256DRegClass); ++ ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), Xj) ++ .addImm(0) ++ .addReg(Fs) ++ .addImm(LoongArch::sub_64); ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::XVINSVE0_D), Xd) ++ .addReg(Xd_in) ++ .addReg(Xj) ++ .addImm(Lane); ++ ++ MI.eraseFromParent(); // The pseudo instruction is gone now. ++ return BB; ++} + +- analyzeOutputArgs(CCInfo, Outs, CC_LoongArch); ++// Emit the FILL_FW pseudo instruction. ++// ++// fill_fw_pseudo $vd, $fs ++// => ++// implicit_def $vt1 ++// insert_subreg $vt2:subreg_lo, $vt1, $fs ++// vreplvei.w vd, vt2, 0 ++MachineBasicBlock * ++LoongArchTargetLowering::emitFILL_FW(MachineInstr &MI, ++ MachineBasicBlock *BB) const { ++ const TargetInstrInfo *TII = Subtarget.getInstrInfo(); ++ MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); ++ DebugLoc DL = MI.getDebugLoc(); ++ unsigned Vd = MI.getOperand(0).getReg(); ++ unsigned Fs = MI.getOperand(1).getReg(); ++ unsigned Vj1 = RegInfo.createVirtualRegister(&LoongArch::LSX128WRegClass); ++ unsigned Vj2 = RegInfo.createVirtualRegister(&LoongArch::LSX128WRegClass); ++ ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::IMPLICIT_DEF), Vj1); ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::INSERT_SUBREG), Vj2) ++ .addReg(Vj1) ++ .addReg(Fs) ++ .addImm(LoongArch::sub_lo); ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::VREPLVEI_W), Vd) ++ .addReg(Vj2) ++ .addImm(0); ++ ++ MI.eraseFromParent(); // The pseudo instruction is gone now. ++ return BB; ++} + +- SDValue Glue; +- SmallVector RetOps(1, Chain); ++// Emit the FILL_FD pseudo instruction. 
++// ++// fill_fd_pseudo $vd, $fs ++// => ++// implicit_def $vt1 ++// insert_subreg $vt2:subreg_64, $vt1, $fs ++// vreplvei.d vd, vt2, 0 ++MachineBasicBlock * ++LoongArchTargetLowering::emitFILL_FD(MachineInstr &MI, ++ MachineBasicBlock *BB) const { ++ assert(Subtarget.isFP64bit()); ++ ++ const TargetInstrInfo *TII = Subtarget.getInstrInfo(); ++ MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); ++ DebugLoc DL = MI.getDebugLoc(); ++ unsigned Vd = MI.getOperand(0).getReg(); ++ unsigned Fs = MI.getOperand(1).getReg(); ++ unsigned Vj1 = RegInfo.createVirtualRegister(&LoongArch::LSX128DRegClass); ++ unsigned Vj2 = RegInfo.createVirtualRegister(&LoongArch::LSX128DRegClass); ++ ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::IMPLICIT_DEF), Vj1); ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::INSERT_SUBREG), Vj2) ++ .addReg(Vj1) ++ .addReg(Fs) ++ .addImm(LoongArch::sub_64); ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::VREPLVEI_D), Vd) ++ .addReg(Vj2) ++ .addImm(0); ++ ++ MI.eraseFromParent(); // The pseudo instruction is gone now. ++ return BB; ++} + +- // Copy the result values into the output registers. +- for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) { +- CCValAssign &VA = RVLocs[i]; +- assert(VA.isRegLoc() && "Can only return in registers!"); ++// Emit the XFILL_FW pseudo instruction. ++// ++// xfill_fw_pseudo $xd, $fs ++// => ++// implicit_def $xt1 ++// insert_subreg $xt2:subreg_lo, $xt1, $fs ++// xvreplve0.w xd, xt2, 0 ++MachineBasicBlock * ++LoongArchTargetLowering::emitXFILL_FW(MachineInstr &MI, ++ MachineBasicBlock *BB) const { ++ const TargetInstrInfo *TII = Subtarget.getInstrInfo(); ++ MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); ++ DebugLoc DL = MI.getDebugLoc(); ++ unsigned Xd = MI.getOperand(0).getReg(); ++ unsigned Fs = MI.getOperand(1).getReg(); ++ unsigned Xj1 = RegInfo.createVirtualRegister(&LoongArch::LASX256WRegClass); ++ unsigned Xj2 = RegInfo.createVirtualRegister(&LoongArch::LASX256WRegClass); ++ ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::IMPLICIT_DEF), Xj1); ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::INSERT_SUBREG), Xj2) ++ .addReg(Xj1) ++ .addReg(Fs) ++ .addImm(LoongArch::sub_lo); ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::XVREPLVE0_W), Xd).addReg(Xj2); ++ ++ MI.eraseFromParent(); // The pseudo instruction is gone now. ++ return BB; ++} + +- // Handle a 'normal' return. +- Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), OutVals[i], Glue); ++// Emit the XFILL_FD pseudo instruction. ++// ++// xfill_fd_pseudo $xd, $fs ++// => ++// implicit_def $xt1 ++// insert_subreg $xt2:subreg_64, $xt1, $fs ++// xvreplve0.d xd, xt2, 0 ++MachineBasicBlock * ++LoongArchTargetLowering::emitXFILL_FD(MachineInstr &MI, ++ MachineBasicBlock *BB) const { ++ assert(Subtarget.isFP64bit()); ++ ++ const TargetInstrInfo *TII = Subtarget.getInstrInfo(); ++ MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); ++ DebugLoc DL = MI.getDebugLoc(); ++ unsigned Xd = MI.getOperand(0).getReg(); ++ unsigned Fs = MI.getOperand(1).getReg(); ++ unsigned Xj1 = RegInfo.createVirtualRegister(&LoongArch::LASX256DRegClass); ++ unsigned Xj2 = RegInfo.createVirtualRegister(&LoongArch::LASX256DRegClass); ++ ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::IMPLICIT_DEF), Xj1); ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::INSERT_SUBREG), Xj2) ++ .addReg(Xj1) ++ .addReg(Fs) ++ .addImm(LoongArch::sub_64); ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::XVREPLVE0_D), Xd).addReg(Xj2); ++ ++ MI.eraseFromParent(); // The pseudo instruction is gone now. 
++ return BB; ++} + +- // Guarantee that all emitted copies are stuck together. +- Glue = Chain.getValue(1); +- RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); ++bool LoongArchTargetLowering::isLegalAddImmediate(int64_t Imm) const { ++ bool IsLegal = false; ++ if (Subtarget.hasLSX() || Subtarget.hasLASX()) { ++ return isUInt<5>(Imm); + } ++ return IsLegal; ++} + +- RetOps[0] = Chain; // Update chain. ++bool LoongArchTargetLowering::isFMAFasterThanFMulAndFAdd( ++ const MachineFunction &MF, EVT VT) const { + +- // Add the glue node if we have it. +- if (Glue.getNode()) +- RetOps.push_back(Glue); ++ VT = VT.getScalarType(); + +- return DAG.getNode(LoongArchISD::RET, DL, MVT::Other, RetOps); +-} ++ if (!VT.isSimple()) ++ return false; + +-bool LoongArchTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT, +- bool ForCodeSize) const { +- assert((VT == MVT::f32 || VT == MVT::f64) && "Unexpected VT"); ++ switch (VT.getSimpleVT().SimpleTy) { ++ case MVT::f32: ++ case MVT::f64: ++ return true; ++ default: ++ break; ++ } + +- if (VT == MVT::f32 && !Subtarget.hasBasicF()) +- return false; +- if (VT == MVT::f64 && !Subtarget.hasBasicD()) ++ return false; ++} ++ ++bool LoongArchTargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, ++ unsigned Index) const { ++ if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT)) + return false; +- return (Imm.isZero() || Imm.isExactlyValue(+1.0)); ++ ++ return ( ++ (ResVT != MVT::v16i8) && (ResVT != MVT::v8i16) && ++ (Index == 0 || (Index == ResVT.getVectorNumElements() && ++ (ResVT.getSizeInBits() == SrcVT.getSizeInBits() / 2)))); ++} ++ ++Register ++LoongArchTargetLowering::getRegisterByName(const char *RegName, LLT VT, ++ const MachineFunction &MF) const { ++ // Named registers is expected to be fairly rare. For now, just support $r2 ++ // and $r21 since the linux kernel uses them. ++ if (Subtarget.is64Bit()) { ++ Register Reg = StringSwitch(RegName) ++ .Case("$r2", LoongArch::TP_64) ++ .Case("$r21", LoongArch::T9_64) ++ .Default(Register()); ++ if (Reg) ++ return Reg; ++ } else { ++ Register Reg = StringSwitch(RegName) ++ .Case("$r2", LoongArch::TP) ++ .Case("$r21", LoongArch::T9) ++ .Default(Register()); ++ if (Reg) ++ return Reg; ++ } ++ report_fatal_error("Invalid register name global variable"); + } +diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h +index 141f1fd3a..64e06b53f 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h ++++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h +@@ -1,4 +1,4 @@ +-//=- LoongArchISelLowering.h - LoongArch DAG Lowering Interface -*- C++ -*-===// ++//===- LoongArchISelLowering.h - LoongArch DAG Lowering Interface ---------*- C++ -*-===// + // + // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + // See https://llvm.org/LICENSE.txt for license information. +@@ -6,115 +6,553 @@ + // + //===----------------------------------------------------------------------===// + // +-// This file defines the interfaces that LoongArch uses to lower LLVM code into +-// a selection DAG. ++// This file defines the interfaces that LoongArch uses to lower LLVM code into a ++// selection DAG. 
+ // + //===----------------------------------------------------------------------===// + + #ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCHISELLOWERING_H + #define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHISELLOWERING_H + ++#include "MCTargetDesc/LoongArchABIInfo.h" ++#include "MCTargetDesc/LoongArchBaseInfo.h" ++#include "MCTargetDesc/LoongArchMCTargetDesc.h" + #include "LoongArch.h" + #include "llvm/CodeGen/CallingConvLower.h" ++#include "llvm/CodeGen/ISDOpcodes.h" ++#include "llvm/CodeGen/MachineMemOperand.h" + #include "llvm/CodeGen/SelectionDAG.h" ++#include "llvm/CodeGen/SelectionDAGNodes.h" + #include "llvm/CodeGen/TargetLowering.h" ++#include "llvm/CodeGen/ValueTypes.h" ++#include "llvm/IR/CallingConv.h" ++#include "llvm/IR/InlineAsm.h" ++#include "llvm/IR/Type.h" ++#include "llvm/Support/MachineValueType.h" ++#include "llvm/Target/TargetMachine.h" ++#include ++#include ++#include ++#include ++#include ++#include + + namespace llvm { ++ ++class Argument; ++class CCState; ++class CCValAssign; ++class FastISel; ++class FunctionLoweringInfo; ++class MachineBasicBlock; ++class MachineFrameInfo; ++class MachineInstr; ++class LoongArchCCState; ++class LoongArchFunctionInfo; + class LoongArchSubtarget; +-struct LoongArchRegisterInfo; +-namespace LoongArchISD { +-enum NodeType : unsigned { +- FIRST_NUMBER = ISD::BUILTIN_OP_END, +- +- // TODO: add more LoongArchISDs +- CALL, +- RET, +- // 32-bit shifts, directly matching the semantics of the named LoongArch +- // instructions. +- SLL_W, +- SRA_W, +- SRL_W, +- +- // FPR<->GPR transfer operations +- MOVGR2FR_W_LA64, +- MOVFR2GR_S_LA64, +- +- FTINT, +- +- BSTRINS, +- BSTRPICK, +- +-}; +-} // end namespace LoongArchISD +- +-class LoongArchTargetLowering : public TargetLowering { +- const LoongArchSubtarget &Subtarget; +- +-public: +- explicit LoongArchTargetLowering(const TargetMachine &TM, +- const LoongArchSubtarget &STI); +- +- const LoongArchSubtarget &getSubtarget() const { return Subtarget; } +- +- // Provide custom lowering hooks for some operations. +- SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; +- void ReplaceNodeResults(SDNode *N, SmallVectorImpl &Results, +- SelectionDAG &DAG) const override; +- +- SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; +- +- // This method returns the name of a target specific DAG node. +- const char *getTargetNodeName(unsigned Opcode) const override; +- +- // Lower incoming arguments, copy physregs into vregs. +- SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, +- bool IsVarArg, +- const SmallVectorImpl &Ins, +- const SDLoc &DL, SelectionDAG &DAG, +- SmallVectorImpl &InVals) const override; +- bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, +- bool IsVarArg, +- const SmallVectorImpl &Outs, +- LLVMContext &Context) const override; +- SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, +- const SmallVectorImpl &Outs, +- const SmallVectorImpl &OutVals, const SDLoc &DL, +- SelectionDAG &DAG) const override; +- SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, +- SmallVectorImpl &InVals) const override; +- +-private: +- /// Target-specific function used to lower LoongArch calling conventions. 
+- typedef bool LoongArchCCAssignFn(unsigned ValNo, MVT ValVT, +- CCValAssign::LocInfo LocInfo, +- CCState &State); +- +- void analyzeInputArgs(CCState &CCInfo, +- const SmallVectorImpl &Ins, +- LoongArchCCAssignFn Fn) const; +- void analyzeOutputArgs(CCState &CCInfo, +- const SmallVectorImpl &Outs, +- LoongArchCCAssignFn Fn) const; +- +- SDValue lowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; +- SDValue lowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const; +- SDValue lowerShiftRightParts(SDValue Op, SelectionDAG &DAG, bool IsSRA) const; +- +- MachineBasicBlock * +- EmitInstrWithCustomInserter(MachineInstr &MI, +- MachineBasicBlock *BB) const override; +- SDValue lowerConstantPool(SDValue Op, SelectionDAG &DAG) const; +- SDValue lowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const; +- SDValue lowerBITCAST(SDValue Op, SelectionDAG &DAG) const; +- SDValue lowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; +- +- bool isFPImmLegal(const APFloat &Imm, EVT VT, +- bool ForCodeSize) const override; +- +- bool shouldInsertFencesForAtomic(const Instruction *I) const override { +- return isa(I) || isa(I); +- } +-}; ++class LoongArchTargetMachine; ++class SelectionDAG; ++class TargetLibraryInfo; ++class TargetRegisterClass; ++ ++ namespace LoongArchISD { ++ ++ enum NodeType : unsigned { ++ // Start the numbering from where ISD NodeType finishes. ++ FIRST_NUMBER = ISD::BUILTIN_OP_END, ++ ++ // Jump and link (call) ++ JmpLink, ++ ++ // Tail call ++ TailCall, ++ ++ // global address ++ GlobalAddress, ++ ++ // Floating Point Branch Conditional ++ FPBrcond, ++ ++ // Floating Point Compare ++ FPCmp, ++ ++ // Floating Point Conditional Moves ++ CMovFP_T, ++ CMovFP_F, ++ FSEL, ++ ++ // FP-to-int truncation node. ++ TruncIntFP, ++ ++ // Return ++ Ret, ++ ++ // error trap Return ++ ERet, ++ ++ // Software Exception Return. ++ EH_RETURN, ++ ++ DBAR, ++ ++ BSTRPICK, ++ BSTRINS, ++ ++ // Vector comparisons. ++ // These take a vector and return a boolean. ++ VALL_ZERO, ++ VANY_ZERO, ++ VALL_NONZERO, ++ VANY_NONZERO, ++ ++ // Vector Shuffle with mask as an operand ++ VSHF, // Generic shuffle ++ SHF, // 4-element set shuffle. ++ VPACKEV, // Interleave even elements ++ VPACKOD, // Interleave odd elements ++ VILVH, // Interleave left elements ++ VILVL, // Interleave right elements ++ VPICKEV, // Pack even elements ++ VPICKOD, // Pack odd elements ++ ++ // Vector Lane Copy ++ INSVE, // Copy element from one vector to another ++ ++ // Combined (XOR (OR $a, $b), -1) ++ VNOR, ++ ++ VROR, ++ VRORI, ++ XVPICKVE, ++ XVPERMI, ++ XVSHUF4I, ++ REVBD, ++ ++ // Extended vector element extraction ++ VEXTRACT_SEXT_ELT, ++ VEXTRACT_ZEXT_ELT, ++ ++ XVBROADCAST, ++ VBROADCAST, ++ VABSD, ++ UVABSD, ++ }; ++ ++ } // ene namespace LoongArchISD ++ ++ //===--------------------------------------------------------------------===// ++ // TargetLowering Implementation ++ //===--------------------------------------------------------------------===// ++ ++ class LoongArchTargetLowering : public TargetLowering { ++ public: ++ explicit LoongArchTargetLowering(const LoongArchTargetMachine &TM, ++ const LoongArchSubtarget &STI); ++ ++ bool allowsMisalignedMemoryAccesses( ++ EVT VT, unsigned AS = 0, Align Alignment = Align(1), ++ MachineMemOperand::Flags Flags = MachineMemOperand::MONone, ++ bool *Fast = nullptr) const override; ++ ++ /// Enable LSX support for the given integer type and Register ++ /// class. 
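++    /// For example, the lowering constructor is expected to call
++    /// addLSXIntType(MVT::v4i32, &LoongArch::LSX128WRegClass) for each
++    /// 128-bit integer vector type it wants to make legal.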
++ void addLSXIntType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC); ++ ++ /// Enable LSX support for the given floating-point type and ++ /// Register class. ++ void addLSXFloatType(MVT::SimpleValueType Ty, ++ const TargetRegisterClass *RC); ++ ++ /// Enable LASX support for the given integer type and Register ++ /// class. ++ void addLASXIntType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC); ++ ++ /// Enable LASX support for the given floating-point type and ++ /// Register class. ++ void addLASXFloatType(MVT::SimpleValueType Ty, ++ const TargetRegisterClass *RC); ++ ++ MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override { ++ return MVT::i32; ++ } ++ ++ EVT getTypeForExtReturn(LLVMContext &Context, EVT VT, ++ ISD::NodeType) const override; ++ ++ bool isCheapToSpeculateCttz() const override; ++ bool isCheapToSpeculateCtlz() const override; ++ ++ bool isLegalAddImmediate(int64_t) const override; ++ ++ /// Return the correct alignment for the current calling convention. ++ Align getABIAlignmentForCallingConv(Type *ArgTy, ++ const DataLayout &DL) const override { ++ const Align ABIAlign = DL.getABITypeAlign(ArgTy); ++ if (ArgTy->isVectorTy()) ++ return std::min(ABIAlign, Align(8)); ++ return ABIAlign; ++ } ++ ++ ISD::NodeType getExtendForAtomicOps() const override { ++ return ISD::SIGN_EXTEND; ++ } ++ ++ bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, ++ unsigned Index) const override; ++ ++ void LowerOperationWrapper(SDNode *N, ++ SmallVectorImpl &Results, ++ SelectionDAG &DAG) const override; ++ ++ /// LowerOperation - Provide custom lowering hooks for some operations. ++ SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; ++ ++ bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, ++ EVT VT) const override; ++ ++ /// ReplaceNodeResults - Replace the results of node with an illegal result ++ /// type with new values built out of custom code. ++ /// ++ void ReplaceNodeResults(SDNode *N, SmallVectorImpl&Results, ++ SelectionDAG &DAG) const override; ++ ++ /// getTargetNodeName - This method returns the name of a target specific ++ // DAG node. ++ const char *getTargetNodeName(unsigned Opcode) const override; ++ ++ /// getSetCCResultType - get the ISD::SETCC result ValueType ++ EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, ++ EVT VT) const override; ++ ++ SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; ++ ++ MachineBasicBlock * ++ EmitInstrWithCustomInserter(MachineInstr &MI, ++ MachineBasicBlock *MBB) const override; ++ ++ bool isShuffleMaskLegal(ArrayRef Mask, EVT VT) const override { ++ return false; ++ } ++ ++ const TargetRegisterClass *getRepRegClassFor(MVT VT) const override; ++ ++ void AdjustInstrPostInstrSelection(MachineInstr &MI, ++ SDNode *Node) const override; ++ ++ void HandleByVal(CCState *, unsigned &, Align) const override; ++ ++ Register getRegisterByName(const char* RegName, LLT VT, ++ const MachineFunction &MF) const override; ++ ++ /// If a physical register, this returns the register that receives the ++ /// exception address on entry to an EH pad. ++ Register ++ getExceptionPointerRegister(const Constant *PersonalityFn) const override { ++ return ABI.IsLP64() ? LoongArch::A0_64 : LoongArch::A0; ++ } ++ ++ /// If a physical register, this returns the register that receives the ++ /// exception typeid on entry to a landing pad. ++ Register ++ getExceptionSelectorRegister(const Constant *PersonalityFn) const override { ++ return ABI.IsLP64() ? 
LoongArch::A1_64 : LoongArch::A1; ++ } ++ ++ bool isJumpTableRelative() const override { ++ return getTargetMachine().isPositionIndependent(); ++ } ++ ++ CCAssignFn *CCAssignFnForCall() const; ++ ++ CCAssignFn *CCAssignFnForReturn() const; ++ ++ private: ++ template ++ SDValue getAddr(NodeTy *N, SelectionDAG &DAG, bool IsLocal = true) const; ++ ++ /// This function fills Ops, which is the list of operands that will later ++ /// be used when a function call node is created. It also generates ++ /// copyToReg nodes to set up argument registers. ++ void getOpndList(SmallVectorImpl &Ops, ++ std::deque> &RegsToPass, ++ bool IsPICCall, bool GlobalOrExternal, bool IsCallReloc, ++ CallLoweringInfo &CLI, SDValue Callee, SDValue Chain, ++ bool IsTailCall) const; ++ ++ SDValue lowerLOAD(SDValue Op, SelectionDAG &DAG) const; ++ SDValue lowerSTORE(SDValue Op, SelectionDAG &DAG) const; ++ ++ // Subtarget Info ++ const LoongArchSubtarget &Subtarget; ++ // Cache the ABI from the TargetMachine, we use it everywhere. ++ const LoongArchABIInfo &ABI; ++ ++ // Create a TargetGlobalAddress node. ++ SDValue getTargetNode(GlobalAddressSDNode *N, EVT Ty, SelectionDAG &DAG, ++ unsigned Flag) const; ++ ++ // Create a TargetExternalSymbol node. ++ SDValue getTargetNode(ExternalSymbolSDNode *N, EVT Ty, SelectionDAG &DAG, ++ unsigned Flag) const; ++ ++ // Create a TargetBlockAddress node. ++ SDValue getTargetNode(BlockAddressSDNode *N, EVT Ty, SelectionDAG &DAG, ++ unsigned Flag) const; ++ ++ // Create a TargetJumpTable node. ++ SDValue getTargetNode(JumpTableSDNode *N, EVT Ty, SelectionDAG &DAG, ++ unsigned Flag) const; ++ ++ // Create a TargetConstantPool node. ++ SDValue getTargetNode(ConstantPoolSDNode *N, EVT Ty, SelectionDAG &DAG, ++ unsigned Flag) const; ++ ++ // Lower Operand helpers ++ SDValue LowerCallResult(SDValue Chain, SDValue InFlag, ++ CallingConv::ID CallConv, bool isVarArg, ++ const SmallVectorImpl &Ins, ++ const SDLoc &dl, SelectionDAG &DAG, ++ SmallVectorImpl &InVals, ++ TargetLowering::CallLoweringInfo &CLI) const; ++ ++ // Lower Operand specifics ++ SDValue lowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; ++ SDValue lowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const; ++ SDValue lowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const; ++ SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; ++ SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; ++ SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; ++ SDValue lowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; ++ SDValue lowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; ++ SDValue lowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const; ++ SDValue lowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const; ++ /// Lower VECTOR_SHUFFLE into one of a number of instructions ++ /// depending on the indices in the shuffle. 
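++    /// For example, masks that keep only the even or only the odd lanes can
++    /// be matched to the VPICKEV/VPICKOD nodes declared above, interleaving
++    /// masks to VILVL/VILVH, and anything else can fall back to the generic
++    /// VSHF form.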
++ SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; ++ SDValue lowerBRCOND(SDValue Op, SelectionDAG &DAG) const; ++ SDValue lowerConstantPool(SDValue Op, SelectionDAG &DAG) const; ++ SDValue lowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; ++ SDValue lowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; ++ SDValue lowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; ++ SDValue lowerJumpTable(SDValue Op, SelectionDAG &DAG) const; ++ SDValue lowerSELECT(SDValue Op, SelectionDAG &DAG) const; ++ SDValue lowerSETCC(SDValue Op, SelectionDAG &DAG) const; ++ SDValue lowerVASTART(SDValue Op, SelectionDAG &DAG) const; ++ SDValue lowerVAARG(SDValue Op, SelectionDAG &DAG) const; ++ SDValue lowerFABS(SDValue Op, SelectionDAG &DAG) const; ++ SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; ++ SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; ++ SDValue lowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const; ++ SDValue lowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const; ++ SDValue lowerShiftLeftParts(SDValue Op, SelectionDAG& DAG) const; ++ SDValue lowerShiftRightParts(SDValue Op, SelectionDAG& DAG, ++ bool IsSRA) const; ++ SDValue lowerEH_DWARF_CFA(SDValue Op, SelectionDAG &DAG) const; ++ ++ /// isEligibleForTailCallOptimization - Check whether the call is eligible ++ /// for tail call optimization. ++ bool ++ isEligibleForTailCallOptimization(const CCState &CCInfo, ++ CallLoweringInfo &CLI, MachineFunction &MF, ++ unsigned NextStackOffset, ++ const LoongArchFunctionInfo &FI) const; ++ ++ /// copyByValArg - Copy argument registers which were used to pass a byval ++ /// argument to the stack. Create a stack frame object for the byval ++ /// argument. ++ void copyByValRegs(SDValue Chain, const SDLoc &DL, ++ std::vector &OutChains, SelectionDAG &DAG, ++ const ISD::ArgFlagsTy &Flags, ++ SmallVectorImpl &InVals, ++ const Argument *FuncArg, unsigned FirstReg, ++ unsigned LastReg, const CCValAssign &VA, ++ LoongArchCCState &State) const; ++ ++ /// passByValArg - Pass a byval argument in registers or on stack. ++ void passByValArg(SDValue Chain, const SDLoc &DL, ++ std::deque> &RegsToPass, ++ SmallVectorImpl &MemOpChains, SDValue StackPtr, ++ MachineFrameInfo &MFI, SelectionDAG &DAG, SDValue Arg, ++ unsigned FirstReg, unsigned LastReg, ++ const ISD::ArgFlagsTy &Flags, ++ const CCValAssign &VA) const; ++ ++ /// writeVarArgRegs - Write variable function arguments passed in registers ++ /// to the stack. Also create a stack frame object for the first variable ++ /// argument. 
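++    /// For a variadic callee, the unnamed arguments that arrived in argument
++    /// registers are spilled here so that va_arg (see lowerVASTART and
++    /// lowerVAARG above) can walk them in memory.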
++ void writeVarArgRegs(std::vector &OutChains, SDValue Chain, ++ const SDLoc &DL, SelectionDAG &DAG, ++ CCState &State) const; ++ ++ SDValue ++ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, ++ const SmallVectorImpl &Ins, ++ const SDLoc &dl, SelectionDAG &DAG, ++ SmallVectorImpl &InVals) const override; ++ ++ SDValue passArgOnStack(SDValue StackPtr, unsigned Offset, SDValue Chain, ++ SDValue Arg, const SDLoc &DL, bool IsTailCall, ++ SelectionDAG &DAG) const; ++ ++ SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, ++ SmallVectorImpl &InVals) const override; ++ ++ bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, ++ bool isVarArg, ++ const SmallVectorImpl &Outs, ++ LLVMContext &Context) const override; ++ ++ SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, ++ const SmallVectorImpl &Outs, ++ const SmallVectorImpl &OutVals, ++ const SDLoc &dl, SelectionDAG &DAG) const override; ++ ++ bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const override; ++ ++ // Inline asm support ++ ConstraintType getConstraintType(StringRef Constraint) const override; ++ ++ /// Examine constraint string and operand type and determine a weight value. ++ /// The operand object must already have been set up with the operand type. ++ ConstraintWeight getSingleConstraintMatchWeight( ++ AsmOperandInfo &info, const char *constraint) const override; ++ ++ /// This function parses registers that appear in inline-asm constraints. ++ /// It returns pair (0, 0) on failure. ++ std::pair ++ parseRegForInlineAsmConstraint(StringRef C, MVT VT) const; ++ ++ std::pair ++ getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, ++ StringRef Constraint, MVT VT) const override; ++ ++ /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops ++ /// vector. If it is invalid, don't add anything to Ops. If hasMemory is ++ /// true it means one of the asm constraint of the inline asm instruction ++ /// being processed is 'm'. ++ void LowerAsmOperandForConstraint(SDValue Op, ++ std::string &Constraint, ++ std::vector &Ops, ++ SelectionDAG &DAG) const override; ++ ++ unsigned ++ getInlineAsmMemConstraint(StringRef ConstraintCode) const override { ++ if (ConstraintCode == "R") ++ return InlineAsm::Constraint_R; ++ else if (ConstraintCode == "ZC") ++ return InlineAsm::Constraint_ZC; ++ else if (ConstraintCode == "ZB") ++ return InlineAsm::Constraint_ZB; ++ return TargetLowering::getInlineAsmMemConstraint(ConstraintCode); ++ } ++ ++ bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, ++ Type *Ty, unsigned AS, ++ Instruction *I = nullptr) const override; ++ ++ bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override; ++ ++ EVT getOptimalMemOpType(const MemOp &Op, ++ const AttributeList &FuncAttributes) const override; ++ ++ /// isFPImmLegal - Returns true if the target can instruction select the ++ /// specified FP immediate natively. If false, the legalizer will ++ /// materialize the FP immediate as a load from a constant pool. ++ bool isFPImmLegal(const APFloat &Imm, EVT VT, ++ bool ForCodeSize) const override; ++ ++ bool useSoftFloat() const override; ++ ++ bool shouldInsertFencesForAtomic(const Instruction *I) const override { ++ return isa(I) || isa(I); ++ } ++ ++ bool mayBeEmittedAsTailCall(const CallInst *CI) const override; ++ ++ /// Emit a sign-extension using sll/sra, seb, or seh appropriately. 
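++    /// On LoongArch the byte and halfword cases are expected to map to
++    /// ext.w.b / ext.w.h rather than the MIPS seb/seh forms.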
++ MachineBasicBlock *emitSignExtendToI32InReg(MachineInstr &MI, ++ MachineBasicBlock *BB, ++ unsigned Size, unsigned DstReg, ++ unsigned SrcRec) const; ++ ++ MachineBasicBlock *emitLoadAddress(MachineInstr &MI, ++ MachineBasicBlock *BB) const; ++ MachineBasicBlock *emitAtomicBinary(MachineInstr &MI, ++ MachineBasicBlock *BB) const; ++ MachineBasicBlock *emitAtomicBinaryPartword(MachineInstr &MI, ++ MachineBasicBlock *BB, ++ unsigned Size) const; ++ ++ MachineBasicBlock *emitXINSERT_B(MachineInstr &MI, ++ MachineBasicBlock *BB) const; ++ MachineBasicBlock *emitINSERT_H_VIDX(MachineInstr &MI, ++ MachineBasicBlock *BB) const; ++ ++ MachineBasicBlock *emitAtomicCmpSwap(MachineInstr &MI, ++ MachineBasicBlock *BB) const; ++ MachineBasicBlock *emitAtomicCmpSwapPartword(MachineInstr &MI, ++ MachineBasicBlock *BB, ++ unsigned Size) const; ++ MachineBasicBlock *emitSEL_D(MachineInstr &MI, MachineBasicBlock *BB) const; ++ ++ MachineBasicBlock *emitPseudoSELECT(MachineInstr &MI, MachineBasicBlock *BB, ++ bool isFPCmp, unsigned Opc) const; ++ ++ /// SE ++ MachineBasicBlock *emitLSXCBranchPseudo(MachineInstr &MI, ++ MachineBasicBlock *BB, ++ unsigned BranchOp) const; ++ /// Emit the COPY_FW pseudo instruction ++ MachineBasicBlock *emitCOPY_FW(MachineInstr &MI, ++ MachineBasicBlock *BB) const; ++ /// Emit the COPY_FD pseudo instruction ++ MachineBasicBlock *emitCOPY_FD(MachineInstr &MI, ++ MachineBasicBlock *BB) const; ++ ++ MachineBasicBlock *emitXCOPY_FW(MachineInstr &MI, ++ MachineBasicBlock *BB) const; ++ ++ MachineBasicBlock *emitXCOPY_FD(MachineInstr &MI, ++ MachineBasicBlock *BB) const; ++ ++ MachineBasicBlock *emitCONCAT_VECTORS(MachineInstr &MI, ++ MachineBasicBlock *BB, ++ unsigned Bytes) const; ++ ++ MachineBasicBlock *emitXCOPY_FW_GPR(MachineInstr &MI, ++ MachineBasicBlock *BB) const; ++ ++ MachineBasicBlock *emitXINSERT_BH(MachineInstr &MI, MachineBasicBlock *BB, ++ unsigned EltSizeInBytes) const; ++ ++ MachineBasicBlock *emitXINSERT_FW(MachineInstr &MI, ++ MachineBasicBlock *BB) const; ++ ++ /// Emit the INSERT_FW pseudo instruction ++ MachineBasicBlock *emitINSERT_FW(MachineInstr &MI, ++ MachineBasicBlock *BB) const; ++ /// Emit the INSERT_FD pseudo instruction ++ MachineBasicBlock *emitINSERT_FD(MachineInstr &MI, ++ MachineBasicBlock *BB) const; ++ ++ MachineBasicBlock *emitXINSERT_FD(MachineInstr &MI, ++ MachineBasicBlock *BB) const; ++ ++ MachineBasicBlock *emitXINSERT_DF_VIDX(MachineInstr &MI, ++ MachineBasicBlock *BB, ++ bool IsGPR64) const; ++ /// Emit the FILL_FW pseudo instruction ++ MachineBasicBlock *emitFILL_FW(MachineInstr &MI, ++ MachineBasicBlock *BB) const; ++ /// Emit the FILL_FD pseudo instruction ++ MachineBasicBlock *emitFILL_FD(MachineInstr &MI, ++ MachineBasicBlock *BB) const; ++ ++ MachineBasicBlock *emitXFILL_FW(MachineInstr &MI, ++ MachineBasicBlock *BB) const; ++ MachineBasicBlock *emitXFILL_FD(MachineInstr &MI, ++ MachineBasicBlock *BB) const; ++ }; + + } // end namespace llvm + +diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrFormats.td b/llvm/lib/Target/LoongArch/LoongArchInstrFormats.td +index bebc83a86..d75d5198b 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchInstrFormats.td ++++ b/llvm/lib/Target/LoongArch/LoongArchInstrFormats.td +@@ -1,4 +1,4 @@ +-//===- LoongArchInstrFormats.td - LoongArch Instr. Formats -*- tablegen -*-===// ++//===-- LoongArchInstrFormats.td - LoongArch Instruction Formats -----*- tablegen -*-===// + // + // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+ // See https://llvm.org/LICENSE.txt for license information. +@@ -9,396 +9,782 @@ + //===----------------------------------------------------------------------===// + // Describe LoongArch instructions format + // +-// opcode - operation code. +-// rd - destination register operand. +-// r{j/k} - source register operand. +-// immN - immediate data operand. ++// CPU INSTRUCTION FORMATS ++// ++// opcode - operation code. ++// rs - src reg. ++// rt - dst reg (on a 2 regs instr) or src reg (on a 3 reg instr). ++// rd - dst reg, only used on 3 regs instr. ++// shamt - only used on shift instructions, contains the shift amount. ++// funct - combined with opcode field give us an operation code. + // + //===----------------------------------------------------------------------===// + +-class LAInst pattern = []> +- : Instruction { ++class StdArch { ++ ++ bits<32> Inst; ++} ++ ++// Format specifies the encoding used by the instruction. This is part of the ++// ad-hoc solution used to emit machine instruction encodings by our machine ++// code emitter. ++class Format val> { ++ bits<4> Value = val; ++} ++ ++def Pseudo : Format<0>; ++def FrmR : Format<1>; ++def FrmI : Format<2>; ++def FrmJ : Format<3>; ++def FrmFR : Format<4>; ++def FrmFI : Format<5>; ++def FrmOther : Format<6>; ++ ++// Generic LoongArch Format ++class InstLA pattern, Format f> ++ : Instruction ++{ + field bits<32> Inst; +- // SoftFail is a field the disassembler can use to provide a way for +- // instructions to not match without killing the whole decode process. It is +- // mainly used for ARM, but Tablegen expects this field to exist or it fails +- // to build the decode table. +- field bits<32> SoftFail = 0; ++ Format Form = f; + + let Namespace = "LoongArch"; ++ + let Size = 4; ++ + let OutOperandList = outs; +- let InOperandList = ins; +- let AsmString = opcstr # "\t" # opnstr; +- let Pattern = pattern; ++ let InOperandList = ins; ++ let AsmString = asmstr; ++ let Pattern = pattern; ++ ++ // ++ // Attributes specific to LoongArch instructions... ++ // ++ bits<4> FormBits = Form.Value; ++ bit isCTI = 0; // Any form of Control Transfer Instruction. ++ // Required for LoongArch ++ bit hasForbiddenSlot = 0; // Instruction has a forbidden slot. ++ bit IsPCRelativeLoad = 0; // Load instruction with implicit source register ++ // ($pc) and with explicit offset and destination ++ // register ++ bit hasFCCRegOperand = 0; // Instruction uses $fcc register ++ ++ // TSFlags layout should be kept in sync with MCTargetDesc/LoongArchBaseInfo.h. 
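++  // Layout: bits 3-0 hold the Format value, bit 4 isCTI, bit 5
++  // hasForbiddenSlot, bit 6 IsPCRelativeLoad and bit 7 hasFCCRegOperand,
++  // matching the let statements below.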
++ let TSFlags{3-0} = FormBits; ++ let TSFlags{4} = isCTI; ++ let TSFlags{5} = hasForbiddenSlot; ++ let TSFlags{6} = IsPCRelativeLoad; ++ let TSFlags{7} = hasFCCRegOperand; ++ ++ let DecoderNamespace = "LoongArch"; ++ ++ field bits<32> SoftFail = 0; + } + +-// Pseudo instructions +-class Pseudo pattern = [], string opcstr = "", +- string opnstr = ""> +- : LAInst { +- let isPseudo = 1; +- let isCodeGenOnly = 1; ++class InstForm pattern, ++ Format f, string opstr = ""> : ++ InstLA { ++ string BaseOpcode = opstr; ++ string Arch; ++} ++ ++class LoongArch_str { ++ string Arch; ++ string BaseOpcode = opstr; ++} ++ ++//===-----------------------------------------------------------===// ++// Format instruction classes in the LoongArch ++//===-----------------------------------------------------------===// ++ ++// R2 classes: 2 registers ++// ++class R2 : StdArch { ++ bits<5> rj; ++ bits<5> rd; ++ ++ let Inst{9-5} = rj; ++ let Inst{4-0} = rd; ++} ++ ++class R2I op> ++ : R2 { ++ let Inst{31-15} = 0x0; ++ let Inst{14-10} = op; ++} ++ ++class R2F op> ++ : R2 { ++ bits<5> fj; ++ bits<5> fd; ++ ++ let Inst{31-20} = 0x11; ++ let Inst{19-10} = op; ++ let Inst{9-5} = fj; ++ let Inst{4-0} = fd; ++} ++ ++class MOVFI op> ++ : R2 { ++ bits<5> rj; ++ bits<5> fd; ++ ++ let Inst{31-20} = 0x11; ++ let Inst{19-10} = op; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = fd; ++} ++ ++class MOVIF op> ++ : R2 { ++ bits<5> fj; ++ bits<5> rd; ++ ++ let Inst{31-20} = 0x11; ++ let Inst{19-10} = op; ++ let Inst{9-5} = fj; ++ let Inst{4-0} = rd; ++} ++ ++class R2P op> ++ : R2 { ++ let Inst{31-13} = 0x3240; ++ let Inst{12-10} = op; ++} ++ ++class R2_CSR op> ++ : StdArch { ++ bits<5> rj; ++ bits<5> rd; ++ bits<14> csr; ++ ++ let Inst{31-24} = op; ++ let Inst{23-10} = csr; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = rd; + } + +-// 2R-type +-// +-class Fmt2R op, dag outs, dag ins, string opcstr, string opnstr, +- list pattern = []> +- : LAInst { ++class R2_SI16 op> ++ : StdArch { ++ bits<5> rd; ++ bits<5> rj; ++ bits<16> si16; ++ ++ let Inst{31-26} = op; ++ let Inst{25-10} = si16; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = rd; ++} ++ ++class R2_COND op, bits<5> cond> ++ : StdArch { ++ bits<5> fj; ++ bits<5> fk; ++ bits<3> cd; ++ ++ let Inst{31-22} = 0x30; ++ let Inst{21-20} = op; ++ let Inst{19-15} = cond; ++ let Inst{14-10} = fk; ++ let Inst{9-5} = fj; ++ let Inst{4-3} = 0b00; ++ let Inst{2-0} = cd; ++} ++ ++class R2_LEVEL op> ++ : StdArch { + bits<5> rj; + bits<5> rd; ++ bits<8> level; + +- let Inst{31-10} = op; ++ let Inst{31-18} = op; ++ let Inst{17-10} = level; + let Inst{9-5} = rj; + let Inst{4-0} = rd; + } + +-// 3R-type +-// +-class Fmt3R op, dag outs, dag ins, string opcstr, string opnstr, +- list pattern = []> +- : LAInst { ++class IMM32 op> ++ : StdArch { ++ let Inst{31-16} = 0x0648; ++ let Inst{15-10} = op; ++ let Inst{9-0} = 0; ++} ++ ++class WAIT_FM : StdArch { ++ bits<15> hint; ++ ++ let Inst{31-15} = 0xc91; ++ let Inst{14-0} = hint; ++} ++ ++class R2_INVTLB : StdArch { ++ bits<5> rj; ++ bits<5> op; ++ bits<5> rk; ++ ++ let Inst{31-15} = 0xc93; ++ let Inst{14-10} = rk; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = op; ++} ++ ++class BAR_FM op> ++ : StdArch { ++ bits<15> hint; ++ ++ let Inst{31-16} = 0x3872; ++ let Inst{15} = op; ++ let Inst{14-0} = hint; ++} ++ ++class PRELD_FM : StdArch { ++ bits<5> rj; ++ bits<5> hint; ++ bits<12> imm12; ++ ++ let Inst{31-22} = 0xab; ++ let Inst{21-10} = imm12; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = hint; ++} ++ ++// R3 classes: 3 registers ++// ++class R3 : StdArch { + bits<5> rk; + 
bits<5> rj; + bits<5> rd; + +- let Inst{31-15} = op; + let Inst{14-10} = rk; + let Inst{9-5} = rj; + let Inst{4-0} = rd; + } + +-// 3RI2-type +-// +-class Fmt3RI2 op, dag outs, dag ins, string opcstr, string opnstr, +- list pattern = []> +- : LAInst { +- bits<2> imm2; ++class R3I op> ++ : R3 { ++ let Inst{31-22} = 0x0; ++ let Inst{21-15} = op; ++} ++ ++class R3F op> ++ : R3 { ++ bits<5> fk; ++ bits<5> fj; ++ bits<5> fd; ++ ++ let Inst{31-21} = 0x8; ++ let Inst{20-15} = op; ++ let Inst{14-10} = fk; ++ let Inst{9-5} = fj; ++ let Inst{4-0} = fd; ++} ++ ++class R3MI op> ++ : R3 { ++ let Inst{31-23} = 0x70; ++ let Inst{22-15} = op; ++} ++ ++class AM op> : StdArch { ++ bits<5> rk; ++ bits<17> addr; // rj + 12 bits offset 0 ++ bits<5> rd; ++ ++ let Inst{31-21} = 0x1c3; ++ let Inst{20-15} = op; ++ let Inst{14-10} = rk; ++ let Inst{9-5} = addr{16-12}; ++ let Inst{4-0} = rd; ++} ++ ++class R3MF op> ++ : R3 { ++ bits<5> fd; ++ ++ let Inst{31-23} = 0x70; ++ let Inst{22-15} = op; ++ let Inst{4-0} = fd; ++} ++ ++class R3_SA2 op> ++ : StdArch { + bits<5> rk; + bits<5> rj; + bits<5> rd; ++ bits<2> sa; + +- let Inst{31-17} = op; +- let Inst{16-15} = imm2; ++ let Inst{31-22} = 0x0; ++ let Inst{21-17} = op; ++ let Inst{16-15} = sa; + let Inst{14-10} = rk; + let Inst{9-5} = rj; + let Inst{4-0} = rd; + } + +-// 3RI3-type +-// +-class Fmt3RI3 op, dag outs, dag ins, string opcstr, string opnstr, +- list pattern = []> +- : LAInst { +- bits<3> imm3; ++class R3_SA3 : StdArch { + bits<5> rk; + bits<5> rj; + bits<5> rd; ++ bits<3> sa; + +- let Inst{31-18} = op; +- let Inst{17-15} = imm3; ++ let Inst{31-18} = 3; ++ let Inst{17-15} = sa; + let Inst{14-10} = rk; + let Inst{9-5} = rj; + let Inst{4-0} = rd; + } + +-// 2RI5-type +-// +-class Fmt2RI5 op, dag outs, dag ins, string opcstr, string opnstr, +- list pattern = []> +- : LAInst { +- bits<5> imm5; ++// R4 classes: 4 registers ++// ++class R4MUL op> ++ : StdArch { ++ bits<5> fa; ++ bits<5> fk; ++ bits<5> fj; ++ bits<5> fd; ++ ++ let Inst{31-24} = 0x8; ++ let Inst{23-20} = op; ++ let Inst{19-15} = fa; ++ let Inst{14-10} = fk; ++ let Inst{9-5} = fj; ++ let Inst{4-0} = fd; ++} ++ ++class R4CMP op> ++ : StdArch { ++ bits<5> cond; ++ bits<5> fk; ++ bits<5> fj; ++ bits<3> cd; ++ ++ let Inst{31-22} = 0x30; ++ let Inst{21-20} = op; ++ let Inst{19-15} = cond; ++ let Inst{14-10} = fk; ++ let Inst{9-5} = fj; ++ let Inst{4-3} = 0; ++ let Inst{2-0} = cd; ++} ++ ++class R4SEL : StdArch { ++ bits<3> ca; ++ bits<5> fk; ++ bits<5> fj; ++ bits<5> fd; ++ ++ let Inst{31-18} = 0x340; ++ let Inst{17-15} = ca; ++ let Inst{14-10} = fk; ++ let Inst{9-5} = fj; ++ let Inst{4-0} = fd; ++} ++ ++// R2_IMM5 classes: 2registers and 1 5bit-immediate ++// ++class R2_IMM5 op> ++ : StdArch { + bits<5> rj; + bits<5> rd; ++ bits<5> imm5; + +- let Inst{31-15} = op; ++ let Inst{31-20} = 0x4; ++ let Inst{19-18} = op; ++ let Inst{17-15} = 0x1; + let Inst{14-10} = imm5; + let Inst{9-5} = rj; + let Inst{4-0} = rd; + } + +-// 2RI6-type +-// +-class Fmt2RI6 op, dag outs, dag ins, string opcstr, string opnstr, +- list pattern = []> +- : LAInst { +- bits<6> imm6; ++// R2_IMM6 classes: 2registers and 1 6bit-immediate ++// ++class R2_IMM6 op> ++ : StdArch { + bits<5> rj; + bits<5> rd; ++ bits<6> imm6; + +- let Inst{31-16} = op; ++ let Inst{31-20} = 0x4; ++ let Inst{19-18} = op; ++ let Inst{17-16} = 0x1; + let Inst{15-10} = imm6; + let Inst{9-5} = rj; + let Inst{4-0} = rd; + } + +-// 2RI8-type +-// +-class Fmt2RI8 op, dag outs, dag ins, string opcstr, string opnstr, +- list pattern = []> +- : LAInst { +- bits<8> imm8; 
++// R2_IMM12 classes: 2 registers and 1 12bit-immediate ++// ++class LOAD_STORE op> ++ : StdArch { ++ bits<5> rd; ++ bits<17> addr; ++ ++ let Inst{31-26} = 0xa; ++ let Inst{25-22} = op; ++ let Inst{21-10} = addr{11-0}; ++ let Inst{9-5} = addr{16-12}; ++ let Inst{4-0} = rd; ++} ++// for reloc ++class LOAD_STORE_RRI op> ++ : StdArch { + bits<5> rj; + bits<5> rd; ++ bits<12> imm12; + +- let Inst{31-18} = op; +- let Inst{17-10} = imm8; ++ let Inst{31-26} = 0xa; ++ let Inst{25-22} = op; ++ let Inst{21-10} = imm12; + let Inst{9-5} = rj; + let Inst{4-0} = rd; + } + +-// 2RI12-type +-// +-class Fmt2RI12 op, dag outs, dag ins, string opcstr, string opnstr, +- list pattern = []> +- : LAInst { +- bits<12> imm12; ++ ++class R2_IMM12 op> ++ : StdArch { + bits<5> rj; + bits<5> rd; ++ bits<12> imm12; + +- let Inst{31-22} = op; ++ let Inst{31-25} = 0x1; ++ let Inst{24-22} = op; + let Inst{21-10} = imm12; + let Inst{9-5} = rj; + let Inst{4-0} = rd; + } + +-// 2RI14-type +-// +-class Fmt2RI14 op, dag outs, dag ins, string opcstr, string opnstr, +- list pattern = []> +- : LAInst { +- bits<14> imm14; ++class LEA_ADDI_FM op> ++ : StdArch { ++ bits<5> rd; ++ bits<17> addr; ++ ++ let Inst{31-25} = 0x1; ++ let Inst{24-22} = op; ++ let Inst{21-10} = addr{11-0}; ++ let Inst{9-5} = addr{16-12}; ++ let Inst{4-0} = rd; ++} ++ ++// R2_IMM14 classes: 2 registers and 1 14bit-immediate ++// ++class LL_SC op> ++ : StdArch { ++ bits<5> rd; ++ bits<19> addr; ++ ++ let Inst{31-27} = 4; ++ let Inst{26-24} = op; ++ let Inst{23-10} = addr{13-0}; ++ let Inst{9-5} = addr{18-14}; ++ let Inst{4-0} = rd; ++} ++ ++// R2_IMM16 classes: 2 registers and 1 16bit-immediate ++// ++class R2_IMM16BEQ op> ++ : StdArch { + bits<5> rj; + bits<5> rd; ++ bits<16> offs16; + +- let Inst{31-24} = op; +- let Inst{23-10} = imm14; ++ let Inst{31-26} = op; ++ let Inst{25-10} = offs16; + let Inst{9-5} = rj; + let Inst{4-0} = rd; + } + +-// 2RI16-type +-// +-class Fmt2RI16 op, dag outs, dag ins, string opcstr, string opnstr, +- list pattern = []> +- : LAInst { +- bits<16> imm16; ++class R2_IMM16JIRL : StdArch { + bits<5> rj; + bits<5> rd; ++ bits<16> offs16; ++ ++ let Inst{31-26} = 0x13; ++ let Inst{25-10} = offs16; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = rd; ++} ++ ++// R1_IMM21 classes: 1 registers and 1 21bit-immediate ++// ++class R1_IMM21BEQZ op> ++ : StdArch { ++ bits<5> rj; ++ bits<21> offs21; + + let Inst{31-26} = op; +- let Inst{25-10} = imm16; ++ let Inst{25-10} = offs21{15-0}; + let Inst{9-5} = rj; ++ let Inst{4-0} = offs21{20-16}; ++} ++ ++class R1_CSR op> ++ : StdArch { ++ bits<5> rd; ++ bits<14> csr; ++ ++ let Inst{31-24} = op{7-0}; ++ let Inst{23-10} = csr; ++ let Inst{9-5} = op{12-8}; + let Inst{4-0} = rd; + } + +-// 1RI20-type +-// +-class Fmt1RI20 op, dag outs, dag ins, string opcstr, string opnstr, +- list pattern = []> +- : LAInst { +- bits<20> imm20; ++class R1_SI20 op> ++ : StdArch { + bits<5> rd; ++ bits<20> si20; + + let Inst{31-25} = op; +- let Inst{24-5} = imm20; ++ let Inst{24-5} = si20; + let Inst{4-0} = rd; + } + +-// 1RI21-type +-// +-class Fmt1RI21 op, dag outs, dag ins, string opcstr, string opnstr, +- list pattern = []> +- : LAInst { +- bits<21> imm21; ++class R1_CACHE : StdArch { + bits<5> rj; ++ bits<5> op; ++ bits<12> si12; + +- let Inst{31-26} = op; +- let Inst{25-10} = imm21{15-0}; ++ let Inst{31-22} = 0x18; ++ let Inst{21-10} = si12; + let Inst{9-5} = rj; +- let Inst{4-0} = imm21{20-16}; ++ let Inst{4-0} = op; ++} ++ ++class R1_SEQ op> ++ : StdArch { ++ bits<5> rj; ++ bits<5> offset; ++ bits<8> seq; ++ ++ let 
Inst{31-18} = op; ++ let Inst{17-10} = seq; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = 0b00000; + } + +-// I15-type +-// +-class FmtI15 op, dag outs, dag ins, string opcstr, string opnstr, +- list pattern = []> +- : LAInst { +- bits<15> imm15; ++class R1_BCEQZ op> ++ : StdArch { ++ bits<21> offset; ++ bits<3> cj; + +- let Inst{31-15} = op; +- let Inst{14-0} = imm15; ++ let Inst{31-26} = 0x12; ++ let Inst{25-10} = offset{15-0}; ++ let Inst{9-8} = op; ++ let Inst{7-5} = cj; ++ let Inst{4-0} = offset{20-16}; + } + +-// I26-type +-// +-class FmtI26 op, dag outs, dag ins, string opcstr, string opnstr, +- list pattern = []> +- : LAInst { +- bits<26> imm26; ++// IMM26 classes: 1 26bit-immediate ++// ++class IMM26B op> ++ : StdArch { ++ bits<26> offs26; + + let Inst{31-26} = op; +- let Inst{25-10} = imm26{15-0}; +- let Inst{9-0} = imm26{25-16}; ++ let Inst{25-10} = offs26{15-0}; ++ let Inst{9-0} = offs26{25-16}; ++} ++ ++// LoongArch Pseudo Instructions Format ++class LoongArchPseudo pattern> : ++ InstLA { ++ let isCodeGenOnly = 1; ++ let isPseudo = 1; ++} ++ ++// Pseudo-instructions for alternate assembly syntax (never used by codegen). ++// These are aliases that require C++ handling to convert to the target ++// instruction, while InstAliases can be handled directly by tblgen. ++class LoongArchAsmPseudoInst: ++ InstLA { ++ let isPseudo = 1; ++ let Pattern = []; + } + +-// FmtBSTR_W +-// +-class FmtBSTR_W op, dag outs, dag ins, string opcstr, string opnstr, +- list pattern = []> +- : LAInst { ++// ++// Misc instruction classes ++class ASSERT op> ++ : StdArch { ++ bits<5> rk; ++ bits<5> rj; ++ ++ let Inst{31-17} = 0x0; ++ let Inst{16-15} = op; ++ let Inst{14-10} = rk; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = 0x0; ++} ++ ++class CODE15 op> ++ : StdArch { ++ bits<15> Code; ++ ++ let Inst{31-22} = 0x0; ++ let Inst{21-15} = op; ++ let Inst{14-0} = Code; ++} ++ ++class INSERT_BIT32 op> ++ : StdArch { + bits<5> msbw; + bits<5> lsbw; + bits<5> rj; + bits<5> rd; + +- let Inst{31-21} = op{11-1}; ++ let Inst{31-21} = 0x3; + let Inst{20-16} = msbw; +- let Inst{15} = op{0}; ++ let Inst{15} = op; + let Inst{14-10} = lsbw; + let Inst{9-5} = rj; + let Inst{4-0} = rd; + } + +-// FmtBSTR_D +-// +-class FmtBSTR_D op, dag outs, dag ins, string opcstr, string opnstr, +- list pattern = []> +- : LAInst { ++class INSERT_BIT64 op> ++ : StdArch { + bits<6> msbd; + bits<6> lsbd; + bits<5> rj; + bits<5> rd; + +- let Inst{31-22} = op; ++ let Inst{31-23} = 0x1; ++ let Inst{22} = op; + let Inst{21-16} = msbd; + let Inst{15-10} = lsbd; + let Inst{9-5} = rj; + let Inst{4-0} = rd; + } + +-// FmtASRT +-// +-class FmtASRT op, dag outs, dag ins, string opcstr, string opnstr, +- list pattern = []> +- : LAInst { +- bits<5> rk; ++class MOVGPR2FCSR: StdArch { ++ bits<5> fcsr; + bits<5> rj; + +- let Inst{31-15} = op; +- let Inst{14-10} = rk; ++ let Inst{31-10} = 0x4530; + let Inst{9-5} = rj; +- let Inst{4-0} = 0x0; ++ let Inst{4-0} = fcsr; + } + +-// FmtPRELD +-// < 0b0010101011 | I12 | rj | I5> +-class FmtPRELD pattern = []> +- : LAInst { +- bits<12> imm12; +- bits<5> rj; +- bits<5> imm5; ++class MOVFCSR2GPR: StdArch { ++ bits<5> fcsr; ++ bits<5> rd; + +- let Inst{31-22} = 0b0010101011; +- let Inst{21-10} = imm12; +- let Inst{9-5} = rj; +- let Inst{4-0} = imm5; ++ let Inst{31-10} = 0x4532; ++ let Inst{9-5} = fcsr; ++ let Inst{4-0} = rd; + } + +-// FmtPRELDX +-// < 0b00111000001011000 | rk | rj | I5> +-class FmtPRELDX pattern = []> +- : LAInst { +- bits<5> rk; ++class MOVFGR2FCFR: StdArch { ++ bits<3> cd; ++ bits<5> fj; ++ ++ let 
Inst{31-10} = 0x4534; ++ let Inst{9-5} = fj; ++ let Inst{4-3} = 0; ++ let Inst{2-0} = cd; ++} ++ ++class MOVFCFR2FGR: StdArch { ++ bits<3> cj; ++ bits<5> fd; ++ ++ let Inst{31-10} = 0x4535; ++ let Inst{9-8} = 0; ++ let Inst{7-5} = cj; ++ let Inst{4-0} = fd; ++} ++ ++class MOVGPR2FCFR: StdArch { ++ bits<3> cd; + bits<5> rj; +- bits<5> imm5; + +- let Inst{31-15} = 0b00111000001011000; +- let Inst{14-10} = rk; ++ let Inst{31-10} = 0x4536; + let Inst{9-5} = rj; +- let Inst{4-0} = imm5; ++ let Inst{4-3} = 0; ++ let Inst{2-0} = cd; + } + +-// FmtCSR +-// +-class FmtCSR op, dag outs, dag ins, string opcstr, string opnstr, +- list pattern = []> +- : LAInst { +- bits<14> csr_num; ++class MOVFCFR2GPR: StdArch { ++ bits<3> cj; + bits<5> rd; + +- let Inst{31-24} = op{12-5}; +- let Inst{23-10} = csr_num; +- let Inst{9-5} = op{4-0}; ++ let Inst{31-10} = 0x4537; ++ let Inst{9-8} = 0; ++ let Inst{7-5} = cj; + let Inst{4-0} = rd; + } + +-// FmtCSRXCHG +-// +-class FmtCSRXCHG op, dag outs, dag ins, string opcstr, string opnstr, +- list pattern = []> +- : LAInst { +- bits<14> csr_num; +- bits<5> rj; +- bits<5> rd; ++class LoongArchInst : InstLA<(outs), (ins), "", [], FrmOther> { ++} ++class JMP_OFFS_2R op> : LoongArchInst { ++ bits<5> rs; ++ bits<5> rd; ++ bits<16> offset; + +- let Inst{31-24} = op; +- let Inst{23-10} = csr_num; +- let Inst{9-5} = rj; +- let Inst{4-0} = rd; ++ bits<32> Inst; ++ ++ let Inst{31-26} = op; ++ let Inst{25-10} = offset; ++ let Inst{9-5} = rs; ++ let Inst{4-0} = rd; + } + +-// FmtCACOP +-// <0b0000011000 | I12 | rj | I5> +-class FmtCACOP pattern = []> +- : LAInst { +- bits<12> imm12; +- bits<5> rj; +- bits<5> op; ++class FJ op> : StdArch ++{ ++ bits<26> target; + +- let Inst{31-22} = 0b0000011000; +- let Inst{21-10} = imm12; +- let Inst{9-5} = rj; +- let Inst{4-0} = op; ++ let Inst{31-26} = op; ++ let Inst{25-10} = target{15-0}; ++ let Inst{9-0} = target{25-16}; + } + +-// FmtIMM32 +-// +-class FmtI32 op, string opstr, list pattern = []> +- : LAInst<(outs), (ins), opstr, "", pattern> { +- let Inst{31-0} = op; ++class LUI_FM : StdArch { ++ bits<5> rt; ++ bits<16> imm16; ++ ++ let Inst{31-26} = 0xf; ++ let Inst{25-21} = 0; ++ let Inst{20-16} = rt; ++ let Inst{15-0} = imm16; + } + +-// FmtINVTLB +-// <0b00000110010010011 | rk | rj | I5> +-class FmtINVTLB pattern = []> +- : LAInst { +- bits<5> rk; ++class R2_IMM12M_STD op> : StdArch { + bits<5> rj; +- bits<5> op; ++ bits<5> rd; ++ bits<12> imm12; + +- let Inst{31-15} = 0b00000110010010011; +- let Inst{14-10} = rk; ++ let Inst{31-26} = 0xa; ++ let Inst{25-22} = op; ++ let Inst{21-10} = imm12; + let Inst{9-5} = rj; +- let Inst{4-0} = op; ++ let Inst{4-0} = rd; + } + +-// FmtLDPTE +-// <0b00000110010001 | seq | rj | 00000> +-class FmtLDPTE pattern = []> +- : LAInst { +- bits<8> seq; +- bits<5> rj; ++class LLD_2R Code> : LoongArchInst { ++ bits<5> rd; ++ bits<19> addr; ++ bits<5> rj = addr{18-14}; ++ bits<14> offset = addr{13-0}; + +- let Inst{31-18} = 0b00000110010001; +- let Inst{17-10} = seq; ++ bits<32> Inst; ++ ++ let Inst{31-27} = 0x4; ++ let Inst{26-24} = Code; ++ let Inst{23-10} = offset; + let Inst{9-5} = rj; +- let Inst{4-0} = 0b00000; ++ let Inst{4-0} = rd; + } ++ ++class CEQS_FM op> { ++ bits<5> fj; ++ bits<5> fk; ++ bits<3> cd; ++ bits<5> cond; ++ ++ bits<32> Inst; ++ ++ let Inst{31-22} = 0x30; ++ let Inst{21-20} = op; ++ let Inst{19-15} = cond; ++ let Inst{14-10} = fk; ++ let Inst{9-5} = fj; ++ let Inst{4-3} = 0b00; ++ let Inst{2-0} = cd; ++} ++ +diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp 
b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp +index bcbd4b28f..00abd9167 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp ++++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp +@@ -1,4 +1,4 @@ +-//=- LoongArchInstrInfo.cpp - LoongArch Instruction Information -*- C++ -*-===// ++//===- LoongArchInstrInfo.cpp - LoongArch Instruction Information -------------------===// + // + // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + // See https://llvm.org/LICENSE.txt for license information. +@@ -11,105 +11,1030 @@ + //===----------------------------------------------------------------------===// + + #include "LoongArchInstrInfo.h" +-#include "LoongArch.h" +-#include "LoongArchMachineFunctionInfo.h" ++#include "LoongArchSubtarget.h" ++#include "MCTargetDesc/LoongArchAnalyzeImmediate.h" ++#include "MCTargetDesc/LoongArchBaseInfo.h" ++#include "MCTargetDesc/LoongArchMCTargetDesc.h" ++#include "llvm/ADT/SmallVector.h" ++#include "llvm/CodeGen/MachineBasicBlock.h" ++#include "llvm/CodeGen/MachineFrameInfo.h" ++#include "llvm/CodeGen/MachineFunction.h" ++#include "llvm/CodeGen/MachineInstr.h" ++#include "llvm/CodeGen/MachineInstrBuilder.h" ++#include "llvm/CodeGen/MachineOperand.h" ++#include "llvm/CodeGen/RegisterScavenging.h" ++#include "llvm/CodeGen/TargetOpcodes.h" ++#include "llvm/CodeGen/TargetSubtargetInfo.h" ++#include "llvm/IR/DebugLoc.h" ++#include "llvm/MC/MCInstrDesc.h" ++#include "llvm/Target/TargetMachine.h" ++#include + + using namespace llvm; + + #define GET_INSTRINFO_CTOR_DTOR + #include "LoongArchGenInstrInfo.inc" + +-LoongArchInstrInfo::LoongArchInstrInfo(LoongArchSubtarget &STI) ++// Pin the vtable to this file. ++void LoongArchInstrInfo::anchor() {} ++LoongArchInstrInfo::LoongArchInstrInfo(const LoongArchSubtarget &STI) + : LoongArchGenInstrInfo(LoongArch::ADJCALLSTACKDOWN, +- LoongArch::ADJCALLSTACKUP) {} ++ LoongArch::ADJCALLSTACKUP), ++ RI(), Subtarget(STI) {} ++ ++const LoongArchRegisterInfo &LoongArchInstrInfo::getRegisterInfo() const { ++ return RI; ++} ++ ++/// isLoadFromStackSlot - If the specified machine instruction is a direct ++/// load from a stack slot, return the virtual or physical register number of ++/// the destination along with the FrameIndex of the loaded stack slot. If ++/// not, return 0. This predicate must return 0 if the instruction has ++/// any side effects other than loading from the stack slot. ++unsigned LoongArchInstrInfo::isLoadFromStackSlot(const MachineInstr &MI, ++ int &FrameIndex) const { ++ unsigned Opc = MI.getOpcode(); ++ if ((Opc == LoongArch::LD_W) || (Opc == LoongArch::LD_D) || ++ (Opc == LoongArch::FLD_S) || (Opc == LoongArch::FLD_D)) { ++ if ((MI.getOperand(1).isFI()) && // is a stack slot ++ (MI.getOperand(2).isImm()) && // the imm is zero ++ (isZeroImm(MI.getOperand(2)))) { ++ FrameIndex = MI.getOperand(1).getIndex(); ++ return MI.getOperand(0).getReg(); ++ } ++ } ++ return 0; ++} ++ ++/// isStoreToStackSlot - If the specified machine instruction is a direct ++/// store to a stack slot, return the virtual or physical register number of ++/// the source reg along with the FrameIndex of the loaded stack slot. If ++/// not, return 0. This predicate must return 0 if the instruction has ++/// any side effects other than storing to the stack slot. 
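++/// For example, an `st.d $rd, <fi#N>, 0` whose offset operand is the
++/// immediate zero is recognized; stores with a non-zero offset are not.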
++unsigned LoongArchInstrInfo::isStoreToStackSlot(const MachineInstr &MI, ++ int &FrameIndex) const { ++ unsigned Opc = MI.getOpcode(); ++ if ((Opc == LoongArch::ST_D) || (Opc == LoongArch::ST_W) || ++ (Opc == LoongArch::FST_S) ||(Opc == LoongArch::FST_D)) { ++ if ((MI.getOperand(1).isFI()) && // is a stack slot ++ (MI.getOperand(2).isImm()) && // the imm is zero ++ (isZeroImm(MI.getOperand(2)))) { ++ FrameIndex = MI.getOperand(1).getIndex(); ++ return MI.getOperand(0).getReg(); ++ } ++ } ++ return 0; ++} + + void LoongArchInstrInfo::copyPhysReg(MachineBasicBlock &MBB, +- MachineBasicBlock::iterator MBBI, +- const DebugLoc &DL, MCRegister DstReg, ++ MachineBasicBlock::iterator I, ++ const DebugLoc &DL, MCRegister DestReg, + MCRegister SrcReg, bool KillSrc) const { +- if (LoongArch::GPRRegClass.contains(DstReg, SrcReg)) { +- BuildMI(MBB, MBBI, DL, get(LoongArch::OR), DstReg) +- .addReg(SrcReg, getKillRegState(KillSrc)) +- .addReg(LoongArch::R0); +- return; ++ unsigned Opc = 0, ZeroReg = 0; ++ unsigned ZeroImm = 1; ++ if (LoongArch::GPR32RegClass.contains(DestReg)) { // Copy to CPU Reg. ++ if (LoongArch::GPR32RegClass.contains(SrcReg)) { ++ Opc = LoongArch::OR32, ZeroReg = LoongArch::ZERO; ++ } ++ else if (LoongArch::FGR32RegClass.contains(SrcReg)) ++ Opc = LoongArch::MOVFR2GR_S; ++ else if (LoongArch::FCFRRegClass.contains(SrcReg)) ++ Opc = LoongArch::MOVCF2GR; + } +- +- // FPR->FPR copies. +- unsigned Opc; +- if (LoongArch::FPR32RegClass.contains(DstReg, SrcReg)) { ++ else if (LoongArch::GPR32RegClass.contains(SrcReg)) { // Copy from CPU Reg. ++ if (LoongArch::FGR32RegClass.contains(DestReg)) ++ Opc = LoongArch::MOVGR2FR_W; ++ else if (LoongArch::FCFRRegClass.contains(DestReg)) ++ Opc = LoongArch::MOVGR2CF; ++ } ++ else if (LoongArch::FGR32RegClass.contains(DestReg, SrcReg)) + Opc = LoongArch::FMOV_S; +- } else if (LoongArch::FPR64RegClass.contains(DstReg, SrcReg)) { ++ else if (LoongArch::FGR64RegClass.contains(DestReg, SrcReg)) + Opc = LoongArch::FMOV_D; +- } else { +- // TODO: support other copies. +- llvm_unreachable("Impossible reg-to-reg copy"); ++ else if (LoongArch::GPR64RegClass.contains(DestReg)) { // Copy to CPU64 Reg. ++ if (LoongArch::GPR64RegClass.contains(SrcReg)) ++ Opc = LoongArch::OR, ZeroReg = LoongArch::ZERO_64; ++ else if (LoongArch::FGR64RegClass.contains(SrcReg)) ++ Opc = LoongArch::MOVFR2GR_D; ++ else if (LoongArch::FCFRRegClass.contains(SrcReg)) ++ Opc = LoongArch::MOVCF2GR; ++ } ++ else if (LoongArch::GPR64RegClass.contains(SrcReg)) { // Copy from CPU64 Reg. 
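++    // e.g. a $r -> $f copy uses movgr2fr.d below, while a copy into an FCC
++    // register goes through movgr2cf.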
++ if (LoongArch::FGR64RegClass.contains(DestReg)) ++ Opc = LoongArch::MOVGR2FR_D; ++ else if (LoongArch::FCFRRegClass.contains(DestReg)) ++ Opc = LoongArch::MOVGR2CF; + } ++ else if (LoongArch::FGR32RegClass.contains(DestReg)) // Copy to FGR32 Reg ++ Opc = LoongArch::MOVCF2FR; ++ else if (LoongArch::FGR32RegClass.contains(SrcReg)) // Copy from FGR32 Reg ++ Opc = LoongArch::MOVFR2CF; ++ else if (LoongArch::FGR64RegClass.contains(DestReg)) // Copy to FGR64 Reg ++ Opc = LoongArch::MOVCF2FR; ++ else if (LoongArch::FGR64RegClass.contains(SrcReg)) // Copy from FGR64 Reg ++ Opc = LoongArch::MOVFR2CF; ++ else if (LoongArch::LSX128BRegClass.contains(DestReg)) { // Copy to LSX reg ++ if (LoongArch::LSX128BRegClass.contains(SrcReg)) ++ Opc = LoongArch::VORI_B, ZeroImm = 0; ++ } else if (LoongArch::LASX256BRegClass.contains( ++ DestReg)) { // Copy to LASX reg ++ if (LoongArch::LASX256BRegClass.contains(SrcReg)) ++ Opc = LoongArch::XVORI_B, ZeroImm = 0; ++ } ++ ++ assert(Opc && "Cannot copy registers"); ++ ++ MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc)); ++ ++ if (DestReg) ++ MIB.addReg(DestReg, RegState::Define); ++ ++ if (SrcReg) ++ MIB.addReg(SrcReg, getKillRegState(KillSrc)); ++ ++ if (ZeroReg) ++ MIB.addReg(ZeroReg); + +- BuildMI(MBB, MBBI, DL, get(Opc), DstReg) +- .addReg(SrcReg, getKillRegState(KillSrc)); ++ if (!ZeroImm) ++ MIB.addImm(0); ++} ++ ++static bool isORCopyInst(const MachineInstr &MI) { ++ switch (MI.getOpcode()) { ++ default: ++ break; ++ case LoongArch::OR: ++ if (MI.getOperand(2).getReg() == LoongArch::ZERO_64) ++ return true; ++ break; ++ case LoongArch::OR32: ++ if (MI.getOperand(2).getReg() == LoongArch::ZERO) ++ return true; ++ break; ++ } ++ return false; ++} ++ ++/// We check for the common case of 'or', as it's LoongArch' preferred instruction ++/// for GPRs but we have to check the operands to ensure that is the case. ++/// Other move instructions for LoongArch are directly identifiable. ++Optional ++LoongArchInstrInfo::isCopyInstrImpl(const MachineInstr &MI) const { ++ if (MI.isMoveReg() || isORCopyInst(MI)) { ++ return DestSourcePair{MI.getOperand(0), MI.getOperand(1)}; ++ } ++ return None; + } + +-void LoongArchInstrInfo::storeRegToStackSlot( +- MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Register SrcReg, +- bool IsKill, int FI, const TargetRegisterClass *RC, +- const TargetRegisterInfo *TRI) const { ++void LoongArchInstrInfo:: ++storeRegToStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, ++ Register SrcReg, bool isKill, int FI, ++ const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, ++ int64_t Offset) const { + DebugLoc DL; +- if (I != MBB.end()) +- DL = I->getDebugLoc(); +- MachineFunction *MF = MBB.getParent(); +- MachineFrameInfo &MFI = MF->getFrameInfo(); +- +- unsigned Opcode; +- if (LoongArch::GPRRegClass.hasSubClassEq(RC)) +- Opcode = TRI->getRegSizeInBits(LoongArch::GPRRegClass) == 32 +- ? 
LoongArch::ST_W +- : LoongArch::ST_D; +- else if (LoongArch::FPR32RegClass.hasSubClassEq(RC)) +- Opcode = LoongArch::FST_S; +- else if (LoongArch::FPR64RegClass.hasSubClassEq(RC)) +- Opcode = LoongArch::FST_D; +- else +- llvm_unreachable("Can't store this register to stack slot"); ++ MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOStore); + +- MachineMemOperand *MMO = MF->getMachineMemOperand( +- MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOStore, +- MFI.getObjectSize(FI), MFI.getObjectAlign(FI)); ++ unsigned Opc = 0; ++ if (LoongArch::GPR32RegClass.hasSubClassEq(RC)) ++ Opc = LoongArch::ST_W; ++ else if (LoongArch::GPR64RegClass.hasSubClassEq(RC)) ++ Opc = LoongArch::ST_D; ++ else if (LoongArch::FGR64RegClass.hasSubClassEq(RC)) ++ Opc = LoongArch::FST_D; ++ else if (LoongArch::FGR32RegClass.hasSubClassEq(RC)) ++ Opc = LoongArch::FST_S; + +- BuildMI(MBB, I, DL, get(Opcode)) +- .addReg(SrcReg, getKillRegState(IsKill)) ++ else if (TRI->isTypeLegalForClass(*RC, MVT::v16i8)) ++ Opc = LoongArch::VST; ++ else if (TRI->isTypeLegalForClass(*RC, MVT::v8i16)) ++ Opc = LoongArch::VST_H; ++ else if (TRI->isTypeLegalForClass(*RC, MVT::v4i32) || ++ TRI->isTypeLegalForClass(*RC, MVT::v4f32)) ++ Opc = LoongArch::VST_W; ++ else if (TRI->isTypeLegalForClass(*RC, MVT::v2i64) || ++ TRI->isTypeLegalForClass(*RC, MVT::v2f64)) ++ Opc = LoongArch::VST_D; ++ else if (TRI->isTypeLegalForClass(*RC, MVT::v32i8)) ++ Opc = LoongArch::XVST; ++ else if (TRI->isTypeLegalForClass(*RC, MVT::v16i16)) ++ Opc = LoongArch::XVST_H; ++ else if (TRI->isTypeLegalForClass(*RC, MVT::v8i32) || ++ TRI->isTypeLegalForClass(*RC, MVT::v8f32)) ++ Opc = LoongArch::XVST_W; ++ else if (TRI->isTypeLegalForClass(*RC, MVT::v4i64) || ++ TRI->isTypeLegalForClass(*RC, MVT::v4f64)) ++ Opc = LoongArch::XVST_D; ++ ++ assert(Opc && "Register class not handled!"); ++ BuildMI(MBB, I, DL, get(Opc)) ++ .addReg(SrcReg, getKillRegState(isKill)) + .addFrameIndex(FI) +- .addImm(0) ++ .addImm(Offset) + .addMemOperand(MMO); + } + +-void LoongArchInstrInfo::loadRegFromStackSlot( +- MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Register DstReg, +- int FI, const TargetRegisterClass *RC, +- const TargetRegisterInfo *TRI) const { ++void LoongArchInstrInfo:: ++loadRegFromStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, ++ Register DestReg, int FI, const TargetRegisterClass *RC, ++ const TargetRegisterInfo *TRI, int64_t Offset) const { + DebugLoc DL; + if (I != MBB.end()) + DL = I->getDebugLoc(); +- MachineFunction *MF = MBB.getParent(); +- MachineFrameInfo &MFI = MF->getFrameInfo(); +- +- unsigned Opcode; +- if (LoongArch::GPRRegClass.hasSubClassEq(RC)) +- Opcode = TRI->getRegSizeInBits(LoongArch::GPRRegClass) == 32 +- ? 
LoongArch::LD_W +- : LoongArch::LD_D; +- else if (LoongArch::FPR32RegClass.hasSubClassEq(RC)) +- Opcode = LoongArch::FLD_S; +- else if (LoongArch::FPR64RegClass.hasSubClassEq(RC)) +- Opcode = LoongArch::FLD_D; +- else +- llvm_unreachable("Can't load this register from stack slot"); ++ MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOLoad); ++ unsigned Opc = 0; + +- MachineMemOperand *MMO = MF->getMachineMemOperand( +- MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad, +- MFI.getObjectSize(FI), MFI.getObjectAlign(FI)); ++ if (LoongArch::GPR32RegClass.hasSubClassEq(RC)) ++ Opc = LoongArch::LD_W; ++ else if (LoongArch::GPR64RegClass.hasSubClassEq(RC)) ++ Opc = LoongArch::LD_D; ++ else if (LoongArch::FGR32RegClass.hasSubClassEq(RC)) ++ Opc = LoongArch::FLD_S; ++ else if (LoongArch::FGR64RegClass.hasSubClassEq(RC)) ++ Opc = LoongArch::FLD_D; ++ else if (TRI->isTypeLegalForClass(*RC, MVT::v16i8)) ++ Opc = LoongArch::VLD; ++ else if (TRI->isTypeLegalForClass(*RC, MVT::v8i16)) ++ Opc = LoongArch::VLD_H; ++ else if (TRI->isTypeLegalForClass(*RC, MVT::v4i32) || ++ TRI->isTypeLegalForClass(*RC, MVT::v4f32)) ++ Opc = LoongArch::VLD_W; ++ else if (TRI->isTypeLegalForClass(*RC, MVT::v2i64) || ++ TRI->isTypeLegalForClass(*RC, MVT::v2f64)) ++ Opc = LoongArch::VLD_D; ++ else if (TRI->isTypeLegalForClass(*RC, MVT::v32i8)) ++ Opc = LoongArch::XVLD; ++ else if (TRI->isTypeLegalForClass(*RC, MVT::v16i16)) ++ Opc = LoongArch::XVLD_H; ++ else if (TRI->isTypeLegalForClass(*RC, MVT::v8i32) || ++ TRI->isTypeLegalForClass(*RC, MVT::v8f32)) ++ Opc = LoongArch::XVLD_W; ++ else if (TRI->isTypeLegalForClass(*RC, MVT::v4i64) || ++ TRI->isTypeLegalForClass(*RC, MVT::v4f64)) ++ Opc = LoongArch::XVLD_D; + +- BuildMI(MBB, I, DL, get(Opcode), DstReg) ++ assert(Opc && "Register class not handled!"); ++ ++ BuildMI(MBB, I, DL, get(Opc), DestReg) + .addFrameIndex(FI) +- .addImm(0) ++ .addImm(Offset) + .addMemOperand(MMO); + } ++ ++bool LoongArchInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { ++ MachineBasicBlock &MBB = *MI.getParent(); ++ switch (MI.getDesc().getOpcode()) { ++ default: ++ return false; ++ case LoongArch::RetRA: ++ expandRetRA(MBB, MI); ++ break; ++ case LoongArch::ERet: ++ expandERet(MBB, MI); ++ break; ++ case LoongArch::PseudoFFINT_S_W: ++ expandCvtFPInt(MBB, MI, LoongArch::FFINT_S_W, LoongArch::MOVGR2FR_W, false); ++ break; ++ case LoongArch::PseudoFFINT_S_L: ++ expandCvtFPInt(MBB, MI, LoongArch::FFINT_S_L, LoongArch::MOVGR2FR_D, true); ++ break; ++ case LoongArch::PseudoFFINT_D_W: ++ expandCvtFPInt(MBB, MI, LoongArch::FFINT_D_W, LoongArch::MOVGR2FR_W, true); ++ break; ++ case LoongArch::PseudoFFINT_D_L: ++ expandCvtFPInt(MBB, MI, LoongArch::FFINT_D_L, LoongArch::MOVGR2FR_D, true); ++ break; ++ case LoongArch::LoongArcheh_return32: ++ case LoongArch::LoongArcheh_return64: ++ expandEhReturn(MBB, MI); ++ break; ++ } ++ ++ MBB.erase(MI); ++ return true; ++} ++ ++/// getOppositeBranchOpc - Return the inverse of the specified ++/// opcode, e.g. turning BEQ to BNE. 
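++/// The compare-and-branch forms invert pairwise for both register widths,
++/// e.g. BLT <-> BGE and BLTU <-> BGEU, and BCEQZ <-> BCNEZ for the
++/// condition-flag branches.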
++unsigned LoongArchInstrInfo::getOppositeBranchOpc(unsigned Opc) const { ++ switch (Opc) { ++ default: llvm_unreachable("Illegal opcode!"); ++ case LoongArch::BEQ32: return LoongArch::BNE32; ++ case LoongArch::BEQ: return LoongArch::BNE; ++ case LoongArch::BNE32: return LoongArch::BEQ32; ++ case LoongArch::BNE: return LoongArch::BEQ; ++ case LoongArch::BEQZ32: return LoongArch::BNEZ32; ++ case LoongArch::BEQZ: return LoongArch::BNEZ; ++ case LoongArch::BNEZ32: return LoongArch::BEQZ32; ++ case LoongArch::BNEZ: return LoongArch::BEQZ; ++ case LoongArch::BCEQZ: return LoongArch::BCNEZ; ++ case LoongArch::BCNEZ: return LoongArch::BCEQZ; ++ case LoongArch::BLT32: return LoongArch::BGE32; ++ case LoongArch::BLT: return LoongArch::BGE; ++ case LoongArch::BGE32: return LoongArch::BLT32; ++ case LoongArch::BGE: return LoongArch::BLT; ++ case LoongArch::BLTU32: return LoongArch::BGEU32; ++ case LoongArch::BLTU: return LoongArch::BGEU; ++ case LoongArch::BGEU32: return LoongArch::BLTU32; ++ case LoongArch::BGEU: return LoongArch::BLTU; ++ } ++} ++ ++void LoongArchInstrInfo::adjustReg(unsigned DestReg, unsigned SrcReg, ++ int64_t Amount, MachineBasicBlock &MBB, ++ MachineBasicBlock::iterator I, ++ MachineInstr::MIFlag Flag) const { ++ LoongArchABIInfo ABI = Subtarget.getABI(); ++ DebugLoc DL; ++ unsigned ADDI = ABI.GetPtrAddiOp(); ++ ++ if (Amount == 0) ++ return; ++ ++ if (isInt<12>(Amount)) { ++ // addi $DestReg, $SrcReg, amount ++ BuildMI(MBB, I, DL, get(ADDI), DestReg) ++ .addReg(SrcReg) ++ .addImm(Amount) ++ .setMIFlag(Flag); ++ } else { ++ // For numbers which are not 12bit integers we synthesize Amount inline ++ // then add or subtract it from $SrcReg. ++ unsigned Opc = ABI.GetPtrAddOp(); ++ if (Amount < 0) { ++ Opc = ABI.GetPtrSubOp(); ++ Amount = -Amount; ++ } ++ unsigned Reg = loadImmediate(Amount, MBB, I, DL); ++ BuildMI(MBB, I, DL, get(Opc), DestReg) ++ .addReg(SrcReg) ++ .addReg(Reg, RegState::Kill) ++ .setMIFlag(Flag); ++ } ++} ++ ++/// This function generates the sequence of instructions needed to get the ++/// result of adding register REG and immediate IMM. ++unsigned LoongArchInstrInfo::loadImmediate(int64_t Imm, MachineBasicBlock &MBB, ++ MachineBasicBlock::iterator II, ++ const DebugLoc &DL) const { ++ const TargetRegisterClass *RC = Subtarget.isABI_LP64() ++ ? &LoongArch::GPR64RegClass ++ : &LoongArch::GPR32RegClass; ++ LoongArchAnalyzeImmediate::InstSeq Seq = ++ LoongArchAnalyzeImmediate::generateInstSeq(Imm, Subtarget.is64Bit()); ++ unsigned DstReg = MBB.getParent()->getRegInfo().createVirtualRegister(RC); ++ unsigned SrcReg = ++ Subtarget.isABI_LP64() ? LoongArch::ZERO_64 : LoongArch::ZERO; ++ ++ // Build the instructions in Seq. 
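++  // For a typical 32-bit value, generateInstSeq produces a `lu12i.w` of the
++  // upper 20 bits followed by an `ori` of the low 12 bits; wider 64-bit
++  // values may need additional instructions.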
++ for (auto &Inst : Seq) { ++ if (Inst.Opc == LoongArch::LU12I_W || Inst.Opc == LoongArch::LU12I_W32) ++ BuildMI(MBB, II, DL, get(Inst.Opc), DstReg).addImm(Inst.Imm); ++ else ++ BuildMI(MBB, II, DL, get(Inst.Opc), DstReg) ++ .addReg(SrcReg, RegState::Kill) ++ .addImm(Inst.Imm); ++ SrcReg = DstReg; ++ } ++ return DstReg; ++} ++ ++unsigned LoongArchInstrInfo::getAnalyzableBrOpc(unsigned Opc) const { ++ return (Opc == LoongArch::B || Opc == LoongArch::B32 || ++ Opc == LoongArch::BEQZ || Opc == LoongArch::BEQZ32 || ++ Opc == LoongArch::BNEZ || Opc == LoongArch::BNEZ32 || ++ Opc == LoongArch::BCEQZ || ++ Opc == LoongArch::BCNEZ || ++ Opc == LoongArch::BEQ || Opc == LoongArch::BEQ32 || ++ Opc == LoongArch::BNE || Opc == LoongArch::BNE32 || ++ Opc == LoongArch::BLT || Opc == LoongArch::BLT32 || ++ Opc == LoongArch::BGE || Opc == LoongArch::BGE32 || ++ Opc == LoongArch::BLTU || Opc == LoongArch::BLTU32 || ++ Opc == LoongArch::BGEU || Opc == LoongArch::BGEU32) ? Opc : 0; ++} ++ ++void LoongArchInstrInfo::expandRetRA(MachineBasicBlock &MBB, ++ MachineBasicBlock::iterator I) const { ++ ++ MachineInstrBuilder MIB; ++ ++ if (Subtarget.is64Bit()) ++ MIB = BuildMI(MBB, I, I->getDebugLoc(), get(LoongArch::PseudoReturn64)) ++ .addReg(LoongArch::RA_64, RegState::Undef); ++ else ++ MIB = BuildMI(MBB, I, I->getDebugLoc(), get(LoongArch::PseudoReturn)) ++ .addReg(LoongArch::RA, RegState::Undef); ++ ++ // Retain any imp-use flags. ++ for (auto & MO : I->operands()) { ++ if (MO.isImplicit()) ++ MIB.add(MO); ++ } ++} ++ ++void LoongArchInstrInfo::expandERet(MachineBasicBlock &MBB, ++ MachineBasicBlock::iterator I) const { ++ BuildMI(MBB, I, I->getDebugLoc(), get(LoongArch::ERTN)); ++} ++ ++std::pair ++LoongArchInstrInfo::compareOpndSize(unsigned Opc, ++ const MachineFunction &MF) const { ++ const MCInstrDesc &Desc = get(Opc); ++ assert(Desc.NumOperands == 2 && "Unary instruction expected."); ++ const LoongArchRegisterInfo *RI = &getRegisterInfo(); ++ unsigned DstRegSize = RI->getRegSizeInBits(*getRegClass(Desc, 0, RI, MF)); ++ unsigned SrcRegSize = RI->getRegSizeInBits(*getRegClass(Desc, 1, RI, MF)); ++ ++ return std::make_pair(DstRegSize > SrcRegSize, DstRegSize < SrcRegSize); ++} ++ ++void LoongArchInstrInfo::expandCvtFPInt(MachineBasicBlock &MBB, ++ MachineBasicBlock::iterator I, ++ unsigned CvtOpc, unsigned MovOpc, ++ bool IsI64) const { ++ const MCInstrDesc &CvtDesc = get(CvtOpc), &MovDesc = get(MovOpc); ++ const MachineOperand &Dst = I->getOperand(0), &Src = I->getOperand(1); ++ unsigned DstReg = Dst.getReg(), SrcReg = Src.getReg(), TmpReg = DstReg; ++ unsigned KillSrc = getKillRegState(Src.isKill()); ++ DebugLoc DL = I->getDebugLoc(); ++ bool DstIsLarger, SrcIsLarger; ++ ++ std::tie(DstIsLarger, SrcIsLarger) = ++ compareOpndSize(CvtOpc, *MBB.getParent()); ++ ++ if (DstIsLarger) ++ TmpReg = getRegisterInfo().getSubReg(DstReg, LoongArch::sub_lo); ++ ++ if (SrcIsLarger) ++ DstReg = getRegisterInfo().getSubReg(DstReg, LoongArch::sub_lo); ++ ++ BuildMI(MBB, I, DL, MovDesc, TmpReg).addReg(SrcReg, KillSrc); ++ BuildMI(MBB, I, DL, CvtDesc, DstReg).addReg(TmpReg, RegState::Kill); ++} ++ ++void LoongArchInstrInfo::expandEhReturn(MachineBasicBlock &MBB, ++ MachineBasicBlock::iterator I) const { ++ // This pseudo instruction is generated as part of the lowering of ++ // ISD::EH_RETURN. We convert it to a stack increment by OffsetReg, and ++ // indirect jump to TargetReg ++ LoongArchABIInfo ABI = Subtarget.getABI(); ++ unsigned ADD = ABI.GetPtrAddOp(); ++ unsigned SP = Subtarget.is64Bit() ? 
LoongArch::SP_64 : LoongArch::SP; ++ unsigned RA = Subtarget.is64Bit() ? LoongArch::RA_64 : LoongArch::RA; ++ unsigned T8 = Subtarget.is64Bit() ? LoongArch::T8_64 : LoongArch::T8; ++ unsigned ZERO = Subtarget.is64Bit() ? LoongArch::ZERO_64 : LoongArch::ZERO; ++ unsigned OffsetReg = I->getOperand(0).getReg(); ++ unsigned TargetReg = I->getOperand(1).getReg(); ++ ++ // add $ra, $v0, $zero ++ // add $sp, $sp, $v1 ++ // jr $ra (via RetRA) ++ const TargetMachine &TM = MBB.getParent()->getTarget(); ++ if (TM.isPositionIndependent()) ++ BuildMI(MBB, I, I->getDebugLoc(), get(ADD), T8) ++ .addReg(TargetReg) ++ .addReg(ZERO); ++ BuildMI(MBB, I, I->getDebugLoc(), get(ADD), RA) ++ .addReg(TargetReg) ++ .addReg(ZERO); ++ BuildMI(MBB, I, I->getDebugLoc(), get(ADD), SP).addReg(SP).addReg(OffsetReg); ++ expandRetRA(MBB, I); ++} ++ ++ ++bool LoongArchInstrInfo::isZeroImm(const MachineOperand &op) const { ++ return op.isImm() && op.getImm() == 0; ++} ++ ++/// insertNoop - If data hazard condition is found insert the target nop ++/// instruction. ++// FIXME: This appears to be dead code. ++void LoongArchInstrInfo:: ++insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const ++{ ++ DebugLoc DL; ++ BuildMI(MBB, MI, DL, get(LoongArch::NOP)); ++} ++ ++MachineMemOperand * ++LoongArchInstrInfo::GetMemOperand(MachineBasicBlock &MBB, int FI, ++ MachineMemOperand::Flags Flags) const { ++ MachineFunction &MF = *MBB.getParent(); ++ MachineFrameInfo &MFI = MF.getFrameInfo(); ++ ++ return MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, FI), ++ Flags, MFI.getObjectSize(FI), ++ MFI.getObjectAlign(FI)); ++} ++ ++//===----------------------------------------------------------------------===// ++// Branch Analysis ++//===----------------------------------------------------------------------===// ++ ++void LoongArchInstrInfo::AnalyzeCondBr(const MachineInstr *Inst, unsigned Opc, ++ MachineBasicBlock *&BB, ++ SmallVectorImpl &Cond) const { ++ assert(getAnalyzableBrOpc(Opc) && "Not an analyzable branch"); ++ int NumOp = Inst->getNumExplicitOperands(); ++ ++ // for both int and fp branches, the last explicit operand is the ++ // MBB. ++ BB = Inst->getOperand(NumOp-1).getMBB(); ++ Cond.push_back(MachineOperand::CreateImm(Opc)); ++ ++ for (int i = 0; i < NumOp-1; i++) ++ Cond.push_back(Inst->getOperand(i)); ++} ++ ++bool LoongArchInstrInfo::analyzeBranch(MachineBasicBlock &MBB, ++ MachineBasicBlock *&TBB, ++ MachineBasicBlock *&FBB, ++ SmallVectorImpl &Cond, ++ bool AllowModify) const { ++ SmallVector BranchInstrs; ++ BranchType BT = analyzeBranch(MBB, TBB, FBB, Cond, AllowModify, BranchInstrs); ++ ++ return (BT == BT_None) || (BT == BT_Indirect); ++} ++ ++MachineInstr * ++LoongArchInstrInfo::BuildCondBr(MachineBasicBlock &MBB, MachineBasicBlock *TBB, ++ const DebugLoc &DL, ++ ArrayRef Cond) const { ++ unsigned Opc = Cond[0].getImm(); ++ const MCInstrDesc &MCID = get(Opc); ++ MachineInstrBuilder MIB = BuildMI(&MBB, DL, MCID); ++ ++ for (unsigned i = 1; i < Cond.size(); ++i) { ++ assert((Cond[i].isImm() || Cond[i].isReg()) && ++ "Cannot copy operand for conditional branch!"); ++ MIB.add(Cond[i]); ++ } ++ MIB.addMBB(TBB); ++ return MIB.getInstr(); ++} ++ ++unsigned LoongArchInstrInfo::insertBranch(MachineBasicBlock &MBB, ++ MachineBasicBlock *TBB, ++ MachineBasicBlock *FBB, ++ ArrayRef Cond, ++ const DebugLoc &DL, ++ int *BytesAdded) const { ++ unsigned UncondBrOpc = LoongArch::B; ++ // Shouldn't be a fall through. 
++  assert(TBB && "insertBranch must not be told to insert a fallthrough");
++  if (BytesAdded)
++    *BytesAdded = 0;
++
++  // # of condition operands:
++  //  Unconditional branches: 0
++  //  Floating point branches: 1 (opc)
++  //  Int BranchZero: 2 (opc, reg)
++  //  Int Branch: 3 (opc, reg0, reg1)
++  assert((Cond.size() <= 3) &&
++         "# of LoongArch branch conditions must be <= 3!");
++
++  // Two-way Conditional branch.
++  if (FBB) {
++    MachineInstr &MI1 = *BuildCondBr(MBB, TBB, DL, Cond);
++    if (BytesAdded)
++      *BytesAdded += getInstSizeInBytes(MI1);
++    MachineInstr &MI2 = *BuildMI(&MBB, DL, get(UncondBrOpc)).addMBB(FBB);
++    if (BytesAdded)
++      *BytesAdded += getInstSizeInBytes(MI2);
++    return 2;
++  }
++
++  // One way branch.
++  // Unconditional branch.
++  if (Cond.empty()) {
++    MachineInstr &MI = *BuildMI(&MBB, DL, get(UncondBrOpc)).addMBB(TBB);
++    if (BytesAdded)
++      *BytesAdded += getInstSizeInBytes(MI);
++  }
++  else { // Conditional branch.
++    MachineInstr &MI = *BuildCondBr(MBB, TBB, DL, Cond);
++    if (BytesAdded)
++      *BytesAdded += getInstSizeInBytes(MI);
++  }
++  return 1;
++}
++
++void LoongArchInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
++                                              MachineBasicBlock &DestBB,
++                                              MachineBasicBlock &RestoreBB,
++                                              const DebugLoc &DL,
++                                              int64_t BrOffset,
++                                              RegScavenger *RS) const {
++  assert(RS && "RegScavenger required for long branching");
++  assert(MBB.empty() &&
++         "new block should be inserted for expanding unconditional branch");
++  assert(MBB.pred_size() == 1);
++
++  MachineFunction *MF = MBB.getParent();
++  MachineRegisterInfo &MRI = MF->getRegInfo();
++  const LoongArchSubtarget &Subtarget = MF->getSubtarget<LoongArchSubtarget>();
++  bool is64 = Subtarget.isABI_LP64();
++  const TargetRegisterClass *RC =
++      is64 ? &LoongArch::GPR64RegClass : &LoongArch::GPR32RegClass;
++
++  if (!is64 && !isInt<32>(BrOffset))
++    report_fatal_error(
++        "Branch offsets outside of the signed 32-bit range not supported");
++
++  unsigned ScratchReg = MRI.createVirtualRegister(RC);
++  unsigned ZeroReg = is64 ? LoongArch::ZERO_64 : LoongArch::ZERO;
++  auto II = MBB.end();
++
++  MachineInstr &Pcaddu12iMI =
++      *BuildMI(MBB, II, DL, get(LoongArch::LONG_BRANCH_PCADDU12I), ScratchReg)
++           .addMBB(&DestBB, LoongArchII::MO_PCREL_HI);
++  BuildMI(MBB, II, DL, get(LoongArch::LONG_BRANCH_ADDID2Op), ScratchReg)
++      .addReg(ScratchReg)
++      .addMBB(&DestBB, LoongArchII::MO_PCREL_LO);
++  BuildMI(MBB, II, DL, get(LoongArch::JIRL))
++      .addReg(ZeroReg)
++      .addReg(ScratchReg, RegState::Kill)
++      .addImm(0);
++  RS->enterBasicBlockEnd(MBB);
++  unsigned Scav = RS->scavengeRegisterBackwards(
++      *RC, MachineBasicBlock::iterator(Pcaddu12iMI), false, 0);
++  MRI.replaceRegWith(ScratchReg, Scav);
++  MRI.clearVirtRegs();
++  RS->setRegUsed(Scav);
++}
++
++unsigned LoongArchInstrInfo::removeBranch(MachineBasicBlock &MBB,
++                                          int *BytesRemoved) const {
++  if (BytesRemoved)
++    *BytesRemoved = 0;
++
++  MachineBasicBlock::reverse_iterator I = MBB.rbegin(), REnd = MBB.rend();
++  unsigned removed = 0;
++
++  // Up to 2 branches are removed.
++  // Note that indirect branches are not removed.
++  while (I != REnd && removed < 2) {
++    // Skip past debug instructions.
++    if (I->isDebugInstr()) {
++      ++I;
++      continue;
++    }
++    if (!getAnalyzableBrOpc(I->getOpcode()))
++      break;
++    // Account for the branch size before erasing it, then remove it.
++    if (BytesRemoved)
++      *BytesRemoved += getInstSizeInBytes(*I);
++    I->eraseFromParent();
++    I = MBB.rbegin();
++    ++removed;
++  }
++
++  return removed;
++}
++
++/// reverseBranchCondition - Return the inverse opcode of the
++/// specified Branch instruction.
++bool LoongArchInstrInfo::reverseBranchCondition( ++ SmallVectorImpl &Cond) const { ++ assert( (Cond.size() && Cond.size() <= 3) && ++ "Invalid LoongArch branch condition!"); ++ Cond[0].setImm(getOppositeBranchOpc(Cond[0].getImm())); ++ return false; ++} ++ ++LoongArchInstrInfo::BranchType LoongArchInstrInfo::analyzeBranch( ++ MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, ++ SmallVectorImpl &Cond, bool AllowModify, ++ SmallVectorImpl &BranchInstrs) const { ++ MachineBasicBlock::reverse_iterator I = MBB.rbegin(), REnd = MBB.rend(); ++ ++ // Skip all the debug instructions. ++ while (I != REnd && I->isDebugInstr()) ++ ++I; ++ ++ if (I == REnd || !isUnpredicatedTerminator(*I)) { ++ // This block ends with no branches (it just falls through to its succ). ++ // Leave TBB/FBB null. ++ TBB = FBB = nullptr; ++ return BT_NoBranch; ++ } ++ ++ MachineInstr *LastInst = &*I; ++ unsigned LastOpc = LastInst->getOpcode(); ++ BranchInstrs.push_back(LastInst); ++ ++ // Not an analyzable branch (e.g., indirect jump). ++ if (!getAnalyzableBrOpc(LastOpc)) ++ return LastInst->isIndirectBranch() ? BT_Indirect : BT_None; ++ ++ // Get the second to last instruction in the block. ++ unsigned SecondLastOpc = 0; ++ MachineInstr *SecondLastInst = nullptr; ++ ++ // Skip past any debug instruction to see if the second last actual ++ // is a branch. ++ ++I; ++ while (I != REnd && I->isDebugInstr()) ++ ++I; ++ ++ if (I != REnd) { ++ SecondLastInst = &*I; ++ SecondLastOpc = getAnalyzableBrOpc(SecondLastInst->getOpcode()); ++ ++ // Not an analyzable branch (must be an indirect jump). ++ if (isUnpredicatedTerminator(*SecondLastInst) && !SecondLastOpc) ++ return BT_None; ++ } ++ ++ // If there is only one terminator instruction, process it. ++ if (!SecondLastOpc) { ++ // Unconditional branch. ++ if (LastInst->isUnconditionalBranch()) { ++ TBB = LastInst->getOperand(0).getMBB(); ++ return BT_Uncond; ++ } ++ ++ // Conditional branch ++ AnalyzeCondBr(LastInst, LastOpc, TBB, Cond); ++ return BT_Cond; ++ } ++ ++ // If we reached here, there are two branches. ++ // If there are three terminators, we don't know what sort of block this is. ++ if (++I != REnd && isUnpredicatedTerminator(*I)) ++ return BT_None; ++ ++ BranchInstrs.insert(BranchInstrs.begin(), SecondLastInst); ++ ++ // If second to last instruction is an unconditional branch, ++ // analyze it and remove the last instruction. ++ if (SecondLastInst->isUnconditionalBranch()) { ++ // Return if the last instruction cannot be removed. ++ if (!AllowModify) ++ return BT_None; ++ ++ TBB = SecondLastInst->getOperand(0).getMBB(); ++ LastInst->eraseFromParent(); ++ BranchInstrs.pop_back(); ++ return BT_Uncond; ++ } ++ ++ // Conditional branch followed by an unconditional branch. ++ // The last one must be unconditional. ++ if (!LastInst->isUnconditionalBranch()) ++ return BT_None; ++ ++ AnalyzeCondBr(SecondLastInst, SecondLastOpc, TBB, Cond); ++ FBB = LastInst->getOperand(0).getMBB(); ++ ++ return BT_CondUncond; ++} ++ ++MachineBasicBlock * ++LoongArchInstrInfo::getBranchDestBlock(const MachineInstr &MI) const { ++ assert(MI.getDesc().isBranch() && "Unexpected opcode!"); ++ // The branch target is always the last operand. 
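++  // This holds for the branches produced here, e.g. both `beq $rj, $rd, target`
++  // and `beqz $rj, target` place the destination basic block last.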
++ int NumOp = MI.getNumExplicitOperands(); ++ return MI.getOperand(NumOp - 1).getMBB(); ++} ++ ++bool LoongArchInstrInfo::isBranchOffsetInRange(unsigned BranchOpc, int64_t BrOffset) const { ++/* ++ switch (BranchOpc) { ++ case LoongArch::B: ++ case LoongArch::BAL: ++ case LoongArch::BAL_BR: ++ case LoongArch::BC1F: ++ case LoongArch::BC1FL: ++ case LoongArch::BC1T: ++ case LoongArch::BC1TL: ++ case LoongArch::BEQ: case LoongArch::BEQ64: ++ case LoongArch::BEQL: ++ case LoongArch::BGEZ: case LoongArch::BGEZ64: ++ case LoongArch::BGEZL: ++ case LoongArch::BGEZAL: ++ case LoongArch::BGEZALL: ++ case LoongArch::BGTZ: case LoongArch::BGTZ64: ++ case LoongArch::BGTZL: ++ case LoongArch::BLEZ: case LoongArch::BLEZ64: ++ case LoongArch::BLEZL: ++ case LoongArch::BLTZ: case LoongArch::BLTZ64: ++ case LoongArch::BLTZL: ++ case LoongArch::BLTZAL: ++ case LoongArch::BLTZALL: ++ case LoongArch::BNE: case LoongArch::BNE64: ++ case LoongArch::BNEL: ++ return isInt<18>(BrOffset); ++ ++ case LoongArch::BC1EQZ: ++ case LoongArch::BC1NEZ: ++ case LoongArch::BC2EQZ: ++ case LoongArch::BC2NEZ: ++ case LoongArch::BEQC: case LoongArch::BEQC64: ++ case LoongArch::BNEC: case LoongArch::BNEC64: ++ case LoongArch::BGEC: case LoongArch::BGEC64: ++ case LoongArch::BGEUC: case LoongArch::BGEUC64: ++ case LoongArch::BGEZC: case LoongArch::BGEZC64: ++ case LoongArch::BGTZC: case LoongArch::BGTZC64: ++ case LoongArch::BLEZC: case LoongArch::BLEZC64: ++ case LoongArch::BLTC: case LoongArch::BLTC64: ++ case LoongArch::BLTUC: case LoongArch::BLTUC64: ++ case LoongArch::BLTZC: case LoongArch::BLTZC64: ++ case LoongArch::BNVC: ++ case LoongArch::BOVC: ++ case LoongArch::BGEZALC: ++ case LoongArch::BEQZALC: ++ case LoongArch::BGTZALC: ++ case LoongArch::BLEZALC: ++ case LoongArch::BLTZALC: ++ case LoongArch::BNEZALC: ++ return isInt<18>(BrOffset); ++ ++ case LoongArch::BEQZC: case LoongArch::BEQZC64: ++ case LoongArch::BNEZC: case LoongArch::BNEZC64: ++ return isInt<23>(BrOffset); ++ } ++ */ ++ switch (BranchOpc) { ++ case LoongArch::B: case LoongArch::B32: ++ return isInt<28>(BrOffset); ++ ++ case LoongArch::BEQZ: case LoongArch::BEQZ32: ++ case LoongArch::BNEZ: case LoongArch::BNEZ32: ++ case LoongArch::BCEQZ: ++ case LoongArch::BCNEZ: ++ return isInt<23>(BrOffset); ++ ++ case LoongArch::BEQ: case LoongArch::BEQ32: ++ case LoongArch::BNE: case LoongArch::BNE32: ++ case LoongArch::BLT: case LoongArch::BLT32: ++ case LoongArch::BGE: case LoongArch::BGE32: ++ case LoongArch::BLTU: case LoongArch::BLTU32: ++ case LoongArch::BGEU: case LoongArch::BGEU32: ++ return isInt<18>(BrOffset); ++ } ++ ++ llvm_unreachable("Unknown branch instruction!"); ++} ++ ++ ++/// Predicate for distingushing between control transfer instructions and all ++/// other instructions for handling forbidden slots. Consider inline assembly ++/// as unsafe as well. ++bool LoongArchInstrInfo::SafeInForbiddenSlot(const MachineInstr &MI) const { ++ if (MI.isInlineAsm()) ++ return false; ++ ++ return (MI.getDesc().TSFlags & LoongArchII::IsCTI) == 0; ++} ++ ++/// Predicate for distingushing instructions that have forbidden slots. ++bool LoongArchInstrInfo::HasForbiddenSlot(const MachineInstr &MI) const { ++ return (MI.getDesc().TSFlags & LoongArchII::HasForbiddenSlot) != 0; ++} ++ ++/// Return the number of bytes of code the specified instruction may be. 
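++/// Ordinary instructions report the size recorded in their descriptor (4 bytes
++/// for real LoongArch encodings); inline assembly is estimated conservatively
++/// from its string.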
++unsigned LoongArchInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const { ++ switch (MI.getOpcode()) { ++ default: ++ return MI.getDesc().getSize(); ++ case TargetOpcode::INLINEASM: { // Inline Asm: Variable size. ++ const MachineFunction *MF = MI.getParent()->getParent(); ++ const char *AsmStr = MI.getOperand(0).getSymbolName(); ++ return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo()); ++ } ++ } ++} ++ ++MachineInstrBuilder ++LoongArchInstrInfo::genInstrWithNewOpc(unsigned NewOpc, ++ MachineBasicBlock::iterator I) const { ++ MachineInstrBuilder MIB; ++ ++ int ZeroOperandPosition = -1; ++ bool BranchWithZeroOperand = false; ++ if (I->isBranch() && !I->isPseudo()) { ++ auto TRI = I->getParent()->getParent()->getSubtarget().getRegisterInfo(); ++ ZeroOperandPosition = I->findRegisterUseOperandIdx(LoongArch::ZERO, false, TRI); ++ BranchWithZeroOperand = ZeroOperandPosition != -1; ++ } ++ ++ MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), get(NewOpc)); ++ ++ if (NewOpc == LoongArch::JIRL) { ++ MIB->removeOperand(0); ++ for (unsigned J = 0, E = I->getDesc().getNumOperands(); J < E; ++J) { ++ MIB.add(I->getOperand(J)); ++ } ++ MIB.addImm(0); ++ } else { ++ for (unsigned J = 0, E = I->getDesc().getNumOperands(); J < E; ++J) { ++ if (BranchWithZeroOperand && (unsigned)ZeroOperandPosition == J) ++ continue; ++ ++ MIB.add(I->getOperand(J)); ++ } ++ } ++ ++ MIB.copyImplicitOps(*I); ++ MIB.cloneMemRefs(*I); ++ return MIB; ++} ++ ++bool LoongArchInstrInfo::findCommutedOpIndices(const MachineInstr &MI, ++ unsigned &SrcOpIdx1, ++ unsigned &SrcOpIdx2) const { ++ assert(!MI.isBundle() && ++ "TargetInstrInfo::findCommutedOpIndices() can't handle bundles"); ++ ++ const MCInstrDesc &MCID = MI.getDesc(); ++ if (!MCID.isCommutable()) ++ return false; ++ ++ return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2); ++} ++ ++// bstrins, bstrpick have the following constraints: ++// 0 <= lsb <= msb <= High ++static bool verifyBstrInstruction(const MachineInstr &MI, StringRef &ErrInfo, ++ const int64_t High) { ++ MachineOperand MOMsb = MI.getOperand(2); ++ if (!MOMsb.isImm()) { ++ ErrInfo = "Msb operand is not an immediate!"; ++ return false; ++ } ++ MachineOperand MOLsb = MI.getOperand(3); ++ if (!MOLsb.isImm()) { ++ ErrInfo = "Lsb operand is not an immediate!"; ++ return false; ++ } ++ ++ int64_t Lsb = MOLsb.getImm(); ++ if (!((0 <= Lsb) && (Lsb <= High))) { ++ ErrInfo = "Lsb operand is out of range!"; ++ return false; ++ } ++ ++ int64_t Msb = MOMsb.getImm(); ++ if (!((0 <= Msb) && (Msb <= High))) { ++ ErrInfo = "Msb operand is out of range!"; ++ return false; ++ } ++ ++ if (!(Lsb <= Msb)) { ++ ErrInfo = "Lsb operand is not less than or equal to msb operand!"; ++ return false; ++ } ++ ++ return true; ++} ++ ++// Perform target specific instruction verification. ++bool LoongArchInstrInfo::verifyInstruction(const MachineInstr &MI, ++ StringRef &ErrInfo) const { ++ // Verify that bstrins and bstrpick instructions are well formed. 
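++  // The encodings require 0 <= lsb <= msb, with msb capped at 31 for the .w
++  // forms and 63 for the .d forms; e.g. `bstrpick.w $rd, $rj, 7, 0` extracts
++  // bits [7:0] and is accepted, while msb < lsb is rejected.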
++ switch (MI.getOpcode()) { ++ case LoongArch::BSTRINS_W: ++ case LoongArch::BSTRPICK_W: ++ return verifyBstrInstruction(MI, ErrInfo, 31); ++ case LoongArch::BSTRINS_D: ++ case LoongArch::BSTRPICK_D: ++ return verifyBstrInstruction(MI, ErrInfo, 63); ++ default: ++ return true; ++ } ++ ++ return true; ++} ++ ++std::pair ++LoongArchInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const { ++ return std::make_pair(TF, 0u); ++} ++ ++ArrayRef> ++LoongArchInstrInfo::getSerializableDirectMachineOperandTargetFlags() const { ++ using namespace LoongArchII; ++ ++ static const std::pair Flags[] = { ++ {MO_PCREL_HI, "larch-pcrel-hi"}, ++ {MO_PCREL_LO, "larch-pcrel-lo"}, ++ {MO_TLSGD_HI, "larch-tlsgd-hi"}, ++ {MO_TLSGD_LO, "larch-tlsgd-lo"}, ++ {MO_TLSIE_HI, "larch-tlsie-hi"}, ++ {MO_TLSIE_LO, "larch-tlsie-lo"}, ++ {MO_TLSLE_HI, "larch-tlsle-hi"}, ++ {MO_TLSLE_LO, "larch-tlsle-lo"}, ++ {MO_ABS_HI, "larch-abs-hi"}, ++ {MO_ABS_LO, "larch-abs-lo"}, ++ {MO_ABS_HIGHER, "larch-abs-higher"}, ++ {MO_ABS_HIGHEST, "larch-abs-highest"}, ++ {MO_GOT_HI, "larch-got-hi"}, ++ {MO_GOT_LO, "larch-got-lo"}, ++ {MO_CALL_HI, "larch-call-hi"}, ++ {MO_CALL_LO, "larch-call-lo"} ++ }; ++ return makeArrayRef(Flags); ++} +diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h +index 0a8c86a5e..53191a94d 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h ++++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h +@@ -1,4 +1,4 @@ +-//=- LoongArchInstrInfo.h - LoongArch Instruction Information ---*- C++ -*-===// ++//===- LoongArchInstrInfo.h - LoongArch Instruction Information -----------*- C++ -*-===// + // + // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + // See https://llvm.org/LICENSE.txt for license information. +@@ -8,39 +8,239 @@ + // + // This file contains the LoongArch implementation of the TargetInstrInfo class. + // ++// FIXME: We need to override TargetInstrInfo::getInlineAsmLength method in ++// order for LoongArchLongBranch pass to work correctly when the code has inline ++// assembly. The returned value doesn't have to be the asm instruction's exact ++// size in bytes; LoongArchLongBranch only expects it to be the correct upper bound. + //===----------------------------------------------------------------------===// + + #ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCHINSTRINFO_H + #define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHINSTRINFO_H + ++#define DBAR_HINT 0x700 ++ ++#include "MCTargetDesc/LoongArchMCTargetDesc.h" ++#include "LoongArch.h" + #include "LoongArchRegisterInfo.h" ++#include "llvm/ADT/ArrayRef.h" ++#include "llvm/CodeGen/MachineBasicBlock.h" ++#include "llvm/CodeGen/MachineInstrBuilder.h" ++#include "llvm/CodeGen/MachineMemOperand.h" + #include "llvm/CodeGen/TargetInstrInfo.h" ++#include + + #define GET_INSTRINFO_HEADER + #include "LoongArchGenInstrInfo.inc" + + namespace llvm { + ++class MachineInstr; ++class MachineOperand; + class LoongArchSubtarget; ++class TargetRegisterClass; ++class TargetRegisterInfo; + + class LoongArchInstrInfo : public LoongArchGenInstrInfo { ++ virtual void anchor(); ++ const LoongArchRegisterInfo RI; ++ const LoongArchSubtarget &Subtarget; ++ + public: +- explicit LoongArchInstrInfo(LoongArchSubtarget &STI); ++ enum BranchType { ++ BT_None, // Couldn't analyze branch. ++ BT_NoBranch, // No branches found. ++ BT_Uncond, // One unconditional branch. ++ BT_Cond, // One conditional branch. ++ BT_CondUncond, // A conditional branch followed by an unconditional branch. 
++ BT_Indirect // One indirct branch. ++ }; ++ ++ explicit LoongArchInstrInfo(const LoongArchSubtarget &STI); + +- void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, +- const DebugLoc &DL, MCRegister DstReg, MCRegister SrcReg, ++ /// isLoadFromStackSlot - If the specified machine instruction is a direct ++ /// load from a stack slot, return the virtual or physical register number of ++ /// the destination along with the FrameIndex of the loaded stack slot. If ++ /// not, return 0. This predicate must return 0 if the instruction has ++ /// any side effects other than loading from the stack slot. ++ unsigned isLoadFromStackSlot(const MachineInstr &MI, ++ int &FrameIndex) const override; ++ ++ /// isStoreToStackSlot - If the specified machine instruction is a direct ++ /// store to a stack slot, return the virtual or physical register number of ++ /// the source reg along with the FrameIndex of the loaded stack slot. If ++ /// not, return 0. This predicate must return 0 if the instruction has ++ /// any side effects other than storing to the stack slot. ++ unsigned isStoreToStackSlot(const MachineInstr &MI, ++ int &FrameIndex) const override; ++ ++ void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, ++ const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, + bool KillSrc) const override; + ++ /// Branch Analysis ++ bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, ++ MachineBasicBlock *&FBB, ++ SmallVectorImpl &Cond, ++ bool AllowModify) const override; ++ ++ unsigned removeBranch(MachineBasicBlock &MBB, ++ int *BytesRemoved = nullptr) const override; ++ ++ unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, ++ MachineBasicBlock *FBB, ArrayRef Cond, ++ const DebugLoc &DL, ++ int *BytesAdded = nullptr) const override; ++ ++ void insertIndirectBranch(MachineBasicBlock &MBB, ++ MachineBasicBlock &NewDestBB, ++ MachineBasicBlock &RestoreBB, const DebugLoc &DL, ++ int64_t BrOffset, ++ RegScavenger *RS = nullptr) const override; ++ bool ++ reverseBranchCondition(SmallVectorImpl &Cond) const override; ++ ++ BranchType analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, ++ MachineBasicBlock *&FBB, ++ SmallVectorImpl &Cond, ++ bool AllowModify, ++ SmallVectorImpl &BranchInstrs) const; ++ ++ /// Get the block that branch instruction jumps to. ++ MachineBasicBlock *getBranchDestBlock(const MachineInstr &MI) const override; ++ ++ /// Determine if the branch target is in range. ++ bool isBranchOffsetInRange(unsigned BranchOpc, ++ int64_t BrOffset) const override; ++ ++ /// Predicate to determine if an instruction can go in a forbidden slot. ++ bool SafeInForbiddenSlot(const MachineInstr &MI) const; ++ ++ /// Predicate to determine if an instruction has a forbidden slot. ++ bool HasForbiddenSlot(const MachineInstr &MI) const; ++ ++ /// Insert nop instruction when hazard condition is found ++ void insertNoop(MachineBasicBlock &MBB, ++ MachineBasicBlock::iterator MI) const override; ++ ++ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As ++ /// such, whenever a client has an instance of instruction info, it should ++ /// always be able to get register info as well (through this method). ++ const LoongArchRegisterInfo &getRegisterInfo() const; ++ ++ bool expandPostRAPseudo(MachineInstr &MI) const override; ++ ++ unsigned getOppositeBranchOpc(unsigned Opc) const; ++ ++ /// Emit a series of instructions to load an immediate. 
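++  /// The immediate is materialized into a newly created virtual register,
++  /// which is returned to the caller.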
++ unsigned loadImmediate(int64_t Imm, MachineBasicBlock &MBB, ++ MachineBasicBlock::iterator II, ++ const DebugLoc &DL) const; ++ ++ /// Return the number of bytes of code the specified instruction may be. ++ unsigned getInstSizeInBytes(const MachineInstr &MI) const override; ++ + void storeRegToStackSlot(MachineBasicBlock &MBB, +- MachineBasicBlock::iterator MBBI, Register SrcReg, +- bool IsKill, int FrameIndex, ++ MachineBasicBlock::iterator MBBI, ++ Register SrcReg, bool isKill, int FrameIndex, + const TargetRegisterClass *RC, +- const TargetRegisterInfo *TRI) const override; ++ const TargetRegisterInfo *TRI) const override { ++ storeRegToStack(MBB, MBBI, SrcReg, isKill, FrameIndex, RC, TRI, 0); ++ } ++ + void loadRegFromStackSlot(MachineBasicBlock &MBB, +- MachineBasicBlock::iterator MBBI, Register DstReg, +- int FrameIndex, const TargetRegisterClass *RC, +- const TargetRegisterInfo *TRI) const override; ++ MachineBasicBlock::iterator MBBI, ++ Register DestReg, int FrameIndex, ++ const TargetRegisterClass *RC, ++ const TargetRegisterInfo *TRI) const override { ++ loadRegFromStack(MBB, MBBI, DestReg, FrameIndex, RC, TRI, 0); ++ } ++ ++ void storeRegToStack(MachineBasicBlock &MBB, ++ MachineBasicBlock::iterator MI, ++ Register SrcReg, bool isKill, int FrameIndex, ++ const TargetRegisterClass *RC, ++ const TargetRegisterInfo *TRI, ++ int64_t Offset) const; ++ ++ void loadRegFromStack(MachineBasicBlock &MBB, ++ MachineBasicBlock::iterator MI, ++ Register DestReg, int FrameIndex, ++ const TargetRegisterClass *RC, ++ const TargetRegisterInfo *TRI, ++ int64_t Offset) const; ++ ++ /// Adjust register value(DestReg = SrcReg + Amount). ++ void ++ adjustReg(unsigned DestReg, unsigned SrcReg, int64_t Amount, ++ MachineBasicBlock &MBB, MachineBasicBlock::iterator I, ++ MachineInstr::MIFlag Flag = MachineInstr::MIFlag::NoFlags) const; ++ ++ /// Create an instruction which has the same operands and memory operands ++ /// as MI but has a new opcode. ++ MachineInstrBuilder genInstrWithNewOpc(unsigned NewOpc, ++ MachineBasicBlock::iterator I) const; ++ ++ bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx1, ++ unsigned &SrcOpIdx2) const override; ++ ++ /// Perform target specific instruction verification. ++ bool verifyInstruction(const MachineInstr &MI, ++ StringRef &ErrInfo) const override; ++ ++ std::pair ++ decomposeMachineOperandsTargetFlags(unsigned TF) const override; ++ ++ ArrayRef> ++ getSerializableDirectMachineOperandTargetFlags() const override; ++ ++protected: ++ /// If the specific machine instruction is a instruction that moves/copies ++ /// value from one register to another register return true along with ++ /// @Source machine operand and @Destination machine operand. 
++  Optional<DestSourcePair>
++  isCopyInstrImpl(const MachineInstr &MI) const override;
++
++private:
++
++  bool isZeroImm(const MachineOperand &op) const;
++
++  MachineMemOperand *GetMemOperand(MachineBasicBlock &MBB, int FI,
++                                   MachineMemOperand::Flags Flags) const;
++
++  unsigned getAnalyzableBrOpc(unsigned Opc) const;
++
++  void AnalyzeCondBr(const MachineInstr *Inst, unsigned Opc,
++                     MachineBasicBlock *&BB,
++                     SmallVectorImpl<MachineOperand> &Cond) const;
++
++  MachineInstr *
++  BuildCondBr(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
++              const DebugLoc &DL, ArrayRef<MachineOperand> Cond) const;
++
++  void expandRetRA(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const;
++
++  void expandERet(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const;
++
++  std::pair<bool, bool> compareOpndSize(unsigned Opc,
++                                        const MachineFunction &MF) const;
++
++  /// Expand pseudo Int-to-FP conversion instructions.
++  ///
++  /// For example, the following pseudo instruction
++  ///  PseudoFFINT_D_W $fd, $rj
++  /// gets expanded into these two instructions:
++  ///  MOVGR2FR_W $ftmp, $rj
++  ///  FFINT_D_W $fd, $ftmp
++  /// where $ftmp is the single-precision sub-register of $fd.
++  ///
++  /// We do this expansion post-RA to avoid inserting a floating point copy
++  /// instruction between MOVGR2FR_W and FFINT_D_W.
++  void expandCvtFPInt(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
++                      unsigned CvtOpc, unsigned MovOpc, bool IsI64) const;
++
++  void expandEhReturn(MachineBasicBlock &MBB,
++                      MachineBasicBlock::iterator I) const;
+ };
+
+ } // end namespace llvm
++
+ #endif // LLVM_LIB_TARGET_LOONGARCH_LOONGARCHINSTRINFO_H
+diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
+index d07d086bd..2d505ee25 100644
+--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
++++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
+@@ -1,4 +1,4 @@
+-//== LoongArchInstrInfo.td - Target Description for LoongArch -*- tablegen -*-//
++//===- LoongArchInstrInfo.td - Target Description for LoongArch -*- tablegen -*-===//
+ //
+ // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ // See https://llvm.org/LICENSE.txt for license information.
+@@ -6,929 +6,1883 @@
+ //
+-// This file describes the LoongArch instructions in TableGen format.
++// This file describes the LoongArch instructions in TableGen format.
+ //
+ //===----------------------------------------------------------------------===//
++include "LoongArchInstrFormats.td"
+
+-//===----------------------------------------------------------------------===//
+-// LoongArch specific DAG Nodes.
+-//===----------------------------------------------------------------------===//
++def SDT_Bstrpick : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
++                                        SDTCisVT<2, i32>, SDTCisSameAs<2, 3>]>;
++def SDT_Bstrins : SDTypeProfile<1, 4, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
++                                       SDTCisVT<2, i32>, SDTCisSameAs<2, 3>,
++                                       SDTCisSameAs<0, 4>]>;
+
+-// Target-independent type requirements, but with target-specific formats.
+-def SDT_CallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>,
+-                                       SDTCisVT<1, i32>]>;
+-def SDT_CallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i32>,
+-                                   SDTCisVT<1, i32>]>;
+-
+-// Target-dependent type requirements.
+-def SDT_LoongArchCall : SDTypeProfile<0, -1, [SDTCisVT<0, GRLenVT>]>; +-def SDT_LoongArchIntBinOpW : SDTypeProfile<1, 2, [ +- SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisVT<0, i64> +-]>; +- +-def SDT_LoongArchBStrIns: SDTypeProfile<1, 4, [ +- SDTCisInt<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<3>, +- SDTCisSameAs<3, 4> +-]>; +- +-def SDT_LoongArchBStrPick: SDTypeProfile<1, 3, [ +- SDTCisInt<0>, SDTCisSameAs<0, 1>, SDTCisInt<2>, SDTCisSameAs<2, 3> +-]>; +- +-// TODO: Add LoongArch specific DAG Nodes +-// Target-independent nodes, but with target-specific formats. +-def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_CallSeqStart, +- [SDNPHasChain, SDNPOutGlue]>; +-def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_CallSeqEnd, +- [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; +- +-// Target-dependent nodes. +-def loongarch_call : SDNode<"LoongArchISD::CALL", SDT_LoongArchCall, +- [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, +- SDNPVariadic]>; +-def loongarch_ret : SDNode<"LoongArchISD::RET", SDTNone, +- [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; +-def loongarch_sll_w : SDNode<"LoongArchISD::SLL_W", SDT_LoongArchIntBinOpW>; +-def loongarch_sra_w : SDNode<"LoongArchISD::SRA_W", SDT_LoongArchIntBinOpW>; +-def loongarch_srl_w : SDNode<"LoongArchISD::SRL_W", SDT_LoongArchIntBinOpW>; +-def loongarch_bstrins +- : SDNode<"LoongArchISD::BSTRINS", SDT_LoongArchBStrIns>; +-def loongarch_bstrpick +- : SDNode<"LoongArchISD::BSTRPICK", SDT_LoongArchBStrPick>; ++def LoongArchBstrpick : SDNode<"LoongArchISD::BSTRPICK", SDT_Bstrpick>; + +-//===----------------------------------------------------------------------===// +-// Operand and SDNode transformation definitions. +-//===----------------------------------------------------------------------===// ++def LoongArchBstrins : SDNode<"LoongArchISD::BSTRINS", SDT_Bstrins>; + +-class ImmAsmOperand +- : AsmOperandClass { +- let Name = prefix # "Imm" # width # suffix; +- let DiagnosticType = !strconcat("Invalid", Name); ++def SDT_DBAR : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>; ++def LoongArchDBAR : SDNode<"LoongArchISD::DBAR", SDT_DBAR, [SDNPHasChain,SDNPSideEffect]>; ++ ++def SDT_LoongArchEHRET : SDTypeProfile<0, 2, [SDTCisInt<0>, SDTCisPtrTy<1>]>; ++ ++def LoongArchehret : SDNode<"LoongArchISD::EH_RETURN", SDT_LoongArchEHRET, ++ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; ++ ++//===---------------------------------------------------------------------===/ ++// Operand, Complex Patterns and Transformations Definitions. 
++//===---------------------------------------------------------------------===/ ++ ++def assertzext_lt_i32 : PatFrag<(ops node:$src), (assertzext node:$src), [{ ++ return cast(N->getOperand(1))->getVT().bitsLT(MVT::i32); ++}]>; ++ ++def immz : PatLeaf<(imm), [{ return N->getSExtValue() == 0; }]>; ++def immZExt12 : PatLeaf<(imm), [{ return isUInt<12>(N->getZExtValue()); }]>; ++def immSExt12 : PatLeaf<(imm), [{ return isInt<12>(N->getSExtValue()); }]>; ++def immSExt13 : PatLeaf<(imm), [{ return isInt<13>(N->getSExtValue()); }]>; ++ ++def immZExt2Alsl : ImmLeaf(Imm - 1);}]>; ++//class ImmAsmOperand : AsmOperandClass { ++// let RenderMethod = "addImmOperands"; ++// let PredicateMethod = "isImmediate<" # Low # "," # High # ">"; ++// let DiagnosticString = "operand must be an immediate in the range [" # Low # "," # High # "]"; ++//} ++// ++//def Imm8AsmOperand: ImmAsmOperand<8,8> { let Name = "Imm8"; } ++//def imm8 : Operand, ImmLeaf { ++// let ParserMatchClass = Imm8AsmOperand; ++//} ++ ++def HasLSX : Predicate<"Subtarget->hasLSX()">, ++ AssemblerPredicate<(all_of FeatureLSX)>; ++def HasLASX : Predicate<"Subtarget->hasLASX()">, ++ AssemblerPredicate<(all_of FeatureLASX)>; ++ ++class EXT_LSX { ++ list ExtPredicate = [HasLSX]; ++} ++ ++class EXT_LASX { ++ list ExtPredicate = [HasLASX]; ++} ++ ++class SImmOperand : AsmOperandClass { ++ let Name = "SImm" # width; ++ let DiagnosticType = "InvalidSImm" # width; + let RenderMethod = "addImmOperands"; ++ let PredicateMethod = "isSImm<" # width # ">"; + } + +-class SImmAsmOperand +- : ImmAsmOperand<"S", width, suffix> { ++def SImm2Operand : SImmOperand<2>; ++def simm2 : Operand, ImmLeaf= -2 && Imm < 2; }]> { ++ let ParserMatchClass = SImm2Operand; ++ let DecoderMethod = "DecodeSImmWithOffsetAndScale<2>"; ++} ++def SImm3Operand : SImmOperand<3>; ++def simm3 : Operand, ImmLeaf= -4 && Imm < 4; }]> { ++ let ParserMatchClass = SImm3Operand; ++ let DecoderMethod = "DecodeSImmWithOffsetAndScale<3>"; + } + +-class UImmAsmOperand +- : ImmAsmOperand<"U", width, suffix> { ++def SImm5Operand : SImmOperand<5>; ++def simm5 : Operand, ImmLeaf= -16 && Imm < 16; }]> { ++ let ParserMatchClass = SImm5Operand; ++ let DecoderMethod = "DecodeSImmWithOffsetAndScale<5>"; + } + +-def uimm2 : Operand { +- let ParserMatchClass = UImmAsmOperand<2>; ++def simm5_32 : Operand, ImmLeaf= -16 && Imm < 16; }]> { ++ let ParserMatchClass = SImm5Operand; ++ let DecoderMethod = "DecodeSImmWithOffsetAndScale<5>"; + } + +-def uimm2_plus1 : Operand { +- let ParserMatchClass = UImmAsmOperand<2, "plus1">; +- let EncoderMethod = "getImmOpValueSub1"; +- let DecoderMethod = "decodeUImmOperand<2, 1>"; ++def SImm8Operand : SImmOperand<8>; ++def simm8 : Operand, ImmLeaf= -128 && Imm < 128; }]> { ++ let ParserMatchClass = SImm8Operand; ++ let DecoderMethod = "DecodeSImmWithOffsetAndScale<8>"; ++} ++def simm8_32 : Operand, ImmLeaf= -128 && Imm < 128; }]> { ++ let ParserMatchClass = SImm8Operand; ++ let DecoderMethod = "DecodeSImmWithOffsetAndScale<8>"; + } + +-def uimm3 : Operand { +- let ParserMatchClass = UImmAsmOperand<3>; ++def SImm12Operand : SImmOperand<12>; ++def simm12 : Operand, ImmLeaf= -2048 && Imm < 2048; }]> { ++ let ParserMatchClass = SImm12Operand; ++ let DecoderMethod = "DecodeSImmWithOffsetAndScale<12>"; ++} ++def simm12_32 : Operand, ImmLeaf= -2048 && Imm < 2048; }]> { ++ let ParserMatchClass = SImm12Operand; ++ let DecoderMethod = "DecodeSImmWithOffsetAndScale<12>"; + } + +-def uimm5 : Operand, ImmLeaf(Imm);}]> { +- let ParserMatchClass = UImmAsmOperand<5>; ++def SImm14Operand : 
SImmOperand<14>; ++def simm14 : Operand, ImmLeaf= -8192 && Imm < 8192; }]> { ++ let ParserMatchClass = SImm14Operand; ++ let DecoderMethod = "DecodeSImmWithOffsetAndScale<14>"; + } + +-def uimm6 : Operand, ImmLeaf(Imm);}]> { +- let ParserMatchClass = UImmAsmOperand<6>; ++def SImm15Operand : SImmOperand<15>; ++def simm15 : Operand, ImmLeaf= -16384 && Imm < 16384; }]> { ++ let ParserMatchClass = SImm15Operand; ++ let DecoderMethod = "DecodeSImmWithOffsetAndScale<15>"; + } + +-def uimm8 : Operand { +- let ParserMatchClass = UImmAsmOperand<8>; ++def SImm16Operand : SImmOperand<16>; ++def simm16 : Operand, ImmLeaf= -32768 && Imm < 32768; }]> { ++ let ParserMatchClass = SImm16Operand; ++ let DecoderMethod = "DecodeSImmWithOffsetAndScale<16>"; + } + +-def uimm12 : Operand, ImmLeaf(Imm);}]> { +- let ParserMatchClass = UImmAsmOperand<12>; ++def SImm20Operand : SImmOperand<20>; ++def simm20 : Operand, ImmLeaf= -524288 && Imm < 524288; }]> { ++ let ParserMatchClass = SImm20Operand; ++ let DecoderMethod = "DecodeSImmWithOffsetAndScale<20>"; ++} ++def simm20_32 : Operand, ImmLeaf= -524288 && Imm < 524288; }]> { ++ let ParserMatchClass = SImm20Operand; ++ let DecoderMethod = "DecodeSImmWithOffsetAndScale<20>"; + } + +-def uimm14 : Operand { +- let ParserMatchClass = UImmAsmOperand<14>; ++def SImm21Operand : SImmOperand<21>; ++def simm21 : Operand, ImmLeaf= -1048576 && Imm < 1048576; }]> { ++ let ParserMatchClass = SImm21Operand; ++ let DecoderMethod = "DecodeSImmWithOffsetAndScale<21>"; + } + +-def uimm15 : Operand { +- let ParserMatchClass = UImmAsmOperand<15>; ++def SImm26Operand : SImmOperand<26>; ++def simm26 : Operand, ImmLeaf= -33554432 && Imm < 33554432; }]> { ++ let ParserMatchClass = SImm26Operand; ++ let DecoderMethod = "DecodeSImmWithOffsetAndScale<26>"; + } + +-def simm12 : Operand, ImmLeaf(Imm);}]> { +- let ParserMatchClass = SImmAsmOperand<12>; +- let DecoderMethod = "decodeSImmOperand<12>"; ++def UImm1Operand : AsmOperandClass { ++ let Name = "UImm1"; ++ let RenderMethod = "addUImmOperands<1>"; ++ let PredicateMethod = "isUImm<1>"; ++ let DiagnosticType = "InvalidImm0_1"; + } + +-def simm14_lsl2 : Operand { +- let ParserMatchClass = SImmAsmOperand<14, "lsl2">; +- let EncoderMethod = "getImmOpValueAsr2"; +- let DecoderMethod = "decodeSImmOperand<14, 2>"; ++def UImm2Operand : AsmOperandClass { ++ let Name = "UImm2"; ++ let RenderMethod = "addUImmOperands<2>"; ++ let PredicateMethod = "isUImm<2>"; ++ let DiagnosticType = "InvalidImm0_3"; + } + +-def simm16 : Operand { +- let ParserMatchClass = SImmAsmOperand<16>; +- let DecoderMethod = "decodeSImmOperand<16>"; ++def UImm3Operand : AsmOperandClass { ++ let Name = "UImm3"; ++ let RenderMethod = "addUImmOperands<3>"; ++ let PredicateMethod = "isUImm<3>"; ++ let DiagnosticType = "InvalidImm0_7"; + } + +-def simm16_lsl2 : Operand, +- ImmLeaf(Imm>>2);}]> { +- let ParserMatchClass = SImmAsmOperand<16, "lsl2">; +- let EncoderMethod = "getImmOpValueAsr2"; +- let DecoderMethod = "decodeSImmOperand<16, 2>"; ++def UImm4Operand : AsmOperandClass { ++ let Name = "UImm4"; ++ let RenderMethod = "addUImmOperands<4>"; ++ let PredicateMethod = "isUImm<4>"; ++ let DiagnosticType = "InvalidImm0_15"; + } + +-def simm16_lsl2_br : Operand { +- let ParserMatchClass = SImmAsmOperand<16, "lsl2">; +- let EncoderMethod = "getImmOpValueAsr2"; +- let DecoderMethod = "decodeSImmOperand<16, 2>"; ++def UImm5Operand : AsmOperandClass { ++ let Name = "UImm5"; ++ let RenderMethod = "addUImmOperands<5>"; ++ let PredicateMethod = "isUImm<5>"; ++ let DiagnosticType = 
"InvalidImm0_31"; + } + +-def simm20 : Operand { +- let ParserMatchClass = SImmAsmOperand<20>; +- let DecoderMethod = "decodeSImmOperand<20>"; ++def uimm1i : Operand, ImmLeaf= 0 && Imm < 2; }]> { ++ let PrintMethod = "printUImm<1>"; ++ let ParserMatchClass = UImm1Operand; + } + +-def simm21_lsl2 : Operand { +- let ParserMatchClass = SImmAsmOperand<21, "lsl2">; +- let EncoderMethod = "getImmOpValueAsr2"; +- let DecoderMethod = "decodeSImmOperand<21, 2>"; ++def uimm2 : Operand, ImmLeaf= 0 && Imm < 4; }]> { ++ let PrintMethod = "printUImm<2>"; ++ let ParserMatchClass = UImm2Operand; + } + +-def simm26_lsl2 : Operand { +- let ParserMatchClass = SImmAsmOperand<26, "lsl2">; +- let EncoderMethod = "getImmOpValueAsr2"; +- let DecoderMethod = "decodeSImmOperand<26, 2>"; ++def uimm3 : Operand, ImmLeaf= 0 && Imm < 8; }]> { ++ let PrintMethod = "printUImm<3>"; ++ let ParserMatchClass = UImm3Operand; + } + +-// Standalone (codegen-only) immleaf patterns. ++def uimm4i : Operand, ImmLeaf= 0 && Imm < 16; }]> { ++ let PrintMethod = "printUImm<4>"; ++ let ParserMatchClass = UImm4Operand; ++} + +-// A 12-bit signed immediate plus one where the imm range will be [-2047, 2048]. +-def simm12_plus1 : ImmLeaf(Imm) && Imm != -2048) || Imm == 2048;}]>; ++def uimm5 : Operand, ImmLeaf= 0 && Imm < 32; }]> { ++ let PrintMethod = "printUImm<5>"; ++ let ParserMatchClass = UImm5Operand; ++} + +-// Return the negation of an immediate value. +-def NegImm : SDNodeXFormgetTargetConstant(-N->getSExtValue(), SDLoc(N), +- N->getValueType(0)); +-}]>; ++def UImm6Operand : AsmOperandClass { ++ let Name = "UImm6"; ++ let RenderMethod = "addUImmOperands<16>"; ++ let PredicateMethod = "isUImm<6>"; ++ let DiagnosticType = "InvalidImm0_63"; ++} ++def uimm6 : Operand, ImmLeaf= 0 && Imm < 64; }]> { ++ let PrintMethod = "printUImm<6>"; ++ let ParserMatchClass = UImm6Operand; ++} + +-// FP immediate patterns. 
+-def fpimm0 : PatLeaf<(fpimm), [{return N->isExactlyValue(+0.0);}]>; +-def fpimm0neg : PatLeaf<(fpimm), [{return N->isExactlyValue(-0.0);}]>; +-def fpimm1 : PatLeaf<(fpimm), [{return N->isExactlyValue(+1.0);}]>; ++def UImm7Operand : AsmOperandClass { ++ let Name = "UImm7"; ++ let RenderMethod = "addUImmOperands<16>"; ++ let PredicateMethod = "isUImm<7>"; ++ let DiagnosticType = "InvalidImm0_127"; ++} + +-def CallSymbol: AsmOperandClass { +- let Name = "CallSymbol"; ++def uimm7i : Operand, ImmLeaf= 0 && Imm < 128; }]> { ++ let PrintMethod = "printUImm<7>"; ++ let ParserMatchClass = UImm7Operand; ++} ++ ++def UImm12Operand : AsmOperandClass { ++ let Name = "UImm12"; ++ let RenderMethod = "addUImmOperands<12>"; ++ let PredicateMethod = "isUImm<12>"; ++ let DiagnosticType = "InvalidImm0_4095"; ++} ++def uimm12 : Operand, ImmLeaf= 0 && Imm < 4096; }]> { ++ let PrintMethod = "printUImm<12>"; ++ let ParserMatchClass = UImm12Operand; ++} ++def uimm12_32 : Operand, ImmLeaf= 0 && Imm < 4096; }]> { ++ let PrintMethod = "printUImm<12>"; ++ let ParserMatchClass = UImm12Operand; ++} ++ ++def UImm15Operand : AsmOperandClass { ++ let Name = "UImm15"; ++ let RenderMethod = "addUImmOperands<15>"; ++ let PredicateMethod = "isUImm<15>"; ++ let DiagnosticType = "InvalidImm0_32767"; ++} ++def uimm15 : Operand, ImmLeaf= 0 && Imm < 32768; }]> { ++ let PrintMethod = "printUImm<15>"; ++ let ParserMatchClass = UImm15Operand; ++} ++ ++def UImm14Operand : AsmOperandClass { ++ let Name = "UImm14"; ++ let RenderMethod = "addUImmOperands<14>"; ++ let PredicateMethod = "isUImm<14>"; ++ let DiagnosticType = "InvalidImm0_16383"; ++} ++def uimm14 : Operand, ImmLeaf= 0 && Imm < 16384; }]> { ++ let PrintMethod = "printUImm<14>"; ++ let ParserMatchClass = UImm14Operand; ++} ++def uimm14_32 : Operand, ImmLeaf= 0 && Imm < 16384; }]> { ++ let PrintMethod = "printUImm<14>"; ++ let ParserMatchClass = UImm14Operand; ++} ++ ++def UImm8Operand : AsmOperandClass { ++ let Name = "UImm8"; ++ let RenderMethod = "addUImmOperands<8>"; ++ let PredicateMethod = "isUImm<8>"; ++ let DiagnosticType = "InvalidImm0_255"; ++} ++def uimm8_64 : Operand, ImmLeaf= 0 && Imm < 256; }]> { ++ let PrintMethod = "printUImm<8>"; ++ let ParserMatchClass = UImm8Operand; ++} ++ ++def uimm8_32 : Operand, ImmLeaf= 0 && Imm < 256; }]> { ++ let PrintMethod = "printUImm<8>"; ++ let ParserMatchClass = UImm8Operand; ++} ++ ++def addr : ++ComplexPattern; ++ ++def addrDefault : ++ComplexPattern; ++ ++def addrRegImm : ++ComplexPattern; ++ ++def addrimm14lsl2 : ComplexPattern; ++ ++class ConstantUImmAsmOperandClass Supers = [], ++ int Offset = 0> : AsmOperandClass { ++ let Name = "ConstantUImm" # Bits # "_" # Offset; ++ let RenderMethod = "addConstantUImmOperands<" # Bits # ", " # Offset # ">"; ++ let PredicateMethod = "isConstantUImm<" # Bits # ", " # Offset # ">"; ++ let SuperClasses = Supers; ++ let DiagnosticType = "UImm" # Bits # "_" # Offset; ++} ++class SImmAsmOperandClass Supers = []> ++ : AsmOperandClass { ++ let Name = "SImm" # Bits; ++ let RenderMethod = "addSImmOperands<" # Bits # ">"; ++ let PredicateMethod = "isSImm<" # Bits # ">"; ++ let SuperClasses = Supers; ++ let DiagnosticType = "SImm" # Bits; ++} ++class UImmAnyAsmOperandClass Supers = []> ++ : AsmOperandClass { ++ let Name = "ImmAny"; ++ let RenderMethod = "addConstantUImmOperands<32>"; ++ let PredicateMethod = "isSImm<" # Bits # ">"; ++ let SuperClasses = Supers; ++ let DiagnosticType = "ImmAny"; ++} ++ ++def UImm32CoercedAsmOperandClass : UImmAnyAsmOperandClass<33, []> { ++ let Name = 
"UImm32_Coerced"; ++ let DiagnosticType = "UImm32_Coerced"; ++} ++def SImm32RelaxedAsmOperandClass ++ : SImmAsmOperandClass<32, [UImm32CoercedAsmOperandClass]> { ++ let Name = "SImm32_Relaxed"; ++ let PredicateMethod = "isAnyImm<33>"; ++ let DiagnosticType = "SImm32_Relaxed"; ++} ++def SImm32AsmOperandClass ++ : SImmAsmOperandClass<32, [SImm32RelaxedAsmOperandClass]>; ++def ConstantUImm26AsmOperandClass ++ : ConstantUImmAsmOperandClass<26, [SImm32AsmOperandClass]>; ++ ++def ConstantUImm20AsmOperandClass ++ : ConstantUImmAsmOperandClass<20, [ConstantUImm26AsmOperandClass]>; ++ ++def ConstantUImm2Plus1AsmOperandClass ++ : ConstantUImmAsmOperandClass<2, [ConstantUImm20AsmOperandClass], 1>; ++ ++class UImmAsmOperandClass Supers = []> ++ : AsmOperandClass { ++ let Name = "UImm" # Bits; ++ let RenderMethod = "addUImmOperands<" # Bits # ">"; ++ let PredicateMethod = "isUImm<" # Bits # ">"; ++ let SuperClasses = Supers; ++ let DiagnosticType = "UImm" # Bits; ++} ++ ++def UImm16RelaxedAsmOperandClass ++ : UImmAsmOperandClass<16, [ConstantUImm20AsmOperandClass]> { ++ let Name = "UImm16_Relaxed"; ++ let PredicateMethod = "isAnyImm<16>"; ++ let DiagnosticType = "UImm16_Relaxed"; ++} ++ ++def ConstantSImm14Lsl2AsmOperandClass : AsmOperandClass { ++ let Name = "SImm14Lsl2"; + let RenderMethod = "addImmOperands"; ++ let PredicateMethod = "isScaledSImm<14, 2>"; ++ let SuperClasses = [UImm16RelaxedAsmOperandClass]; ++ let DiagnosticType = "SImm14_Lsl2"; ++} ++ ++foreach I = {2} in ++ def simm14_lsl # I : Operand { ++ let DecoderMethod = "DecodeSImmWithOffsetAndScale<14, " # I # ">"; ++ let ParserMatchClass = ++ !cast("ConstantSImm14Lsl" # I # "AsmOperandClass"); ++ } ++ ++def uimm16_64_relaxed : Operand { ++ let PrintMethod = "printUImm<16>"; ++ let ParserMatchClass = ++ !cast("UImm16RelaxedAsmOperandClass"); ++} ++ ++def uimm2_plus1 : Operand { ++ let PrintMethod = "printUImm<2, 1>"; ++ let EncoderMethod = "getUImmWithOffsetEncoding<2, 1>"; ++ let DecoderMethod = "DecodeUImmWithOffset<2, 1>"; ++ let ParserMatchClass = ConstantUImm2Plus1AsmOperandClass; ++} ++ ++// like simm32 but coerces simm32 to uimm32. 
++def uimm32_coerced : Operand { ++ let ParserMatchClass = !cast("UImm32CoercedAsmOperandClass"); ++} ++ ++def imm64: Operand; ++ ++def LoongArchMemAsmOperand : AsmOperandClass { ++ let Name = "Mem"; ++ let ParserMethod = "parseMemOperand"; ++} ++ ++def LoongArchAMemAsmOperand : AsmOperandClass { ++ let Name = "AMem"; ++ let ParserMethod = "parseAMemOperand"; ++ let RenderMethod = "addMemOperands"; ++ let PredicateMethod = "isZeroMemOff"; ++ let DiagnosticType = "MemZeroOff"; ++} ++ ++def LoongArchMemSimm14AsmOperand : AsmOperandClass { ++ let Name = "MemOffsetSimm14"; ++ let SuperClasses = [LoongArchMemAsmOperand]; ++ let RenderMethod = "addMemOperands"; ++ let ParserMethod = "parseMemOperand"; ++ let PredicateMethod = "isMemWithSimmOffset<14>"; ++ let DiagnosticType = "MemSImm14"; ++} ++ ++foreach I = {2} in ++ def LoongArchMemSimm14Lsl # I # AsmOperand : AsmOperandClass { ++ let Name = "MemOffsetSimm14_" # I; ++ let SuperClasses = [LoongArchMemAsmOperand]; ++ let RenderMethod = "addMemOperands"; ++ let ParserMethod = "parseMemOperand"; ++ let PredicateMethod = "isMemWithSimmOffset<14, " # I # ">"; ++ let DiagnosticType = "MemSImm14Lsl" # I; ++ } ++ ++def LoongArchMemSimmPtrAsmOperand : AsmOperandClass { ++ let Name = "MemOffsetSimmPtr"; ++ let SuperClasses = [LoongArchMemAsmOperand]; ++ let RenderMethod = "addMemOperands"; ++ let ParserMethod = "parseMemOperand"; ++ let PredicateMethod = "isMemWithPtrSizeOffset"; ++ let DiagnosticType = "MemSImmPtr"; ++} ++ ++class mem_generic : Operand { ++ let PrintMethod = "printMemOperand"; ++ let MIOperandInfo = (ops ptr_rc, simm12); ++ let EncoderMethod = "getMemEncoding"; ++ let ParserMatchClass = LoongArchMemAsmOperand; ++ let OperandType = "OPERAND_MEMORY"; ++} ++ ++// Address operand ++def mem : mem_generic; ++ ++def amem : mem_generic { ++ let PrintMethod = "printAMemOperand"; ++ let EncoderMethod = "getAMemEncoding"; ++ let ParserMatchClass = LoongArchAMemAsmOperand; ++} ++ ++def mem_simmptr : mem_generic { ++ let ParserMatchClass = LoongArchMemSimmPtrAsmOperand; ++} ++ ++foreach I = {2} in ++ def mem_simm14_lsl # I : mem_generic { ++ let MIOperandInfo = (ops ptr_rc, !cast("simm14_lsl" # I)); ++ let EncoderMethod = "getSimm14MemEncoding<" # I # ">"; ++ let ParserMatchClass = ++ !cast("LoongArchMemSimm14Lsl" # I # "AsmOperand"); ++ } ++ ++def mem_ea : Operand { ++ let PrintMethod = "printMemOperandEA"; ++ let MIOperandInfo = (ops ptr_rc, simm12); ++ let EncoderMethod = "getMemEncoding"; ++ let OperandType = "OPERAND_MEMORY"; ++} ++ ++def LoongArchJumpTargetAsmOperand : AsmOperandClass { ++ let Name = "JumpTarget"; ++ let ParserMethod = "parseJumpTarget"; + let PredicateMethod = "isImm"; ++ let RenderMethod = "addImmOperands"; + } + +-// A bare symbol used in call only. 
+-def call_symbol : Operand { +- let ParserMatchClass = CallSymbol; ++def jmptarget : Operand { ++ let EncoderMethod = "getJumpTargetOpValue"; ++ let ParserMatchClass = LoongArchJumpTargetAsmOperand; + } + +-def BaseAddr : ComplexPattern; ++def brtarget : Operand { ++ let EncoderMethod = "getBranchTargetOpValue"; ++ let OperandType = "OPERAND_PCREL"; ++ let DecoderMethod = "DecodeBranchTarget"; ++ let ParserMatchClass = LoongArchJumpTargetAsmOperand; ++} + +-//===----------------------------------------------------------------------===// +-// Instruction Formats +-//===----------------------------------------------------------------------===// ++def calltarget : Operand { ++ let EncoderMethod = "getJumpTargetOpValue"; ++ let ParserMatchClass = LoongArchJumpTargetAsmOperand; ++} + +-include "LoongArchInstrFormats.td" +-include "LoongArchFloatInstrFormats.td" ++// ++//SDNode ++// ++def IsGP64bit : Predicate<"Subtarget->is64Bit()">, ++ AssemblerPredicate<(all_of Feature64Bit)>; ++def IsGP32bit : Predicate<"!Subtarget->is64Bit()">, ++ AssemblerPredicate<(all_of (not Feature64Bit))>; ++def SDT_LoongArchCallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; ++def SDT_LoongArchCallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; ++ ++def LoongArchRet : SDNode<"LoongArchISD::Ret", SDTNone, ++ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; ++def LoongArchERet : SDNode<"LoongArchISD::ERet", SDTNone, ++ [SDNPHasChain, SDNPOptInGlue, SDNPSideEffect]>; ++ ++def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_LoongArchCallSeqStart, ++ [SDNPHasChain, SDNPSideEffect, SDNPOutGlue]>; ++def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_LoongArchCallSeqEnd, ++ [SDNPHasChain, SDNPSideEffect, ++ SDNPOptInGlue, SDNPOutGlue]>; ++def LoongArchAddress : SDNode<"LoongArchISD::GlobalAddress", SDTIntUnaryOp>; ++ ++// Return RA. 
++let isReturn=1, isTerminator=1, isBarrier=1, hasCtrlDep=1, isCTI=1 in { ++ def RetRA : LoongArchPseudo<(outs), (ins), [(LoongArchRet)]>; ++ ++ let hasSideEffects=1 in ++ def ERet : LoongArchPseudo<(outs), (ins), [(LoongArchERet)]>; ++} + +-//===----------------------------------------------------------------------===// ++let Defs = [SP], Uses = [SP], hasSideEffects = 1 in { ++def ADJCALLSTACKDOWN : LoongArchPseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2), ++ [(callseq_start timm:$amt1, timm:$amt2)]>; ++def ADJCALLSTACKUP : LoongArchPseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2), ++ [(callseq_end timm:$amt1, timm:$amt2)]>; ++} ++ ++class LoongArchPat : Pat, PredicateControl; ++ ++def SDT_LoongArchJmpLink : SDTypeProfile<0, 1, [SDTCisVT<0, iPTR>]>; ++ ++def LoongArchJmpLink : SDNode<"LoongArchISD::JmpLink",SDT_LoongArchJmpLink, ++ [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue, ++ SDNPVariadic]>; ++ ++def LoongArchTailCall : SDNode<"LoongArchISD::TailCall", SDT_LoongArchJmpLink, ++ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; ++ ++class GPR_32 { list GPRPredicates = [IsGP32bit]; } ++class GPR_64 { list GPRPredicates = [IsGP64bit]; } ++ ++//===---------------------------------------------------------------------===/ + // Instruction Class Templates +-//===----------------------------------------------------------------------===// ++//===---------------------------------------------------------------------===/ ++///R2 ++class Int_Reg2 ++ : InstForm<(outs RO:$rd), (ins RO:$rj), ++ !strconcat(opstr, "\t$rd, $rj"), ++ [(set RO:$rd, (OpNode RO:$rj))], ++ FrmR, opstr>; ++ ++class Int_Reg2_Iocsrrd ++ : InstForm<(outs RD:$rd), (ins RS:$rj), ++ !strconcat(opstr, "\t$rd, $rj"), ++ [(set RD:$rd, (OpNode RS:$rj))], ++ FrmR, opstr>; ++ ++class Int_Reg2_Rdtime ++ : InstForm<(outs RO:$rd, RO:$rj), (ins), ++ !strconcat(opstr, "\t$rd, $rj"), ++ [(set (OpNode RO:$rd, RO:$rj))], ++ FrmR, opstr>; ++ ++class Int_Reg2_Iocsrwr ++ : InstForm<(outs), (ins RD:$rd, RS:$rj), ++ !strconcat(opstr, "\t$rd, $rj"), ++ [(set (OpNode RD:$rd, RS:$rj))], ++ FrmR, opstr>; ++ ++class Float_Reg2 ++ : InstForm<(outs RO:$fd), (ins RO:$fj), ++ !strconcat(opstr, "\t$fd, $fj"), ++ [(set RO:$fd, (OpNode RO:$fj))], ++ FrmFR, opstr>; ++ ++class Count1 ++ : InstForm<(outs RO:$rd), (ins RO:$rj), ++ !strconcat(opstr, "\t$rd, $rj"), ++ [(set RO:$rd, (OpNode (not RO:$rj)))], ++ FrmR, opstr>; ++ ++class SignExtInReg ++ : InstForm<(outs RO:$rd), (ins RO:$rj), !strconcat(opstr, "\t$rd, $rj"), ++ [(set RO:$rd, (sext_inreg RO:$rj, vt))], FrmR, opstr>; ++ ++///R3 ++class Int_Reg3 ++ : InstForm<(outs RO:$rd), (ins RO:$rj, RO:$rk), ++ !strconcat(opstr, "\t$rd, $rj, $rk"), ++ [(set RO:$rd, (OpNode RO:$rj, RO:$rk))], ++ FrmR, opstr>; ++ ++class Int_Reg3_Crc ++ : InstForm<(outs RS:$rd), (ins RD:$rj, RS:$rk), ++ !strconcat(opstr, "\t$rd, $rj, $rk"), ++ [(set RS:$rd, (OpNode RD:$rj, RS:$rk))], ++ FrmR, opstr>; ++ ++class SetCC_R ++ : InstForm<(outs GPR32Opnd:$rd), (ins RO:$rj, RO:$rk), ++ !strconcat(opstr, "\t$rd, $rj, $rk"), ++ [(set GPR32Opnd:$rd, (OpNode RO:$rj, RO:$rk))], ++ FrmR, opstr>; ++ ++class SetCC_I ++ : InstForm<(outs GPR32Opnd:$rd), (ins RO:$rj, ImmOpnd:$imm12), ++ !strconcat(opstr, "\t$rd, $rj, $imm12"), ++ [(set GPR32Opnd:$rd, (OpNode RO:$rj, ImmOpnd:$imm12))], ++ FrmR, opstr>; ++ ++class ATOMIC ++ : InstForm<(outs RD:$rd), (ins RD:$rk, MO:$addr), ++ !strconcat(opstr, "\t$rd, $rk, $addr"), ++ [(set RD:$rd, (OpNode RD:$rk, Addr:$addr))], ++ FrmR, opstr> { ++ let DecoderMethod = "DecodeAMem"; ++ let canFoldAsLoad = 1; ++ string BaseOpcode = 
opstr; ++ let mayLoad = 1; ++ let mayStore = 1; ++ let Constraints = "@earlyclobber $rd"; ++} + +-class ALU_3R op, string opstr> +- : Fmt3R; +-class ALU_2R op, string opstr> +- : Fmt2R; +- +-class ALU_3RI2 op, string opstr, Operand ImmOpnd> +- : Fmt3RI2; +-class ALU_3RI3 op, string opstr, Operand ImmOpnd> +- : Fmt3RI3; +-class ALU_2RI5 op, string opstr, Operand ImmOpnd> +- : Fmt2RI5; +-class ALU_2RI6 op, string opstr, Operand ImmOpnd> +- : Fmt2RI6; +-class ALU_2RI12 op, string opstr, Operand ImmOpnd> +- : Fmt2RI12; +-class ALU_2RI16 op, string opstr, Operand ImmOpnd> +- : Fmt2RI16; +-class ALU_1RI20 op, string opstr, Operand ImmOpnd> +- : Fmt1RI20; +- +-class MISC_I15 op, string opstr> +- : FmtI15; +- +-class RDTIME_2R op, string opstr> +- : Fmt2R; +- +-class BrCC_2RI16 op, string opstr> +- : Fmt2RI16 { +- let isBranch = 1; +- let isTerminator = 1; ++class Nor ++ : InstForm<(outs RO:$rd), (ins RO:$rj, RO:$rk), ++ !strconcat(opstr, "\t$rd, $rj, $rk"), ++ [(set RO:$rd, (not (or RO:$rj, RO:$rk)))], ++ FrmR, opstr>; ++ ++class Shift_Var ++ : InstForm<(outs RO:$rd), (ins RO:$rj, GPR32Opnd:$rk), ++ !strconcat(opstr, "\t$rd, $rj, $rk"), ++ [(set RO:$rd, (OpNode RO:$rj, GPR32Opnd:$rk))], ++ FrmR, opstr>; ++ ++class Float_Reg3 ++ : InstForm<(outs RO:$fd), (ins RO:$fj, RO:$fk), ++ !strconcat(opstr, "\t$fd, $fj, $fk"), ++ [(set RO:$fd, (OpNode RO:$fj, RO:$fk))], ++ FrmR, opstr>; ++ ++class Float_Reg3_MA ++ : InstForm<(outs RO:$fd), (ins RO:$fj, RO:$fk), ++ !strconcat(opstr, "\t$fd, $fj, $fk"), ++ [(set RO:$fd, (OpNode (fabs RO:$fj), (fabs RO:$fk)))], ++ FrmR, opstr>; ++ ++class Float_Int_Reg3 ++ : InstForm<(outs RD:$fd), (ins RS:$rj, RS:$rk), ++ !strconcat(opstr, "\t$fd, $rj, $rk"), ++ [(set RS:$fd, (OpNode RS:$rj, RS:$rk))], ++ FrmR, opstr>; ++ ++///R4 ++class Mul_Reg4 ++ : InstForm<(outs RO:$fd), (ins RO:$fj, RO:$fk, RO:$fa), ++ !strconcat(opstr, "\t$fd, $fj, $fk, $fa"), ++ [], ++ FrmFR, opstr>; ++ ++class NMul_Reg4 ++ : InstForm<(outs RO:$fd), (ins RO:$fj, RO:$fk, RO:$fa), ++ !strconcat(opstr, "\t$fd, $fj, $fk, $fa"), ++ [], ++ FrmFR, opstr>; ++ ++///R2_IMM5 ++class Shift_Imm32 ++ : InstForm<(outs RO:$rd), (ins RO:$rj, uimm5:$imm5), ++ !strconcat(opstr, "\t$rd, $rj, $imm5"), ++ [(set RO:$rd, (OpNode RO:$rj, uimm5:$imm5))], ++ FrmR, opstr>; ++ ++///R2_IMM6 ++class Shift_Imm64 ++ : InstForm<(outs RO:$rd), (ins RO:$rj, uimm6:$imm6), ++ !strconcat(opstr, "\t$rd, $rj, $imm6"), ++ [(set RO:$rd, (OpNode RO:$rj, uimm6:$imm6))], ++ FrmR, opstr>; ++ ++///LOAD_STORE ++class FLd ++ : InstForm<(outs RD:$rd), (ins MO:$addr), ++ !strconcat(opstr, "\t$rd, $addr"), ++ [(set RD:$rd, (OpNode addrDefault:$addr))], ++ FrmR, opstr> { ++ let DecoderMethod = "DecodeFMem"; ++ let mayLoad = 1; ++} ++ ++class Ld ++ : InstForm<(outs RD:$rd), (ins MO:$addr), ++ !strconcat(opstr, "\t$rd, $addr"), ++ [(set RD:$rd, (OpNode Addr:$addr))], ++ FrmR, opstr> { ++ let DecoderMethod = "DecodeMem"; ++ let canFoldAsLoad = 1; ++ string BaseOpcode = opstr; ++ let mayLoad = 1; ++} ++ ++class FSt ++ : InstForm<(outs), (ins RD:$rd, MO:$addr), ++ !strconcat(opstr, "\t$rd, $addr"), ++ [(OpNode RD:$rd, addrDefault:$addr)], ++ FrmR, opstr> { ++ let DecoderMethod = "DecodeFMem"; ++ let mayStore = 1; ++} ++ ++class St ++ : InstForm<(outs), (ins RS:$rd, MO:$addr), ++ !strconcat(opstr, "\t$rd, $addr"), ++ [(OpNode RS:$rd, addr:$addr)], ++ FrmR, opstr> { ++ let DecoderMethod = "DecodeMem"; ++ string BaseOpcode = opstr; ++ let mayStore = 1; ++} ++ ++/// R2_IMM12 ++class Int_Reg2_Imm12 ++ : InstForm<(outs RO:$rd), (ins RO:$rj, ImmOpnd:$imm12), ++ 
!strconcat(opstr, "\t$rd, $rj, $imm12"), ++ [(set RO:$rd, (OpNode RO:$rj, ImmOpnd:$imm12))], ++ FrmR, opstr>; ++class RELOC_rrii ++ : InstForm<(outs RO:$rd), (ins RO:$rj, ImmOpnd:$imm12, ImmOpnd:$i12), ++ !strconcat(opstr, "\t$rd, $rj, $imm12"), ++ [(set RO:$rd, (OpNode RO:$rj, ImmOpnd:$imm12, ImmOpnd:$i12))], ++ FrmR, opstr>; ++ ++///R2_IMM14 ++class LdPtr ++ : InstForm<(outs RO:$rd), (ins mem_simm14_lsl2:$addr), ++ !strconcat(opstr, "\t$rd, $addr"), ++ [], FrmI, opstr>{ ++ let DecoderMethod = "DecodeMemSimm14"; ++ let canFoldAsLoad = 1; ++ string BaseOpcode = opstr; ++ let mayLoad = 1; ++} ++ ++class StPtr ++ : InstForm<(outs), (ins RO:$rd, mem_simm14_lsl2:$addr), ++ !strconcat(opstr, "\t$rd, $addr"), ++ [], FrmI, opstr> { ++ let DecoderMethod = "DecodeMemSimm14"; ++ string BaseOpcode = opstr; ++ let mayStore = 1; + } +-class BrCCZ_1RI21 op, string opstr> +- : Fmt1RI21 { +- let isBranch = 1; ++ ++///R2_IMM16 ++class FJirl ++ : InstForm<(outs RO:$rd), (ins RO:$rj, opnd:$offs16), ++ !strconcat(opstr, "\t$rd, $rj, $offs16"), ++ [], FrmJ, opstr>; ++ ++class Beq ++ : InstForm<(outs), (ins RO:$rj, RO:$rd, opnd:$offs16), ++ !strconcat(opstr, "\t$rj, $rd, $offs16"), ++ [(brcond (i32 (cond_op RO:$rj, RO:$rd)), bb:$offs16)], ++ FrmI, opstr> { ++ let isBranch = 1; ++ let isTerminator = 1; ++ bit isCTI = 1; ++} ++ ++///R1_IMM21 ++class Beqz ++ : InstForm<(outs), (ins RO:$rj, opnd:$offs21), ++ !strconcat(opstr, "\t$rj, $offs21"), ++ [(brcond (i32 (cond_op RO:$rj, 0)), bb:$offs21)], ++ FrmI, opstr> { ++ let isBranch = 1; ++ let isTerminator = 1; ++ bit isCTI = 1; ++} ++ ++///IMM26 ++class JumpFB : ++ InstForm<(outs), (ins opnd:$offset26), !strconcat(opstr, "\t$offset26"), ++ [(operator targetoperator:$offset26)], FrmJ, opstr> { ++ let isBranch = 1; ++ let isTerminator=1; ++ let isBarrier=1; ++ let DecoderMethod = "DecodeJumpTarget"; ++ bit isCTI = 1; ++} ++ ++/// R3_SA ++class Reg3_Sa ++ : InstForm<(outs RO:$rd), (ins RO:$rj, RO:$rk, ImmOpnd:$sa), ++ !strconcat(opstr, "\t$rd, $rj, $rk, $sa"), ++ [(set RO:$rd, (OpNode RO:$rj, RO:$rk, ImmOpnd:$sa))], ++ FrmR, opstr>; ++ ++class Reg3_SaU ++ : InstForm<(outs RD:$rd), (ins RS:$rj, RS:$rk, ImmOpnd:$sa), ++ !strconcat(opstr, "\t$rd, $rj, $rk, $sa"), ++ [(set RD:$rd, (OpNode RS:$rj, RS:$rk, ImmOpnd:$sa))], ++ FrmR, opstr>; ++ ++/// Assert ++class Assert ++ : InstForm<(outs), (ins RO:$rj, RO:$rk), ++ !strconcat(opstr, "\t$rj, $rk"), ++ [(set (OpNode RO:$rj, RO:$rk))], ++ FrmR, opstr>; ++ ++class Code15 ++ : InstForm<(outs), (ins uimm15:$Code), ++ !strconcat(opstr, "\t$Code"), ++ [(set (OpNode uimm15:$Code))], ++ FrmOther, opstr>; ++ ++class TrapBase ++ : LoongArchPseudo<(outs), (ins), [(trap)]>, ++ PseudoInstExpansion<(RealInst 0)> { ++ let isBarrier = 1; ++ let isTerminator = 1; ++ let isCodeGenOnly = 1; ++ let isCTI = 1; ++} ++ ++class CSR ++ : InstForm<(outs RO:$rd), (ins ImmOpnd:$csr), ++ !strconcat(opstr, "\t$rd, $csr"), ++ [(set RO:$rd, (OpNode ImmOpnd:$csr))], ++ FrmOther, opstr>; ++ ++class CSRW ++ : InstForm<(outs RO:$dst), (ins RO:$rd, ImmOpnd:$csr), ++ !strconcat(opstr, "\t$rd, $csr"), ++ [(set RO:$dst, (OpNode RO:$rd, ImmOpnd:$csr))], ++ FrmOther, opstr>{ ++ let Constraints = "$rd = $dst"; ++} ++ ++class CSRX ++ : InstForm<(outs RO:$dst), (ins RO:$rd, RO:$rj, ImmOpnd:$csr), ++ !strconcat(opstr, "\t$rd, $rj, $csr"), ++ [(set RO:$dst, (OpNode RO:$rd, RO:$rj, ImmOpnd:$csr))], ++ FrmOther, opstr>{ ++ let Constraints = "$rd = $dst"; ++} ++ ++class CAC ++ : InstForm<(outs), (ins uimm5:$op, RO:$rj, ImmOpnd:$si12), ++ !strconcat(opstr, "\t$op, $rj, 
$si12"), ++ [(set (OpNode uimm5:$op, RO:$rj, ImmOpnd:$si12))], ++ FrmOther, opstr>; ++ ++class LEVEL ++ : InstForm<(outs RO:$rd), (ins RO:$rj, uimm8_64:$level), ++ !strconcat(opstr, "\t$rd, $rj, $level"), ++ [(set RO:$rd, (OpNode RO:$rj, uimm8_64:$level))], ++ FrmOther, opstr>; ++ ++class SEQ ++ : InstForm<(outs), (ins RO:$rj, uimm8_64:$seq), ++ !strconcat(opstr, "\t$rj, $seq"), ++ [(set (OpNode RO:$rj, uimm8_64:$seq))], ++ FrmOther, opstr>; ++ ++class Wait ++ : InstForm<(outs), (ins uimm15:$hint), ++ !strconcat(opstr, "\t$hint"), ++ [(set (OpNode uimm15:$hint))], ++ FrmOther, opstr>; ++ ++class Invtlb ++ : InstForm<(outs), (ins uimm5:$op, RO:$rj, RO:$rk), ++ !strconcat(opstr, "\t$op, $rj, $rk"), ++ [(set (OpNode uimm5:$op, RO:$rj, RO:$rk))], ++ FrmOther, opstr>; ++ ++class OP32 ++ : InstForm<(outs), (ins), ++ !strconcat(opstr, ""), ++ [(set (OpNode))], ++ FrmOther, opstr>; ++ ++class Bar ++ : InstForm<(outs), (ins uimm15:$hint), ++ !strconcat(opstr, "\t$hint"), ++ [(set (OpNode uimm15:$hint))], ++ FrmOther, opstr>; ++ ++//class CA op, string opstr> ++// : R3_CA; ++ ++class SI16_R2 ++ : InstForm<(outs RO:$rd), (ins RO:$rj, simm16:$si16), ++ !strconcat(opstr, "\t$rd, $rj, $si16"), ++ [(set RO:$rd, (OpNode RO:$rj, simm16:$si16))], ++ FrmR, opstr>; ++ ++class SI20 ++ : InstForm<(outs RO:$rd), (ins ImmOpnd:$si20), ++ !strconcat(opstr, "\t$rd, $si20"), ++ [(set RO:$rd, (OpNode ImmOpnd:$si20))], ++ FrmR, opstr>; ++let isCodeGenOnly = 1, Constraints = "$dst = $rd" in ++class SI20_R2 ++ : InstForm<(outs RO:$dst), (ins RO:$rd, ImmOpnd:$si20), ++ !strconcat(opstr, "\t$rd, $si20"), ++ [(set RO:$dst, (OpNode RO:$rd, ImmOpnd:$si20))], ++ FrmR, opstr>; ++class RELOC_rii ++ : InstForm<(outs RO:$rd), (ins ImmOpnd:$si20, ImmOpnd:$i20), ++ !strconcat(opstr, "\t$rd, $si20"), ++ [(set RO:$rd, (OpNode ImmOpnd:$si20, ImmOpnd:$i20))], ++ FrmR, opstr>; ++ ++// preld ++class Preld ++ : InstForm<(outs), (ins RO:$rj, MemOpnd:$addr, uimm5:$hint), ++ !strconcat(opstr, "\t$hint, $rj, $addr"), ++ [(set (OpNode RO:$rj, MemOpnd:$addr, uimm5:$hint))], ++ FrmR, opstr>; ++class Preld_Raw ++ : InstForm<(outs), (ins RO:$rj, simm12:$imm12, uimm5:$hint), ++ !strconcat(opstr, "\t$hint, $rj, $imm12"), ++ [], ++ FrmR, opstr>; ++class IsCall { ++ bit isCall = 1; ++ bit isCTI = 1; ++} ++ ++class EffectiveAddress ++ : InstForm<(outs RO:$rd), (ins mem_ea:$addr), ++ !strconcat(opstr, "\t$rd, $addr"), ++ [(set RO:$rd, addr:$addr)], FrmI, ++ !strconcat(opstr, "_lea")> { ++ let isCodeGenOnly = 1; ++ let hasNoSchedulingInfo = 1; ++ let DecoderMethod = "DecodeMem"; ++} ++ ++def PtrRC : Operand { ++ let MIOperandInfo = (ops ptr_rc); ++ let DecoderMethod = "DecodePtrRegisterClass"; ++ let ParserMatchClass = GPR32AsmOperand; ++} ++ ++class Atomic2Ops : ++ LoongArchPseudo<(outs DRC:$dst), (ins PtrRC:$ptr, DRC:$incr), ++ [(set DRC:$dst, (Op iPTR:$ptr, DRC:$incr))]>; ++ ++class Atomic2OpsPostRA : ++ LoongArchPseudo<(outs RC:$dst), (ins PtrRC:$ptr, RC:$incr), []> { ++ let mayLoad = 1; ++ let mayStore = 1; ++} ++ ++class Atomic2OpsSubwordPostRA : ++ LoongArchPseudo<(outs RC:$dst), (ins PtrRC:$ptr, RC:$incr, RC:$mask, RC:$mask2, ++ RC:$shiftamnt), []>; ++class AtomicCmpSwap : ++ LoongArchPseudo<(outs DRC:$dst), (ins PtrRC:$ptr, DRC:$cmp, DRC:$swap), ++ [(set DRC:$dst, (Op iPTR:$ptr, DRC:$cmp, DRC:$swap))]>; ++ ++class AtomicCmpSwapPostRA : ++ LoongArchPseudo<(outs RC:$dst), (ins PtrRC:$ptr, RC:$cmp, RC:$swap), []> { ++ let mayLoad = 1; ++ let mayStore = 1; ++} ++ ++class AtomicCmpSwapSubwordPostRA : ++ LoongArchPseudo<(outs RC:$dst), (ins PtrRC:$ptr, 
RC:$mask, RC:$ShiftCmpVal, ++ RC:$mask2, RC:$ShiftNewVal, RC:$ShiftAmt), []> { ++ let mayLoad = 1; ++ let mayStore = 1; ++} ++ ++class LoongArchInstAlias : ++ InstAlias, PredicateControl; ++ ++//===---------------------------------------------------------------------===/ ++// Instruction Definitions. ++//===---------------------------------------------------------------------===/ ++/// ++/// R2 ++/// ++ ++def CLO_D : Count1<"clo.d", GPR64Opnd, ctlz>, R2I<0b01000>; ++def CLZ_D : Int_Reg2<"clz.d", GPR64Opnd, ctlz>, R2I<0b01001>; ++def CTO_D : Count1<"cto.d", GPR64Opnd, cttz>, R2I<0b01010>; ++def CTZ_D : Int_Reg2<"ctz.d", GPR64Opnd, cttz>, R2I<0b01011>; ++ ++def REVB_4H : Int_Reg2<"revb.4h", GPR64Opnd>, R2I<0b01101>; //[] ++def REVB_2W : Int_Reg2<"revb.2w", GPR64Opnd>, R2I<0b01110>; ++def REVB_D : Int_Reg2<"revb.d", GPR64Opnd>, R2I<0b01111>; ++def REVH_2W : Int_Reg2<"revh.2w", GPR64Opnd>, R2I<0b10000>; ++def REVH_D : Int_Reg2<"revh.d", GPR64Opnd>, R2I<0b10001>; //[] ++ ++def BITREV_8B : Int_Reg2<"bitrev.8b", GPR64Opnd>, R2I<0b10011>; //[] ++def BITREV_D : Int_Reg2<"bitrev.d", GPR64Opnd, bitreverse>, R2I<0b10101>; ++ ++def EXT_W_H : SignExtInReg<"ext.w.h", GPR64Opnd, i16>, R2I<0b10110>; ++def EXT_W_B : SignExtInReg<"ext.w.b", GPR64Opnd, i8>, R2I<0b10111>; ++ ++def RDTIME_D : Int_Reg2_Rdtime<"rdtime.d", GPR64Opnd>, R2I<0b11010>; ++def RDTIMEL_W : Int_Reg2_Rdtime<"rdtimel.w", GPR64Opnd>, R2I<0b11000>; ++def RDTIMEH_W : Int_Reg2_Rdtime<"rdtimeh.w", GPR64Opnd>, R2I<0b11001>; ++/// ++/// R3 ++/// ++def ADD_D : Int_Reg3<"add.d", GPR64Opnd, add>, R3I<0b0100001>; ++def SUB_D : Int_Reg3<"sub.d", GPR64Opnd, sub>, R3I<0b0100011>; ++ ++def SLT : SetCC_R<"slt", GPR64Opnd, setlt>, R3I<0b0100100>; ++def SLTU : SetCC_R<"sltu", GPR64Opnd, setult>, R3I<0b0100101>; ++def MASKEQZ : Int_Reg3<"maskeqz", GPR64Opnd>, R3I<0b0100110>; //[] ++def MASKNEZ : Int_Reg3<"masknez", GPR64Opnd>, R3I<0b0100111>; //[] ++ ++def NOR : Nor<"nor", GPR64Opnd>, R3I<0b0101000>; ++def AND : Int_Reg3<"and", GPR64Opnd, and>, R3I<0b0101001>; ++def OR : Int_Reg3<"or", GPR64Opnd, or>, R3I<0b0101010>; ++def XOR : Int_Reg3<"xor", GPR64Opnd, xor>, R3I<0b0101011>; ++def ORN : Int_Reg3<"orn", GPR64Opnd>, R3I<0b0101100>; ++def ANDN : Int_Reg3<"andn", GPR64Opnd>, R3I<0b0101101>; ++ ++def SLL_D : Shift_Var<"sll.d", GPR64Opnd, shl>, R3I<0b0110001>; ++def SRL_D : Shift_Var<"srl.d", GPR64Opnd, srl>, R3I<0b0110010>; ++def SRA_D : Shift_Var<"sra.d", GPR64Opnd, sra>, R3I<0b0110011>; ++def ROTR_D: Shift_Var<"rotr.d", GPR64Opnd, rotr>, R3I<0b0110111>; ++ ++def MUL_D : Int_Reg3<"mul.d", GPR64Opnd, mul>, R3I<0b0111011>; ++def MULH_D : Int_Reg3<"mulh.d", GPR64Opnd, mulhs>, R3I<0b0111100>; ++def MULH_DU : Int_Reg3<"mulh.du", GPR64Opnd, mulhu>, R3I<0b0111101>; ++def MULW_D_W : Int_Reg3<"mulw.d.w", GPR64Opnd>, R3I<0b0111110>; ++def MULW_D_WU : Int_Reg3<"mulw.d.wu", GPR64Opnd>, R3I<0b0111111>; ++ ++let usesCustomInserter = 1 in { ++def DIV_D : Int_Reg3<"div.d", GPR64Opnd, sdiv>, R3I<0b1000100>; ++def MOD_D : Int_Reg3<"mod.d", GPR64Opnd, srem>, R3I<0b1000101>; ++def DIV_DU : Int_Reg3<"div.du", GPR64Opnd, udiv>, R3I<0b1000110>; ++def MOD_DU : Int_Reg3<"mod.du", GPR64Opnd, urem>, R3I<0b1000111>; ++} ++ ++def CRC_W_D_W : Int_Reg3_Crc<"crc.w.d.w", GPR64Opnd, GPR32Opnd, int_loongarch_crc_w_d_w>, R3I<0b1001011>; ++def CRCC_W_D_W : Int_Reg3_Crc<"crcc.w.d.w", GPR64Opnd, GPR32Opnd, int_loongarch_crcc_w_d_w>, R3I<0b1001111>; ++/// ++/// SLLI ++/// ++def SLLI_D : Shift_Imm64<"slli.d", GPR64Opnd, shl>, R2_IMM6<0b00>; ++def SRLI_D : Shift_Imm64<"srli.d", GPR64Opnd, srl>, 
R2_IMM6<0b01>; ++def SRAI_D : Shift_Imm64<"srai.d", GPR64Opnd, sra>, R2_IMM6<0b10>; ++def ROTRI_D : Shift_Imm64<"rotri.d", GPR64Opnd, rotr>, R2_IMM6<0b11>; ++/// ++/// Misc ++/// ++def ALSL_WU : Reg3_SaU<"alsl.wu", GPR64Opnd, GPR32Opnd, uimm2_plus1>, R3_SA2<0b00011> { ++ let Pattern = [(set GPR64Opnd:$rd, ++ (i64 (zext (add GPR32Opnd:$rk, (shl GPR32Opnd:$rj, immZExt2Alsl:$sa)))))]; ++} ++ ++def ALSL_D : Reg3_Sa<"alsl.d", GPR64Opnd, uimm2_plus1>, R3_SA2<0b10110> { ++ let Pattern = [(set GPR64Opnd:$rd, ++ (add GPR64Opnd:$rk, (shl GPR64Opnd:$rj, immZExt2Alsl:$sa)))]; ++} ++def BYTEPICK_D : Reg3_Sa<"bytepick.d", GPR64Opnd, uimm3>, R3_SA3; //[] ++ ++def ASRTLE_D : Assert<"asrtle.d", GPR64Opnd, int_loongarch_asrtle_d>, ASSERT<0b10>; ++def ASRTGT_D : Assert<"asrtgt.d", GPR64Opnd, int_loongarch_asrtgt_d>, ASSERT<0b11>; ++ ++def DBCL : Code15<"dbcl">, CODE15<0b1010101>; ++def HYPCALL : Code15<"hypcall">, CODE15<0b1010111>; ++ ++/// ++/// R2_IMM12 ++/// ++def SLTI : SetCC_I<"slti", GPR64Opnd, simm12, setlt>, R2_IMM12<0b000>; ++def SLTUI : SetCC_I<"sltui", GPR64Opnd, simm12, setult>, R2_IMM12<0b001>; ++def ADDI_W64 : Int_Reg2_Imm12<"addi.w", GPR64Opnd, simm12>, R2_IMM12<0b010>; ++def ADDI_D : Int_Reg2_Imm12<"addi.d", GPR64Opnd, simm12, add>, R2_IMM12<0b011>; ++def LU52I_D : Int_Reg2_Imm12<"lu52i.d", GPR64Opnd, simm12>, R2_IMM12<0b100>; ++def ANDI : Int_Reg2_Imm12<"andi", GPR64Opnd, uimm12, and>, R2_IMM12<0b101>; ++def ORI : Int_Reg2_Imm12<"ori", GPR64Opnd, uimm12, or>, R2_IMM12<0b110>; ++def XORI : Int_Reg2_Imm12<"xori", GPR64Opnd, uimm12, xor>, R2_IMM12<0b111>; ++ ++/// ++/// Privilege Instructions ++/// ++def CSRRD : CSR<"csrrd", GPR64Opnd, uimm14, int_loongarch_csrrd_d>, R1_CSR<0b0000000000100>; ++def CSRWR : CSRW<"csrwr", GPR64Opnd, uimm14, int_loongarch_csrwr_d>, R1_CSR<0b0000100000100>; ++def CSRXCHG : CSRX<"csrxchg", GPR64Opnd, uimm14, int_loongarch_csrxchg_d>, R2_CSR<0b00000100>; ++def IOCSRRD_D : Int_Reg2_Iocsrrd<"iocsrrd.d", GPR64Opnd, GPR32Opnd, int_loongarch_iocsrrd_d>, R2P<0b011>; ++def IOCSRWR_D : Int_Reg2_Iocsrwr<"iocsrwr.d", GPR64Opnd, GPR32Opnd, int_loongarch_iocsrwr_d>, R2P<0b111>; ++def CACOP : CAC<"cacop", GPR64Opnd, simm12, int_loongarch_cacop_d>, R1_CACHE; ++def LDDIR : LEVEL<"lddir", GPR64Opnd>, R2_LEVEL<0b00000110010000>; ++def LDPTE : SEQ<"ldpte", GPR64Opnd>, R1_SEQ<0b00000110010001>; ++ ++def IDLE : Wait<"idle">, WAIT_FM; ++def INVTLB : Invtlb<"invtlb", GPR64Opnd>, R2_INVTLB; ++// ++def IOCSRRD_B : Int_Reg2<"iocsrrd.b", GPR64Opnd>, R2P<0b000>; ++def IOCSRRD_H : Int_Reg2<"iocsrrd.h", GPR64Opnd>, R2P<0b001>; ++def IOCSRRD_W : Int_Reg2<"iocsrrd.w", GPR64Opnd>, R2P<0b010>; ++// ++def TLBCLR : OP32<"tlbclr", int_loongarch_tlbclr>, IMM32<0b001000>; ++def TLBFLUSH : OP32<"tlbflush", int_loongarch_tlbflush>, IMM32<0b001001>; ++def TLBSRCH : OP32<"tlbsrch", int_loongarch_tlbsrch>, IMM32<0b001010>; ++def TLBRD : OP32<"tlbrd", int_loongarch_tlbrd>, IMM32<0b001011>; ++def TLBWR : OP32<"tlbwr", int_loongarch_tlbwr>, IMM32<0b001100>; ++def TLBFILL : OP32<"tlbfill", int_loongarch_tlbfill>, IMM32<0b001101>; ++def ERTN : OP32<"ertn">, IMM32<0b001110>; ++ ++/// ++/// R1_IMM20 ++/// ++def ADDU16I_D : SI16_R2<"addu16i.d", GPR64Opnd>, R2_SI16<0b000100>; ++def LU12I_W : SI20<"lu12i.w", GPR64Opnd, simm20>, R1_SI20<0b0001010>; ++def LU32I_D : SI20<"lu32i.d", GPR64Opnd, simm20>, R1_SI20<0b0001011>; ++def LU32I_D_R2 : SI20_R2<"lu32i.d", GPR64Opnd, simm20>, R1_SI20<0b0001011>; ++def PCADDI : SI20<"pcaddi", GPR64Opnd, simm20>, R1_SI20<0b0001100>; ++def PCALAU12I : SI20<"pcalau12i", GPR64Opnd, simm20>, 
R1_SI20<0b0001101>; ++def PCADDU12I : SI20<"pcaddu12i", GPR64Opnd, simm20>, R1_SI20<0b0001110>; ++def PCADDU18I : SI20<"pcaddu18i", GPR64Opnd, simm20>, R1_SI20<0b0001111>; ++ ++ ++def BEQZ : Beqz<"beqz", brtarget, seteq, GPR64Opnd>, R1_IMM21BEQZ<0b010000>; ++def BNEZ : Beqz<"bnez", brtarget, setne, GPR64Opnd>, R1_IMM21BEQZ<0b010001>; ++ ++def JIRL : FJirl<"jirl", simm16, GPR64Opnd>, R2_IMM16JIRL; ++let isCall = 1, isCTI=1, isCodeGenOnly = 1 in { ++def JIRL_CALL : FJirl<"jirl", simm16, GPR64Opnd>, R2_IMM16JIRL; ++} ++ ++def B : JumpFB, IMM26B<0b010100>; ++ ++def BEQ : Beq<"beq", brtarget, seteq, GPR64Opnd>, R2_IMM16BEQ<0b010110>; ++def BNE : Beq<"bne", brtarget, setne, GPR64Opnd>, R2_IMM16BEQ<0b010111>; ++def BLT : Beq<"blt", brtarget, setlt, GPR64Opnd>, R2_IMM16BEQ<0b011000>; ++def BGE : Beq<"bge", brtarget, setge, GPR64Opnd>, R2_IMM16BEQ<0b011001>; ++def BLTU : Beq<"bltu", brtarget, setult, GPR64Opnd>, R2_IMM16BEQ<0b011010>; ++def BGEU : Beq<"bgeu", brtarget, setuge, GPR64Opnd>, R2_IMM16BEQ<0b011011>; ++ ++/// ++/// Mem access ++/// ++class LLBase : ++ InstForm<(outs RO:$rd), (ins MO:$addr), !strconcat(opstr, "\t$rd, $addr"), ++ [], FrmI, opstr> { ++ let DecoderMethod = "DecodeMemSimm14"; ++ let mayLoad = 1; ++} ++ ++class SCBase : ++ InstForm<(outs RO:$dst), (ins RO:$rd, MO:$addr), ++ !strconcat(opstr, "\t$rd, $addr"), [], FrmI> { ++ let DecoderMethod = "DecodeMemSimm14"; ++ let mayStore = 1; ++ let Constraints = "$rd = $dst"; ++} ++ ++class STGT_LE : ++ InstForm<(outs), (ins RO:$rd, RO:$rj, RO:$rk), ++ !strconcat(opstr, "\t$rd, $rj, $rk"), ++ [], FrmI, opstr>; ++ ++class Float_STGT_LE ++ : InstForm<(outs), (ins RD:$fd, RS:$rj, RS:$rk), ++ !strconcat(opstr, "\t$fd, $rj, $rk"), ++ [], FrmR, opstr>; ++ ++def LL_D : LLBase<"ll.d", GPR64Opnd, mem_simm14_lsl2>, LL_SC<0b010>; ++def SC_D : SCBase<"sc.d", GPR64Opnd, mem_simm14_lsl2>, LL_SC<0b011>; ++ ++def LDPTR_W : LdPtr<"ldptr.w", GPR64Opnd>, LL_SC<0b100>; ++def STPTR_W : StPtr<"stptr.w", GPR64Opnd>, LL_SC<0b101>; ++def LDPTR_D : LdPtr<"ldptr.d", GPR64Opnd>, LL_SC<0b110>; ++def STPTR_D : StPtr<"stptr.d", GPR64Opnd>, LL_SC<0b111>; ++ ++def LD_B : Ld<"ld.b", GPR64Opnd, mem, sextloadi8>, LOAD_STORE<0b0000>; ++def LD_H : Ld<"ld.h", GPR64Opnd, mem, sextloadi16>, LOAD_STORE<0b0001>; ++def LD_W : Ld<"ld.w", GPR64Opnd, mem, sextloadi32>, LOAD_STORE<0b0010>; ++def LD_D : Ld<"ld.d", GPR64Opnd, mem_simmptr, load>, LOAD_STORE<0b0011>; ++def ST_B : St<"st.b", GPR64Opnd, mem, truncstorei8>, LOAD_STORE<0b0100>; ++def ST_H : St<"st.h", GPR64Opnd, mem, truncstorei16>, LOAD_STORE<0b0101>; ++def ST_W : St<"st.w", GPR64Opnd, mem, truncstorei32>, LOAD_STORE<0b0110>; ++def ST_D : St<"st.d", GPR64Opnd, mem_simmptr, store>, LOAD_STORE<0b0111>; ++def LD_BU : Ld<"ld.bu", GPR64Opnd, mem, zextloadi8>, LOAD_STORE<0b1000>; ++def LD_HU : Ld<"ld.hu", GPR64Opnd, mem, zextloadi16>, LOAD_STORE<0b1001>; ++def LD_WU : Ld<"ld.wu", GPR64Opnd, mem, zextloadi32>, LOAD_STORE<0b1010>; ++ ++def AMSWAP_W : ATOMIC<"amswap.w", GPR32Opnd, amem>, AM<0b000000>; ++def AMSWAP_D : ATOMIC<"amswap.d", GPR64Opnd, amem>, AM<0b000001>; ++def AMADD_W : ATOMIC<"amadd.w", GPR32Opnd, amem>, AM<0b000010>; ++def AMADD_D : ATOMIC<"amadd.d", GPR64Opnd, amem>, AM<0b000011>; ++def AMAND_W : ATOMIC<"amand.w", GPR32Opnd, amem>, AM<0b000100>; ++def AMAND_D : ATOMIC<"amand.d", GPR64Opnd, amem>, AM<0b000101>; ++def AMOR_W : ATOMIC<"amor.w", GPR32Opnd, amem>, AM<0b000110>; ++def AMOR_D : ATOMIC<"amor.d", GPR64Opnd, amem>, AM<0b000111>; ++def AMXOR_W : ATOMIC<"amxor.w", GPR32Opnd, amem>, AM<0b001000>; ++def AMXOR_D : 
ATOMIC<"amxor.d", GPR64Opnd, amem>, AM<0b001001>; ++def AMMAX_W : ATOMIC<"ammax.w", GPR32Opnd, amem>, AM<0b001010>; ++def AMMAX_D : ATOMIC<"ammax.d", GPR64Opnd, amem>, AM<0b001011>; ++def AMMIN_W : ATOMIC<"ammin.w", GPR32Opnd, amem>, AM<0b001100>; ++def AMMIN_D : ATOMIC<"ammin.d", GPR64Opnd, amem>, AM<0b001101>; ++def AMMAX_WU : ATOMIC<"ammax.wu", GPR32Opnd, amem>, AM<0b001110>; ++def AMMAX_DU : ATOMIC<"ammax.du", GPR64Opnd, amem>, AM<0b001111>; ++def AMMIN_WU : ATOMIC<"ammin.wu", GPR32Opnd, amem>, AM<0b010000>; ++def AMMIN_DU : ATOMIC<"ammin.du", GPR64Opnd, amem>, AM<0b010001>; ++ ++ ++def AMSWAP_DB_W : ATOMIC<"amswap_db.w", GPR32Opnd, amem>, AM<0b010010>; ++def AMSWAP_DB_D : ATOMIC<"amswap_db.d", GPR64Opnd, amem>, AM<0b010011>; ++def AMADD_DB_W : ATOMIC<"amadd_db.w", GPR32Opnd, amem>, AM<0b010100>; ++def AMADD_DB_D : ATOMIC<"amadd_db.d", GPR64Opnd, amem>, AM<0b010101>; ++def AMAND_DB_W : ATOMIC<"amand_db.w", GPR32Opnd, amem>, AM<0b010110>; ++def AMAND_DB_D : ATOMIC<"amand_db.d", GPR64Opnd, amem>, AM<0b010111>; ++def AMOR_DB_W : ATOMIC<"amor_db.w", GPR32Opnd, amem>, AM<0b011000>; ++def AMOR_DB_D : ATOMIC<"amor_db.d", GPR64Opnd, amem>, AM<0b011001>; ++def AMXOR_DB_W : ATOMIC<"amxor_db.w", GPR32Opnd, amem>, AM<0b011010>; ++def AMXOR_DB_D : ATOMIC<"amxor_db.d", GPR64Opnd, amem>, AM<0b011011>; ++def AMMAX_DB_W : ATOMIC<"ammax_db.w", GPR32Opnd, amem>, AM<0b011100>; ++def AMMAX_DB_D : ATOMIC<"ammax_db.d", GPR64Opnd, amem>, AM<0b011101>; ++def AMMIN_DB_W : ATOMIC<"ammin_db.w", GPR32Opnd, amem>, AM<0b011110>; ++def AMMIN_DB_D : ATOMIC<"ammin_db.d", GPR64Opnd, amem>, AM<0b011111>; ++def AMMAX_DB_WU : ATOMIC<"ammax_db.wu", GPR32Opnd, amem>, AM<0b100000>; ++def AMMAX_DB_DU : ATOMIC<"ammax_db.du", GPR64Opnd, amem>, AM<0b100001>; ++def AMMIN_DB_WU : ATOMIC<"ammin_db.wu", GPR32Opnd, amem>, AM<0b100010>; ++def AMMIN_DB_DU : ATOMIC<"ammin_db.du", GPR64Opnd, amem>, AM<0b100011>; ++ ++def LDGT_B : Int_Reg3<"ldgt.b", GPR64Opnd>, R3MI<0b11110000>; ++def LDGT_H : Int_Reg3<"ldgt.h", GPR64Opnd>, R3MI<0b11110001>; ++def LDGT_W : Int_Reg3<"ldgt.w", GPR64Opnd>, R3MI<0b11110010>; ++def LDGT_D : Int_Reg3<"ldgt.d", GPR64Opnd>, R3MI<0b11110011>; ++def LDLE_B : Int_Reg3<"ldle.b", GPR64Opnd>, R3MI<0b11110100>; ++def LDLE_H : Int_Reg3<"ldle.h", GPR64Opnd>, R3MI<0b11110101>; ++def LDLE_W : Int_Reg3<"ldle.w", GPR64Opnd>, R3MI<0b11110110>; ++def LDLE_D : Int_Reg3<"ldle.d", GPR64Opnd>, R3MI<0b11110111>; ++def STGT_B : STGT_LE<"stgt.b", GPR64Opnd>, R3MI<0b11111000>; ++def STGT_H : STGT_LE<"stgt.h", GPR64Opnd>, R3MI<0b11111001>; ++def STGT_W : STGT_LE<"stgt.w", GPR64Opnd>, R3MI<0b11111010>; ++def STGT_D : STGT_LE<"stgt.d", GPR64Opnd>, R3MI<0b11111011>; ++def STLE_B : STGT_LE<"stle.b", GPR64Opnd>, R3MI<0b11111100>; ++def STLE_H : STGT_LE<"stle.h", GPR64Opnd>, R3MI<0b11111101>; ++def STLE_W : STGT_LE<"stle.w", GPR64Opnd>, R3MI<0b11111110>; ++def STLE_D : STGT_LE<"stle.d", GPR64Opnd>, R3MI<0b11111111>; ++ ++let isCodeGenOnly = 1 in { ++def PRELD : Preld<"preld", mem, GPR64Opnd>, PRELD_FM; ++} ++ ++def PRELD_Raw : Preld_Raw<"preld", GPR64Opnd>, PRELD_FM; ++ ++let isCall=1, isCTI=1, Defs = [RA] in { ++ class JumpLink : ++ InstForm<(outs), (ins opnd:$target), !strconcat(opstr, "\t$target"), ++ [(LoongArchJmpLink tglobaladdr:$target)], FrmJ, opstr> { ++ let DecoderMethod = "DecodeJumpTarget"; ++ } ++} ++def LONG_BRANCH_PCADDU12I : LoongArchPseudo<(outs GPR64Opnd:$dst), ++ (ins brtarget:$tgt), []>, GPR_64; ++ ++def LONG_BRANCH_ADDID2Op : LoongArchPseudo<(outs GPR64Opnd:$dst), ++ (ins GPR64Opnd:$src, brtarget:$tgt), []>, GPR_64; ++ 
++def LONG_BRANCH_ADDID : LoongArchPseudo<(outs GPR64Opnd:$dst), ++ (ins GPR64Opnd:$src, brtarget:$tgt, brtarget:$baltgt), []>, GPR_64; ++ ++def LEA_ADDI_D: EffectiveAddress<"addi.d", GPR64Opnd>, LEA_ADDI_FM<0b011>, GPR_64; ++ ++class PseudoReturnBase : LoongArchPseudo<(outs), (ins RO:$rs), ++ []> { + let isTerminator = 1; ++ let isBarrier = 1; ++ let isReturn = 1; ++ let isCodeGenOnly = 1; ++ let hasCtrlDep = 1; ++ let hasExtraSrcRegAllocReq = 1; ++ bit isCTI = 1; ++} ++ ++def PseudoReturn64 : PseudoReturnBase; ++//def PseudoReturn : PseudoReturnBase; ++ ++ ++let isCall=1, isCTI=1, Defs=[RA], isCodeGenOnly=1 in { ++def PseudoCall : LoongArchPseudo<(outs), (ins calltarget:$target), ++ []>; + } +-class Br_I26 op, string opstr> +- : FmtI26 { +- let isBranch = 1; ++ ++let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [SP] in ++def PseudoTailCall : LoongArchPseudo<(outs), (ins calltarget:$target), ++ []>; ++ ++class PseudoTailBase : LoongArchPseudo<(outs), (ins opnd:$offset26), ++ []> { + let isTerminator = 1; ++ let isBarrier = 1; ++ let isReturn = 1; ++ let isCodeGenOnly = 1; + } ++def PseudoTailReturn : PseudoTailBase; + +-let mayLoad = 1 in { +-class LOAD_3R op, string opstr> +- : Fmt3R; +-class LOAD_2RI12 op, string opstr> +- : Fmt2RI12; +-class LOAD_2RI14 op, string opstr> +- : Fmt2RI14; +-} // mayLoad = 1 +- +-let mayStore = 1 in { +-class STORE_3R op, string opstr> +- : Fmt3R; +-class STORE_2RI12 op, string opstr> +- : Fmt2RI12; +-class STORE_2RI14 op, string opstr> +- : Fmt2RI14; +-} // mayStore = 1 +- +-let mayLoad = 1, mayStore = 1 in +-class AM_3R op, string opstr> +- : Fmt3R; +- +-let mayLoad = 1 in +-class LLBase op, string opstr> +- : Fmt2RI14; +- +-let mayStore = 1, Constraints = "$rd = $dst" in +-class SCBase op, string opstr> +- : Fmt2RI14; +- +-class IOCSRRD op, string opstr> +- : Fmt2R; +- +-class IOCSRWR op, string opstr> +- : Fmt2R; + +-//===----------------------------------------------------------------------===// +-// Basic Integer Instructions +-//===----------------------------------------------------------------------===// ++def : LoongArchPat<(LoongArchTailCall tglobaladdr:$dst), ++ (PseudoTailCall tglobaladdr:$dst)>; + +-// Arithmetic Operation Instructions +-def ADD_W : ALU_3R<0b00000000000100000, "add.w">; +-def SUB_W : ALU_3R<0b00000000000100010, "sub.w">; +-def ADDI_W : ALU_2RI12<0b0000001010, "addi.w", simm12>; +-def ALSL_W : ALU_3RI2<0b000000000000010, "alsl.w", uimm2_plus1>; +-def LU12I_W : ALU_1RI20<0b0001010, "lu12i.w", simm20>; +-def SLT : ALU_3R<0b00000000000100100, "slt">; +-def SLTU : ALU_3R<0b00000000000100101, "sltu">; +-def SLTI : ALU_2RI12<0b0000001000, "slti", simm12>; +-def SLTUI : ALU_2RI12<0b0000001001, "sltui", simm12>; +-def PCADDI : ALU_1RI20<0b0001100, "pcaddi", simm20>; +-def PCADDU12I : ALU_1RI20<0b0001110, "pcaddu12i", simm20>; +-def PCALAU12I : ALU_1RI20<0b0001101, "pcalau12i", simm20>; +-def AND : ALU_3R<0b00000000000101001, "and">; +-def OR : ALU_3R<0b00000000000101010, "or">; +-def NOR : ALU_3R<0b00000000000101000, "nor">; +-def XOR : ALU_3R<0b00000000000101011, "xor">; +-def ANDN : ALU_3R<0b00000000000101101, "andn">; +-def ORN : ALU_3R<0b00000000000101100, "orn">; +-def ANDI : ALU_2RI12<0b0000001101, "andi", uimm12>; +-def ORI : ALU_2RI12<0b0000001110, "ori", uimm12>; +-def XORI : ALU_2RI12<0b0000001111, "xori", uimm12>; +-def MUL_W : ALU_3R<0b00000000000111000, "mul.w">; +-def MULH_W : ALU_3R<0b00000000000111001, "mulh.w">; +-def MULH_WU : ALU_3R<0b00000000000111010, "mulh.wu">; +-let usesCustomInserter = true 
in { +-def DIV_W : ALU_3R<0b00000000001000000, "div.w">; +-def MOD_W : ALU_3R<0b00000000001000001, "mod.w">; +-def DIV_WU : ALU_3R<0b00000000001000010, "div.wu">; +-def MOD_WU : ALU_3R<0b00000000001000011, "mod.wu">; +-} // usesCustomInserter = true +- +-// Bit-shift Instructions +-def SLL_W : ALU_3R<0b00000000000101110, "sll.w">; +-def SRL_W : ALU_3R<0b00000000000101111, "srl.w">; +-def SRA_W : ALU_3R<0b00000000000110000, "sra.w">; +-def ROTR_W : ALU_3R<0b00000000000110110, "rotr.w">; +- +-def SLLI_W : ALU_2RI5<0b00000000010000001, "slli.w", uimm5>; +-def SRLI_W : ALU_2RI5<0b00000000010001001, "srli.w", uimm5>; +-def SRAI_W : ALU_2RI5<0b00000000010010001, "srai.w", uimm5>; +-def ROTRI_W : ALU_2RI5<0b00000000010011001, "rotri.w", uimm5>; +- +-// Bit-manipulation Instructions +-def EXT_W_B : ALU_2R<0b0000000000000000010111, "ext.w.b">; +-def EXT_W_H : ALU_2R<0b0000000000000000010110, "ext.w.h">; +-def CLO_W : ALU_2R<0b0000000000000000000100, "clo.w">; +-def CLZ_W : ALU_2R<0b0000000000000000000101, "clz.w">; +-def CTO_W : ALU_2R<0b0000000000000000000110, "cto.w">; +-def CTZ_W : ALU_2R<0b0000000000000000000111, "ctz.w">; +-def BYTEPICK_W : ALU_3RI2<0b000000000000100, "bytepick.w", uimm2>; +-def REVB_2H : ALU_2R<0b0000000000000000001100, "revb.2h">; +-def BITREV_4B : ALU_2R<0b0000000000000000010010, "bitrev.4b">; +-def BITREV_W : ALU_2R<0b0000000000000000010100, "bitrev.w">; +-let Constraints = "$rd = $dst" in { +-def BSTRINS_W : FmtBSTR_W<0b000000000110, (outs GPR:$dst), +- (ins GPR:$rd, GPR:$rj, uimm5:$msbw, uimm5:$lsbw), +- "bstrins.w", "$rd, $rj, $msbw, $lsbw">; +-} +-def BSTRPICK_W : FmtBSTR_W<0b000000000111, (outs GPR:$rd), +- (ins GPR:$rj, uimm5:$msbw, uimm5:$lsbw), +- "bstrpick.w", "$rd, $rj, $msbw, $lsbw">; +-def MASKEQZ : ALU_3R<0b00000000000100110, "maskeqz">; +-def MASKNEZ : ALU_3R<0b00000000000100111, "masknez">; +- +-// Branch Instructions +-def BEQ : BrCC_2RI16<0b010110, "beq">; +-def BNE : BrCC_2RI16<0b010111, "bne">; +-def BLT : BrCC_2RI16<0b011000, "blt">; +-def BGE : BrCC_2RI16<0b011001, "bge">; +-def BLTU : BrCC_2RI16<0b011010, "bltu">; +-def BGEU : BrCC_2RI16<0b011011, "bgeu">; +-def BEQZ : BrCCZ_1RI21<0b010000, "beqz">; +-def BNEZ : BrCCZ_1RI21<0b010001, "bnez">; +-def B : Br_I26<0b010100, "b">; +- +-let isCall = 1 in +-def BL : FmtI26<0b010101, (outs), (ins simm26_lsl2:$imm26), "bl", "$imm26">; +-def JIRL : Fmt2RI16<0b010011, (outs GPR:$rd), +- (ins GPR:$rj, simm16_lsl2:$imm16), "jirl", +- "$rd, $rj, $imm16">; +- +-// Common Memory Access Instructions +-def LD_B : LOAD_2RI12<0b0010100000, "ld.b">; +-def LD_H : LOAD_2RI12<0b0010100001, "ld.h">; +-def LD_W : LOAD_2RI12<0b0010100010, "ld.w">; +-def LD_BU : LOAD_2RI12<0b0010101000, "ld.bu">; +-def LD_HU : LOAD_2RI12<0b0010101001, "ld.hu">; +-def ST_B : STORE_2RI12<0b0010100100, "st.b">; +-def ST_H : STORE_2RI12<0b0010100101, "st.h">; +-def ST_W : STORE_2RI12<0b0010100110, "st.w">; +-def PRELD : FmtPRELD<(outs), (ins uimm5:$imm5, GPR:$rj, simm12:$imm12), "preld", +- "$imm5, $rj, $imm12">; +- +-// Atomic Memory Access Instructions +-def LL_W : LLBase<0b00100000, "ll.w">; +-def SC_W : SCBase<0b00100001, "sc.w">; +- +-// Barrier Instructions +-def DBAR : MISC_I15<0b00111000011100100, "dbar">; +-def IBAR : MISC_I15<0b00111000011100101, "ibar">; +- +-// Other Miscellaneous Instructions +-def SYSCALL : MISC_I15<0b00000000001010110, "syscall">; +-def BREAK : MISC_I15<0b00000000001010100, "break">; +-def RDTIMEL_W : RDTIME_2R<0b0000000000000000011000, "rdtimel.w">; +-def RDTIMEH_W : RDTIME_2R<0b0000000000000000011001, "rdtimeh.w">; 
+-def CPUCFG : ALU_2R<0b0000000000000000011011, "cpucfg">; +- +-/// LA64 instructions +- +-let Predicates = [IsLA64] in { +- +-// Arithmetic Operation Instructions for 64-bits +-def ADD_D : ALU_3R<0b00000000000100001, "add.d">; +-def SUB_D : ALU_3R<0b00000000000100011, "sub.d">; +-def ADDI_D : ALU_2RI12<0b0000001011, "addi.d", simm12>; +-def ADDU16I_D : ALU_2RI16<0b000100, "addu16i.d", simm16>; +-def ALSL_WU : ALU_3RI2<0b000000000000011, "alsl.wu", uimm2_plus1>; +-def ALSL_D : ALU_3RI2<0b000000000010110, "alsl.d", uimm2_plus1>; +-let Constraints = "$rd = $dst" in { +-def LU32I_D : Fmt1RI20<0b0001011, (outs GPR:$dst), +- (ins GPR:$rd, simm20:$imm20), "lu32i.d", +- "$rd, $imm20">; +-} +-def LU52I_D : ALU_2RI12<0b0000001100, "lu52i.d", simm12>; +-def PCADDU18I : ALU_1RI20<0b0001111, "pcaddu18i", simm20>; +-def MUL_D : ALU_3R<0b00000000000111011, "mul.d">; +-def MULH_D : ALU_3R<0b00000000000111100, "mulh.d">; +-def MULH_DU : ALU_3R<0b00000000000111101, "mulh.du">; +-def MULW_D_W : ALU_3R<0b00000000000111110, "mulw.d.w">; +-def MULW_D_WU : ALU_3R<0b00000000000111111, "mulw.d.wu">; +-let usesCustomInserter = true in { +-def DIV_D : ALU_3R<0b00000000001000100, "div.d">; +-def MOD_D : ALU_3R<0b00000000001000101, "mod.d">; +-def DIV_DU : ALU_3R<0b00000000001000110, "div.du">; +-def MOD_DU : ALU_3R<0b00000000001000111, "mod.du">; +-} // usesCustomInserter = true +- +-// Bit-shift Instructions for 64-bits +-def SLL_D : ALU_3R<0b00000000000110001, "sll.d">; +-def SRL_D : ALU_3R<0b00000000000110010, "srl.d">; +-def SRA_D : ALU_3R<0b00000000000110011, "sra.d">; +-def ROTR_D : ALU_3R<0b00000000000110111, "rotr.d">; +-def SLLI_D : ALU_2RI6<0b0000000001000001, "slli.d", uimm6>; +-def SRLI_D : ALU_2RI6<0b0000000001000101, "srli.d", uimm6>; +-def SRAI_D : ALU_2RI6<0b0000000001001001, "srai.d", uimm6>; +-def ROTRI_D : ALU_2RI6<0b0000000001001101, "rotri.d", uimm6>; +- +-// Bit-manipulation Instructions for 64-bits +-def CLO_D : ALU_2R<0b0000000000000000001000, "clo.d">; +-def CLZ_D : ALU_2R<0b0000000000000000001001, "clz.d">; +-def CTO_D : ALU_2R<0b0000000000000000001010, "cto.d">; +-def CTZ_D : ALU_2R<0b0000000000000000001011, "ctz.d">; +-def BYTEPICK_D : ALU_3RI3<0b00000000000011, "bytepick.d", uimm3>; +-def REVB_4H : ALU_2R<0b0000000000000000001101, "revb.4h">; +-def REVB_2W : ALU_2R<0b0000000000000000001110, "revb.2w">; +-def REVB_D : ALU_2R<0b0000000000000000001111, "revb.d">; +-def REVH_2W : ALU_2R<0b0000000000000000010000, "revh.2w">; +-def REVH_D : ALU_2R<0b0000000000000000010001, "revh.d">; +-def BITREV_8B : ALU_2R<0b0000000000000000010011, "bitrev.8b">; +-def BITREV_D : ALU_2R<0b0000000000000000010101, "bitrev.d">; +-let Constraints = "$rd = $dst" in { +-def BSTRINS_D : FmtBSTR_D<0b0000000010, (outs GPR:$dst), +- (ins GPR:$rd, GPR:$rj, uimm6:$msbd, uimm6:$lsbd), +- "bstrins.d", "$rd, $rj, $msbd, $lsbd">; +-} +-def BSTRPICK_D : FmtBSTR_D<0b0000000011, (outs GPR:$rd), +- (ins GPR:$rj, uimm6:$msbd, uimm6:$lsbd), +- "bstrpick.d", "$rd, $rj, $msbd, $lsbd">; +- +-// Common Memory Access Instructions for 64-bits +-def LD_WU : LOAD_2RI12<0b0010101010, "ld.wu">; +-def LD_D : LOAD_2RI12<0b0010100011, "ld.d">; +-def ST_D : STORE_2RI12<0b0010100111, "st.d">; +-def LDX_B : LOAD_3R<0b00111000000000000, "ldx.b">; +-def LDX_H : LOAD_3R<0b00111000000001000, "ldx.h">; +-def LDX_W : LOAD_3R<0b00111000000010000, "ldx.w">; +-def LDX_D : LOAD_3R<0b00111000000011000, "ldx.d">; +-def LDX_BU : LOAD_3R<0b00111000001000000, "ldx.bu">; +-def LDX_HU : LOAD_3R<0b00111000001001000, "ldx.hu">; +-def LDX_WU : 
LOAD_3R<0b00111000001010000, "ldx.wu">; +-def STX_B : STORE_3R<0b00111000000100000, "stx.b">; +-def STX_H : STORE_3R<0b00111000000101000, "stx.h">; +-def STX_W : STORE_3R<0b00111000000110000, "stx.w">; +-def STX_D : STORE_3R<0b00111000000111000, "stx.d">; +-def LDPTR_W : LOAD_2RI14<0b00100100, "ldptr.w">; +-def LDPTR_D : LOAD_2RI14<0b00100110, "ldptr.d">; +-def STPTR_W : STORE_2RI14<0b00100101, "stptr.w">; +-def STPTR_D : STORE_2RI14<0b00100111, "stptr.d">; +-def PRELDX : FmtPRELDX<(outs), (ins uimm5:$imm5, GPR:$rj, GPR:$rk), "preldx", +- "$imm5, $rj, $rk">; +- +-// Bound Check Memory Access Instructions +-def LDGT_B : LOAD_3R<0b00111000011110000, "ldgt.b">; +-def LDGT_H : LOAD_3R<0b00111000011110001, "ldgt.h">; +-def LDGT_W : LOAD_3R<0b00111000011110010, "ldgt.w">; +-def LDGT_D : LOAD_3R<0b00111000011110011, "ldgt.d">; +-def LDLE_B : LOAD_3R<0b00111000011110100, "ldle.b">; +-def LDLE_H : LOAD_3R<0b00111000011110101, "ldle.h">; +-def LDLE_W : LOAD_3R<0b00111000011110110, "ldle.w">; +-def LDLE_D : LOAD_3R<0b00111000011110111, "ldle.d">; +-def STGT_B : STORE_3R<0b00111000011111000, "stgt.b">; +-def STGT_H : STORE_3R<0b00111000011111001, "stgt.h">; +-def STGT_W : STORE_3R<0b00111000011111010, "stgt.w">; +-def STGT_D : STORE_3R<0b00111000011111011, "stgt.d">; +-def STLE_B : STORE_3R<0b00111000011111100, "stle.b">; +-def STLE_H : STORE_3R<0b00111000011111101, "stle.h">; +-def STLE_W : STORE_3R<0b00111000011111110, "stle.w">; +-def STLE_D : STORE_3R<0b00111000011111111, "stle.d">; +- +-// Atomic Memory Access Instructions for 64-bits +-def AMSWAP_W : AM_3R<0b00111000011000000, "amswap.w">; +-def AMSWAP_D : AM_3R<0b00111000011000001, "amswap.d">; +-def AMADD_W : AM_3R<0b00111000011000010, "amadd.w">; +-def AMADD_D : AM_3R<0b00111000011000011, "amadd.d">; +-def AMAND_W : AM_3R<0b00111000011000100, "amand.w">; +-def AMAND_D : AM_3R<0b00111000011000101, "amand.d">; +-def AMOR_W : AM_3R<0b00111000011000110, "amor.w">; +-def AMOR_D : AM_3R<0b00111000011000111, "amor.d">; +-def AMXOR_W : AM_3R<0b00111000011001000, "amxor.w">; +-def AMXOR_D : AM_3R<0b00111000011001001, "amxor.d">; +-def AMMAX_W : AM_3R<0b00111000011001010, "ammax.w">; +-def AMMAX_D : AM_3R<0b00111000011001011, "ammax.d">; +-def AMMIN_W : AM_3R<0b00111000011001100, "ammin.w">; +-def AMMIN_D : AM_3R<0b00111000011001101, "ammin.d">; +-def AMMAX_WU : AM_3R<0b00111000011001110, "ammax.wu">; +-def AMMAX_DU : AM_3R<0b00111000011001111, "ammax.du">; +-def AMMIN_WU : AM_3R<0b00111000011010000, "ammin.wu">; +-def AMMIN_DU : AM_3R<0b00111000011010001, "ammin.du">; +-def AMSWAP_DB_W : AM_3R<0b00111000011010010, "amswap_db.w">; +-def AMSWAP_DB_D : AM_3R<0b00111000011010011, "amswap_db.d">; +-def AMADD_DB_W : AM_3R<0b00111000011010100, "amadd_db.w">; +-def AMADD_DB_D : AM_3R<0b00111000011010101, "amadd_db.d">; +-def AMAND_DB_W : AM_3R<0b00111000011010110, "amand_db.w">; +-def AMAND_DB_D : AM_3R<0b00111000011010111, "amand_db.d">; +-def AMOR_DB_W : AM_3R<0b00111000011011000, "amor_db.w">; +-def AMOR_DB_D : AM_3R<0b00111000011011001, "amor_db.d">; +-def AMXOR_DB_W : AM_3R<0b00111000011011010, "amxor_db.w">; +-def AMXOR_DB_D : AM_3R<0b00111000011011011, "amxor_db.d">; +-def AMMAX_DB_W : AM_3R<0b00111000011011100, "ammax_db.w">; +-def AMMAX_DB_D : AM_3R<0b00111000011011101, "ammax_db.d">; +-def AMMIN_DB_W : AM_3R<0b00111000011011110, "ammin_db.w">; +-def AMMIN_DB_D : AM_3R<0b00111000011011111, "ammin_db.d">; +-def AMMAX_DB_WU : AM_3R<0b00111000011100000, "ammax_db.wu">; +-def AMMAX_DB_DU : AM_3R<0b00111000011100001, "ammax_db.du">; +-def AMMIN_DB_WU : 
AM_3R<0b00111000011100010, "ammin_db.wu">; +-def AMMIN_DB_DU : AM_3R<0b00111000011100011, "ammin_db.du">; +-def LL_D : LLBase<0b00100010, "ll.d">; +-def SC_D : SCBase<0b00100011, "sc.d">; +- +-// CRC Check Instructions +-def CRC_W_B_W : ALU_3R<0b00000000001001000, "crc.w.b.w">; +-def CRC_W_H_W : ALU_3R<0b00000000001001001, "crc.w.h.w">; +-def CRC_W_W_W : ALU_3R<0b00000000001001010, "crc.w.w.w">; +-def CRC_W_D_W : ALU_3R<0b00000000001001011, "crc.w.d.w">; +-def CRCC_W_B_W : ALU_3R<0b00000000001001100, "crcc.w.b.w">; +-def CRCC_W_H_W : ALU_3R<0b00000000001001101, "crcc.w.h.w">; +-def CRCC_W_W_W : ALU_3R<0b00000000001001110, "crcc.w.w.w">; +-def CRCC_W_D_W : ALU_3R<0b00000000001001111, "crcc.w.d.w">; +- +-// Other Miscellaneous Instructions for 64-bits +-def ASRTLE_D : FmtASRT<0b00000000000000010, (outs), (ins GPR:$rj, GPR:$rk), +- "asrtle.d", "$rj, $rk">; +-def ASRTGT_D : FmtASRT<0b00000000000000011, (outs), (ins GPR:$rj, GPR:$rk), +- "asrtgt.d", "$rj, $rk">; +-def RDTIME_D : RDTIME_2R<0b0000000000000000011010, "rdtime.d">; +-} // Predicates = [IsLA64] ++def : LoongArchPat<(LoongArchTailCall texternalsym:$dst), ++ (PseudoTailCall texternalsym:$dst)>; + +-//===----------------------------------------------------------------------===// +-// Pseudo-instructions and codegen patterns +-// +-// Naming convention: For 'generic' pattern classes, we use the naming +-// convention PatTy1Ty2. +-//===----------------------------------------------------------------------===// ++let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, isIndirectBranch = 1, Uses = [SP] in ++def PseudoTAILIndirect : LoongArchPseudo<(outs), (ins GPRTC64Opnd:$rj), [(LoongArchTailCall GPRTC64Opnd:$rj)]>, ++ PseudoInstExpansion<(JIRL ZERO_64, GPR64Opnd:$rj, 0)>; + +-/// Generic pattern classes +- +-class PatGprGpr +- : Pat<(OpNode GPR:$rj, GPR:$rk), (Inst GPR:$rj, GPR:$rk)>; +-class PatGprGpr_32 +- : Pat<(sext_inreg (OpNode GPR:$rj, GPR:$rk), i32), (Inst GPR:$rj, GPR:$rk)>; +- +-class PatGprImm +- : Pat<(OpNode GPR:$rj, ImmOpnd:$imm), +- (Inst GPR:$rj, ImmOpnd:$imm)>; +-class PatGprImm_32 +- : Pat<(sext_inreg (OpNode GPR:$rj, ImmOpnd:$imm), i32), +- (Inst GPR:$rj, ImmOpnd:$imm)>; +- +-/// Simple arithmetic operations +- +-// Match both a plain shift and one where the shift amount is masked (this is +-// typically introduced when the legalizer promotes the shift amount and +-// zero-extends it). For LoongArch, the mask is unnecessary as shifts in the +-// base ISA only read the least significant 5 bits (LA32) or 6 bits (LA64). +-def shiftMaskGRLen +- : ComplexPattern; +-def shiftMask32 : ComplexPattern; +- +-def sexti32 : ComplexPattern; +-def zexti32 : ComplexPattern; +- +-class shiftop +- : PatFrag<(ops node:$val, node:$count), +- (operator node:$val, (GRLenVT (shiftMaskGRLen node:$count)))>; +-class shiftopw +- : PatFrag<(ops node:$val, node:$count), +- (operator node:$val, (i64 (shiftMask32 node:$count)))>; +- +-let Predicates = [IsLA32] in { +-def : PatGprGpr; +-def : PatGprImm; +-def : PatGprGpr; +-def : PatGprGpr; +-def : PatGprGpr; +-def : PatGprGpr; +-def : PatGprGpr; +-def : PatGprGpr; +-def : PatGprGpr; +-def : PatGprGpr; +-} // Predicates = [IsLA32] +- +-let Predicates = [IsLA64] in { +-def : PatGprGpr; +-def : PatGprGpr_32; +-def : PatGprImm; +-def : PatGprImm_32; +-def : PatGprGpr; +-def : PatGprGpr_32; +-def : PatGprGpr; +-def : PatGprGpr; +-def : PatGprGpr; +-def : PatGprGpr; +-// TODO: Select "_W[U]" instructions for i32xi32 if only lower 32 bits of the +-// product are used. 
+-def : PatGprGpr; +-def : PatGprGpr; +-def : PatGprGpr; +-// Select MULW_D_W for calculating the full 64 bits product of i32xi32 signed +-// multiplication. +-def : Pat<(i64 (mul (sext_inreg GPR:$rj, i32), (sext_inreg GPR:$rk, i32))), +- (MULW_D_W GPR:$rj, GPR:$rk)>; +-// Select MULW_D_WU for calculating the full 64 bits product of i32xi32 +-// unsigned multiplication. +-def : Pat<(i64 (mul (loongarch_bstrpick GPR:$rj, (i64 31), (i64 0)), +- (loongarch_bstrpick GPR:$rk, (i64 31), (i64 0)))), +- (MULW_D_WU GPR:$rj, GPR:$rk)>; +-} // Predicates = [IsLA64] +- +-def : PatGprGpr; +-def : PatGprImm; +-def : PatGprGpr; +-def : PatGprImm; +-def : PatGprGpr; +-def : PatGprImm; +- +-/// Shift +- +-let Predicates = [IsLA32] in { +-def : PatGprGpr, SLL_W>; +-def : PatGprGpr, SRA_W>; +-def : PatGprGpr, SRL_W>; +-def : PatGprImm; +-def : PatGprImm; +-def : PatGprImm; +-} // Predicates = [IsLA32] +- +-let Predicates = [IsLA64] in { +-def : PatGprGpr, SLL_W>; +-def : PatGprGpr, SRA_W>; +-def : PatGprGpr, SRL_W>; +-def : PatGprGpr, SLL_D>; +-def : PatGprGpr, SRA_D>; +-def : PatGprGpr, SRL_D>; +-def : PatGprImm; +-def : PatGprImm; +-def : PatGprImm; +-} // Predicates = [IsLA64] +- +-/// sext and zext +- +-def : Pat<(sext_inreg GPR:$rj, i8), (EXT_W_B GPR:$rj)>; +-def : Pat<(sext_inreg GPR:$rj, i16), (EXT_W_H GPR:$rj)>; +- +-let Predicates = [IsLA64] in { +-def : Pat<(sext_inreg GPR:$rj, i32), (ADDI_W GPR:$rj, 0)>; +-} // Predicates = [IsLA64] +- +-/// Setcc +- +-def : PatGprGpr; +-def : PatGprImm; +-def : PatGprGpr; +-def : PatGprImm; +- +-// Define pattern expansions for setcc operations that aren't directly +-// handled by a LoongArch instruction. +-def : Pat<(seteq GPR:$rj, 0), (SLTUI GPR:$rj, 1)>; +-def : Pat<(seteq GPR:$rj, GPR:$rk), (SLTUI (XOR GPR:$rj, GPR:$rk), 1)>; +-let Predicates = [IsLA32] in { +-def : Pat<(seteq GPR:$rj, simm12_plus1:$imm12), +- (SLTUI (ADDI_W GPR:$rj, (NegImm simm12_plus1:$imm12)), 1)>; +-} // Predicates = [IsLA32] +-let Predicates = [IsLA64] in { +-def : Pat<(seteq GPR:$rj, simm12_plus1:$imm12), +- (SLTUI (ADDI_D GPR:$rj, (NegImm simm12_plus1:$imm12)), 1)>; +-} // Predicates = [IsLA64] +-def : Pat<(setne GPR:$rj, 0), (SLTU R0, GPR:$rj)>; +-def : Pat<(setne GPR:$rj, GPR:$rk), (SLTU R0, (XOR GPR:$rj, GPR:$rk))>; +-let Predicates = [IsLA32] in { +-def : Pat<(setne GPR:$rj, simm12_plus1:$imm12), +- (SLTU R0, (ADDI_W GPR:$rj, (NegImm simm12_plus1:$imm12)))>; +-} // Predicates = [IsLA32] +-let Predicates = [IsLA64] in { +-def : Pat<(setne GPR:$rj, simm12_plus1:$imm12), +- (SLTU R0, (ADDI_D GPR:$rj, (NegImm simm12_plus1:$imm12)))>; +-} // Predicates = [IsLA64] +-def : Pat<(setugt GPR:$rj, GPR:$rk), (SLTU GPR:$rk, GPR:$rj)>; +-def : Pat<(setuge GPR:$rj, GPR:$rk), (XORI (SLTU GPR:$rj, GPR:$rk), 1)>; +-def : Pat<(setule GPR:$rj, GPR:$rk), (XORI (SLTU GPR:$rk, GPR:$rj), 1)>; +-def : Pat<(setgt GPR:$rj, GPR:$rk), (SLT GPR:$rk, GPR:$rj)>; +-def : Pat<(setge GPR:$rj, GPR:$rk), (XORI (SLT GPR:$rj, GPR:$rk), 1)>; +-def : Pat<(setle GPR:$rj, GPR:$rk), (XORI (SLT GPR:$rk, GPR:$rj), 1)>; +- +-/// Select +- +-def : Pat<(select GPR:$cond, GPR:$t, GPR:$f), +- (OR (MASKEQZ GPR:$t, GPR:$cond), (MASKNEZ GPR:$f, GPR:$cond))>; +- +-/// Branches and jumps +- +-class BccPat +- : Pat<(brcond (GRLenVT (CondOp GPR:$rj, GPR:$rd)), bb:$imm16), +- (Inst GPR:$rj, GPR:$rd, bb:$imm16)>; +- +-def : BccPat; +-def : BccPat; +-def : BccPat; +-def : BccPat; +-def : BccPat; +-def : BccPat; +- +-class BccSwapPat +- : Pat<(brcond (GRLenVT (CondOp GPR:$rd, GPR:$rj)), bb:$imm16), +- (InstBcc GPR:$rj, GPR:$rd, bb:$imm16)>; 
+- +-// Condition codes that don't have matching LoongArch branch instructions, but +-// are trivially supported by swapping the two input operands. +-def : BccSwapPat; +-def : BccSwapPat; +-def : BccSwapPat; +-def : BccSwapPat; +- +-// An extra pattern is needed for a brcond without a setcc (i.e. where the +-// condition was calculated elsewhere). +-def : Pat<(brcond GPR:$rj, bb:$imm21), (BNEZ GPR:$rj, bb:$imm21)>; +- +-let isBarrier = 1, isBranch = 1, isTerminator = 1 in +-def PseudoBR : Pseudo<(outs), (ins simm26_lsl2:$imm26), [(br bb:$imm26)]>, +- PseudoInstExpansion<(B simm26_lsl2:$imm26)>; +- +-let isBarrier = 1, isBranch = 1, isIndirectBranch = 1, isTerminator = 1 in +-def PseudoBRIND : Pseudo<(outs), (ins GPR:$rj, simm16_lsl2:$imm16), []>, +- PseudoInstExpansion<(JIRL R0, GPR:$rj, simm16_lsl2:$imm16)>; +- +-def : Pat<(brind GPR:$rj), (PseudoBRIND GPR:$rj, 0)>; +-def : Pat<(brind (add GPR:$rj, simm16_lsl2:$imm16)), +- (PseudoBRIND GPR:$rj, simm16_lsl2:$imm16)>; +- +-let isCall = 1, Defs = [R1] in +-def PseudoCALL : Pseudo<(outs), (ins call_symbol:$func), []> { +- let AsmString = "bl\t$func"; +-} +- +-def : Pat<(loongarch_call tglobaladdr:$func), (PseudoCALL tglobaladdr:$func)>; +-def : Pat<(loongarch_call texternalsym:$func), (PseudoCALL texternalsym:$func)>; +- +-let isCall = 1, Defs = [R1] in +-def PseudoCALLIndirect : Pseudo<(outs), (ins GPR:$rj), +- [(loongarch_call GPR:$rj)]>, +- PseudoInstExpansion<(JIRL R1, GPR:$rj, 0)>; +- +-let isBarrier = 1, isReturn = 1, isTerminator = 1 in +-def PseudoRET : Pseudo<(outs), (ins), [(loongarch_ret)]>, +- PseudoInstExpansion<(JIRL R0, R1, 0)>; +- +-/// BSTRINS and BSTRPICK +- +-let Predicates = [IsLA32] in { +-def : Pat<(loongarch_bstrins GPR:$rd, GPR:$rj, uimm5:$msbd, uimm5:$lsbd), +- (BSTRINS_W GPR:$rd, GPR:$rj, uimm5:$msbd, uimm5:$lsbd)>; +-def : Pat<(loongarch_bstrpick GPR:$rj, uimm5:$msbd, uimm5:$lsbd), +- (BSTRPICK_W GPR:$rj, uimm5:$msbd, uimm5:$lsbd)>; +-} // Predicates = [IsLA32] +- +-let Predicates = [IsLA64] in { +-def : Pat<(loongarch_bstrins GPR:$rd, GPR:$rj, uimm6:$msbd, uimm6:$lsbd), +- (BSTRINS_D GPR:$rd, GPR:$rj, uimm6:$msbd, uimm6:$lsbd)>; +-def : Pat<(loongarch_bstrpick GPR:$rj, uimm6:$msbd, uimm6:$lsbd), +- (BSTRPICK_D GPR:$rj, uimm6:$msbd, uimm6:$lsbd)>; +-} // Predicates = [IsLA64] +- +-/// Loads +- +-multiclass LdPat { +- def : Pat<(vt (LoadOp BaseAddr:$rj)), (Inst BaseAddr:$rj, 0)>; +- def : Pat<(vt (LoadOp (add BaseAddr:$rj, simm12:$imm12))), +- (Inst BaseAddr:$rj, simm12:$imm12)>; +-} +- +-defm : LdPat; +-defm : LdPat; +-defm : LdPat; +-defm : LdPat; +-defm : LdPat, Requires<[IsLA32]>; +-defm : LdPat; +-defm : LdPat; +-let Predicates = [IsLA64] in { +-defm : LdPat; +-defm : LdPat; +-defm : LdPat; +-defm : LdPat; +-} // Predicates = [IsLA64] +- +-/// Stores +- +-multiclass StPat { +- def : Pat<(StoreOp (vt StTy:$rd), BaseAddr:$rj), +- (Inst StTy:$rd, BaseAddr:$rj, 0)>; +- def : Pat<(StoreOp (vt StTy:$rd), (add BaseAddr:$rj, simm12:$imm12)), +- (Inst StTy:$rd, BaseAddr:$rj, simm12:$imm12)>; +-} +- +-defm : StPat; +-defm : StPat; +-defm : StPat, Requires<[IsLA32]>; +-let Predicates = [IsLA64] in { +-defm : StPat; +-defm : StPat; +-} // Predicates = [IsLA64] +- +-/// Atomic loads and stores +- +-def : Pat<(atomic_fence timm, timm), (DBAR 0)>; +- +-defm : LdPat; +-defm : LdPat; +-defm : LdPat; +- +-defm : StPat; +-defm : StPat; +-defm : StPat, Requires<[IsLA32]>; +-let Predicates = [IsLA64] in { +-defm : LdPat; +-defm : StPat; +-defm : StPat; +-} // Predicates = [IsLA64] +- +-/// Other pseudo-instructions +- +-// 
Pessimistically assume the stack pointer will be clobbered +-let Defs = [R3], Uses = [R3] in { +-def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2), +- [(callseq_start timm:$amt1, timm:$amt2)]>; +-def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2), +- [(callseq_end timm:$amt1, timm:$amt2)]>; +-} // Defs = [R3], Uses = [R3] + +-//===----------------------------------------------------------------------===// +-// Assembler Pseudo Instructions +-//===----------------------------------------------------------------------===// ++def : LoongArchPat<(LoongArchJmpLink tglobaladdr:$dst), ++ (PseudoCall tglobaladdr:$dst)>; ++ ++def : LoongArchPat<(LoongArchJmpLink (i32 texternalsym:$dst)), ++ (PseudoCall texternalsym:$dst)>; ++def : LoongArchPat<(LoongArchJmpLink (i64 texternalsym:$dst)), ++ (PseudoCall texternalsym:$dst)>; ++ ++def : LoongArchPat<(LoongArchJmpLink (i64 texternalsym:$dst)), ++ (PseudoCall texternalsym:$dst)>; ++ ++def BL : JumpLink<"bl", calltarget>, FJ<0b010101>; ++ ++class IsAsCheapAsAMove { ++ bit isAsCheapAsAMove = 1; ++} ++class LoadUpper: ++ InstForm<(outs RO:$rt), (ins Imm:$imm16), !strconcat(opstr, "\t$rt, $imm16"), ++ [], FrmI, opstr>, IsAsCheapAsAMove { ++ let hasSideEffects = 0; ++ let isReMaterializable = 1; ++ let mayLoad = 1; ++} ++ ++let isCodeGenOnly = 1 in { ++def LAPCREL : LoadUpper<"la.pcrel", GPR64Opnd, uimm16_64_relaxed>, LUI_FM, GPR_64; ++} ++ ++def NOP : LoongArchPseudo<(outs), (ins), []>, ++ PseudoInstExpansion<(ANDI ZERO_64, ZERO_64, 0)>; ++ ++def : LoongArchInstAlias<"nop", (ANDI ZERO_64, ZERO_64, 0), 1>; ++def : LoongArchInstAlias<"jr $rd", (JIRL ZERO_64, GPR64Opnd:$rd, 0), 1>; ++def : LoongArchInstAlias<"move $dst, $src", ++ (OR GPR64Opnd:$dst, GPR64Opnd:$src, ZERO_64), 1>, GPR_64; ++ ++def UImm12RelaxedAsmOperandClass ++: UImmAsmOperandClass<12, [ConstantUImm20AsmOperandClass]> { ++ let Name = "UImm12_Relaxed"; ++ let PredicateMethod = "isAnyImm<12>"; ++ let DiagnosticType = "UImm12_Relaxed"; ++} ++ ++def SImm12RelaxedAsmOperandClass ++: SImmAsmOperandClass<12, [UImm12RelaxedAsmOperandClass]> { ++ let Name = "SImm12_Relaxed"; ++ let PredicateMethod = "isAnyImm<12>"; ++ let DiagnosticType = "SImm12_Relaxed"; ++} ++ ++def simm12_relaxed : Operand { ++ let DecoderMethod = "DecodeSImmWithOffsetAndScale<12>"; ++ let ParserMatchClass = !cast("SImm12RelaxedAsmOperandClass"); ++} ++ ++def : LoongArchPat<(i64 (anyext GPR32:$src)), ++ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32)>,GPR_64; ++ ++let usesCustomInserter = 1 in { ++ def ATOMIC_LOAD_ADD_I64 : Atomic2Ops; ++ def ATOMIC_LOAD_SUB_I64 : Atomic2Ops; ++ def ATOMIC_LOAD_AND_I64 : Atomic2Ops; ++ def ATOMIC_LOAD_OR_I64 : Atomic2Ops; ++ def ATOMIC_LOAD_XOR_I64 : Atomic2Ops; ++ def ATOMIC_LOAD_NAND_I64 : Atomic2Ops; ++ def ATOMIC_SWAP_I64 : Atomic2Ops; ++ def ATOMIC_CMP_SWAP_I64 : AtomicCmpSwap; ++ ++ def ATOMIC_LOAD_MAX_I64 : Atomic2Ops; ++ def ATOMIC_LOAD_MIN_I64 : Atomic2Ops; ++ def ATOMIC_LOAD_UMAX_I64 : Atomic2Ops; ++ def ATOMIC_LOAD_UMIN_I64 : Atomic2Ops; ++} ++ ++def ATOMIC_LOAD_ADD_I64_POSTRA : Atomic2OpsPostRA; ++def ATOMIC_LOAD_SUB_I64_POSTRA : Atomic2OpsPostRA; ++def ATOMIC_LOAD_AND_I64_POSTRA : Atomic2OpsPostRA; ++def ATOMIC_LOAD_OR_I64_POSTRA : Atomic2OpsPostRA; ++def ATOMIC_LOAD_XOR_I64_POSTRA : Atomic2OpsPostRA; ++def ATOMIC_LOAD_NAND_I64_POSTRA : Atomic2OpsPostRA; ++ ++def ATOMIC_SWAP_I64_POSTRA : Atomic2OpsPostRA; ++ ++def ATOMIC_CMP_SWAP_I64_POSTRA : AtomicCmpSwapPostRA; ++ ++def ATOMIC_LOAD_MAX_I64_POSTRA : Atomic2OpsPostRA; ++ ++def 
ATOMIC_LOAD_MIN_I64_POSTRA : Atomic2OpsPostRA; ++ ++def ATOMIC_LOAD_UMAX_I64_POSTRA : Atomic2OpsPostRA; ++ ++def ATOMIC_LOAD_UMIN_I64_POSTRA : Atomic2OpsPostRA; ++ ++def : LoongArchPat<(atomic_load_8 addr:$a), (LD_B addr:$a)>, GPR_64; ++def : LoongArchPat<(atomic_load_16 addr:$a), (LD_H addr:$a)>, GPR_64; ++def : LoongArchPat<(atomic_load_32 addrimm14lsl2:$a), (LDPTR_W addrimm14lsl2:$a)>, GPR_64; ++def : LoongArchPat<(atomic_load_32 addr:$a), (LD_W addr:$a)>, GPR_64; ++def : LoongArchPat<(atomic_load_64 addrimm14lsl2:$a), (LDPTR_D addrimm14lsl2:$a)>, GPR_64; ++def : LoongArchPat<(atomic_load_64 addr:$a), (LD_D addr:$a)>, GPR_64; ++ ++def : LoongArchPat<(atomic_store_8 addr:$a, GPR64:$v), ++ (ST_B GPR64:$v, addr:$a)>, GPR_64; ++def : LoongArchPat<(atomic_store_16 addr:$a, GPR64:$v), ++ (ST_H GPR64:$v, addr:$a)>, GPR_64; ++def : LoongArchPat<(atomic_store_32 addrimm14lsl2:$a, GPR64:$v), ++ (STPTR_W GPR64:$v, addrimm14lsl2:$a)>, GPR_64; ++def : LoongArchPat<(atomic_store_32 addr:$a, GPR64:$v), ++ (ST_W GPR64:$v, addr:$a)>, GPR_64; ++def : LoongArchPat<(atomic_store_64 addrimm14lsl2:$a, GPR64:$v), ++ (STPTR_D GPR64:$v, addrimm14lsl2:$a)>, GPR_64; ++def : LoongArchPat<(atomic_store_64 addr:$a, GPR64:$v), ++ (ST_D GPR64:$v, addr:$a)>, GPR_64; ++ ++def : LoongArchPat<(bswap GPR64:$rt), (REVH_D (REVB_4H GPR64:$rt))>; ++ ++def immZExt5 : ImmLeaf; + +-def : InstAlias<"nop", (ANDI R0, R0, 0)>; +-def : InstAlias<"move $dst, $src", (OR GPR:$dst, GPR:$src, R0)>; ++def immZExtRange2To64 : PatLeaf<(imm), [{ ++ return isUInt<7>(N->getZExtValue()) && (N->getZExtValue() >= 2) && ++ (N->getZExtValue() <= 64); ++}]>; ++ ++// bstrins and bstrpick ++class InsBase ++ : InstForm<(outs RO:$rd), (ins RO:$rj, ImmOpnd:$msbd, ImmOpnd:$lsbd, RO:$src), ++ !strconcat(opstr, "\t$rd, $rj, $msbd, $lsbd"), ++ [(set RO:$rd, (OpNode RO:$rj, ImmOpnd:$msbd, ImmOpnd:$lsbd, RO:$src))], ++ FrmR, opstr> { ++ let Constraints = "$src = $rd"; ++ } ++ ++class InsBase_32 ++ : InstForm<(outs RO:$rd), (ins RO:$rj, ImmOpnd:$msbw, ImmOpnd:$lsbw, RO:$src), ++ !strconcat(opstr, "\t$rd, $rj, $msbw, $lsbw"), ++ [(set RO:$rd, (OpNode RO:$rj, ImmOpnd:$msbw, ImmOpnd:$lsbw, RO:$src))], ++ FrmR, opstr> { ++ let Constraints = "$src = $rd"; ++} ++ ++class PickBase ++ : InstForm<(outs RO:$rd), (ins RO:$rj, ImmOpnd:$msbd, ImmOpnd:$lsbd), ++ !strconcat(opstr, "\t$rd, $rj, $msbd, $lsbd"), ++ [(set RO:$rd, (Op RO:$rj, ImmOpnd:$msbd, ImmOpnd:$lsbd))], ++ FrmR, opstr>; ++ ++class PickBase_32 ++ : InstForm<(outs RO:$rd), (ins RO:$rj, ImmOpnd:$msbw, ImmOpnd:$lsbw), ++ !strconcat(opstr, "\t$rd, $rj, $msbw, $lsbw"), ++ [(set RO:$rd, (Op RO:$rj, ImmOpnd:$msbw, ImmOpnd:$lsbw))], ++ FrmR, opstr>; ++ ++ def BSTRINS_D : InsBase<"bstrins.d", GPR64Opnd, uimm6, LoongArchBstrins>, ++ INSERT_BIT64<0>; ++ def BSTRPICK_D : PickBase<"bstrpick.d", GPR64Opnd, uimm6, LoongArchBstrpick>, ++ INSERT_BIT64<1>; ++ ++let isCodeGenOnly = 1 in { ++ def ZEXT64_32 : InstForm<(outs GPR64Opnd:$rd), ++ (ins GPR32Opnd:$rj, uimm6:$msbd, ++ uimm6:$lsbd), ++ "bstrpick.d $rd, $rj, $msbd, $lsbd", [], FrmR, "bstrpick.d">, ++ INSERT_BIT64<1>; ++} ++ ++//32-to-64-bit extension ++def : LoongArchPat<(i64 (zext GPR32:$src)), (ZEXT64_32 GPR32:$src, 31, 0)>; ++def : LoongArchPat<(i64 (extloadi1 addr:$src)), (LD_B addr:$src)>, ++ GPR_64; ++def : LoongArchPat<(i64 (extloadi8 addr:$src)), (LD_B addr:$src)>, ++ GPR_64; ++def : LoongArchPat<(i64 (extloadi16 addr:$src)), (LD_H addr:$src)>, ++ GPR_64; ++def : LoongArchPat<(i64 (extloadi32 addr:$src)), (LD_W addr:$src)>, ++ GPR_64; ++ ++class LDX_FT_LA : ++ 
InstForm<(outs DRC:$rd), (ins PtrRC:$rj, PtrRC:$rk), ++ !strconcat(opstr, "\t$rd, $rj, $rk"), ++ [(set DRC:$rd, (OpNode (add iPTR:$rj, iPTR:$rk)))], ++ FrmR, opstr> { ++ let AddedComplexity = 20; ++ let canFoldAsLoad = 1; ++ string BaseOpcode = opstr; ++ let mayLoad = 1; ++} ++ ++class STX_FT_LA : ++ InstForm<(outs), (ins DRC:$rd, PtrRC:$rj, PtrRC:$rk), ++ !strconcat(opstr, "\t$rd, $rj, $rk"), ++ [(OpNode DRC:$rd, (add iPTR:$rj, iPTR:$rk))], ++ FrmI, opstr> { ++ string BaseOpcode = opstr; ++ let mayStore = 1; ++ let AddedComplexity = 20; ++} ++ ++ ++def LDX_B : LDX_FT_LA<"ldx.b", GPR64Opnd, sextloadi8>, ++ R3MI<0b00000000>; ++def LDX_H : LDX_FT_LA<"ldx.h", GPR64Opnd, sextloadi16>, ++ R3MI<0b00001000>; ++def LDX_W : LDX_FT_LA<"ldx.w", GPR64Opnd, sextloadi32>, ++ R3MI<0b00010000>; ++def LDX_D : LDX_FT_LA<"ldx.d", GPR64Opnd, load>, ++ R3MI<0b00011000>; ++def STX_B : STX_FT_LA<"stx.b", GPR64Opnd, truncstorei8>, ++ R3MI<0b00100000>; ++def STX_H : STX_FT_LA<"stx.h", GPR64Opnd, truncstorei16>, ++ R3MI<0b00101000>; ++def STX_W : STX_FT_LA<"stx.w", GPR64Opnd, truncstorei32>, ++ R3MI<0b00110000>; ++def STX_D : STX_FT_LA<"stx.d", GPR64Opnd, store>, ++ R3MI<0b00111000>; ++def LDX_BU : LDX_FT_LA<"ldx.bu", GPR64Opnd, extloadi8>, ++ R3MI<0b01000000>; ++def LDX_HU : LDX_FT_LA<"ldx.hu", GPR64Opnd, extloadi16>, ++ R3MI<0b01001000>; ++def LDX_WU : LDX_FT_LA<"ldx.wu", GPR64Opnd, zextloadi32>, ++ R3MI<0b01010000>; ++ ++//def : LoongArchPat<(bswap GPR64:$rj), (REVH_D (REVB_4H GPR64:$rj))>; ++//def : LoongArchPat<(bswap GPR64:$rj), (ROTRI_D (REVB_2W GPR64:$rj), 32)>; ++def : LoongArchPat<(bswap GPR64:$rj), (REVB_D GPR64:$rj)>; ++ ++let isCodeGenOnly = 1 in { ++ def SLLI_D_64_32 : Shift_Imm64<"", GPR64Opnd>, R2_IMM6<0b00>, GPR_64 { ++ let imm6 = 0; ++ let AsmString = "slli.d\t$rd, $rj, 32"; ++ let InOperandList = (ins GPR32:$rj); ++ let OutOperandList = (outs GPR64:$rd); ++ } ++ ++ let isMoveReg = 1, imm5 = 0, ++ AsmString = "slli.w\t$rd, $rj, 0", ++ OutOperandList = (outs GPR64:$rd) in { ++ let InOperandList = (ins GPR32:$rj) in ++ def SLLI_W_64_32 : Shift_Imm32<"", GPR32Opnd>, R2_IMM5<0b00>, GPR_64; ++ let InOperandList = (ins GPR64:$rj) in ++ def SLLI_W_64_64 : Shift_Imm32<"", GPR32Opnd>, R2_IMM5<0b00>, GPR_64; ++ } ++ ++ let AsmString = "sltui\t$rd, $rj, $imm12", ++ OutOperandList = (outs GPR64:$rd) in { ++ let InOperandList = (ins GPR64:$rj, simm12:$imm12) in ++ def SLTUI_64 : SetCC_I<"", GPR64Opnd, simm12>, R2_IMM12<0b001>, GPR_64; ++ } ++} ++ ++// 32-to-64-bit extension ++//def : LoongArchPat<(i64 (zext GPR32:$src)), (SRLI_D (SLLI_D_64_32 GPR32:$src), 32)>, GPR_64; ++def : LoongArchPat<(i64 (sext GPR32:$src)), (SLLI_W_64_32 GPR32:$src)>, GPR_64; ++def : LoongArchPat<(i64 (sext_inreg GPR64:$src, i32)), (SLLI_W_64_64 GPR64:$src)>, GPR_64; ++ ++let Uses = [A0, A1], isTerminator = 1, isReturn = 1, isBarrier = 1, isCTI = 1 in { ++ def LoongArcheh_return32 : LoongArchPseudo<(outs), (ins GPR32:$spoff, GPR32:$dst), ++ [(LoongArchehret GPR32:$spoff, GPR32:$dst)]>; ++ def LoongArcheh_return64 : LoongArchPseudo<(outs), (ins GPR64:$spoff,GPR64:$dst), ++ [(LoongArchehret GPR64:$spoff, GPR64:$dst)]>; ++} ++ ++def : LoongArchPat<(select i32:$cond, i64:$t, i64:$f), ++ (OR (MASKEQZ i64:$t, (SLLI_W_64_32 i32:$cond)), ++ (MASKNEZ i64:$f, (SLLI_W_64_32 i32:$cond)))>; ++// setcc patterns ++multiclass SeteqPats { ++ def : LoongArchPat<(seteq RC:$lhs, 0), ++ (SLTiuOp RC:$lhs, 1)>; ++ def : LoongArchPat<(setne RC:$lhs, 0), ++ (SLTuOp ZEROReg, RC:$lhs)>; ++ def : LoongArchPat<(seteq RC:$lhs, RC:$rhs), ++ (SLTiuOp (XOROp RC:$lhs, 
RC:$rhs), 1)>; ++ def : LoongArchPat<(setne RC:$lhs, RC:$rhs), ++ (SLTuOp ZEROReg, (XOROp RC:$lhs, RC:$rhs))>; ++} ++ ++multiclass SetlePats { ++ def : LoongArchPat<(setle RC:$lhs, RC:$rhs), ++ (XORiOp (SLTOp RC:$rhs, RC:$lhs), 1)>; ++ def : LoongArchPat<(setule RC:$lhs, RC:$rhs), ++ (XORiOp (SLTuOp RC:$rhs, RC:$lhs), 1)>; ++} ++ ++multiclass SetgtPats { ++ def : LoongArchPat<(setgt RC:$lhs, RC:$rhs), ++ (SLTOp RC:$rhs, RC:$lhs)>; ++ def : LoongArchPat<(setugt RC:$lhs, RC:$rhs), ++ (SLTuOp RC:$rhs, RC:$lhs)>; ++} ++ ++multiclass SetgePats { ++ def : LoongArchPat<(setge RC:$lhs, RC:$rhs), ++ (XORiOp (SLTOp RC:$lhs, RC:$rhs), 1)>; ++ def : LoongArchPat<(setuge RC:$lhs, RC:$rhs), ++ (XORiOp (SLTuOp RC:$lhs, RC:$rhs), 1)>; ++} ++ ++multiclass SetgeImmPats { ++ def : LoongArchPat<(setge RC:$lhs, immSExt12:$rhs), ++ (XORiOp (SLTiOp RC:$lhs, immSExt12:$rhs), 1)>; ++ def : LoongArchPat<(setuge RC:$lhs, immSExt12:$rhs), ++ (XORiOp (SLTiuOp RC:$lhs, immSExt12:$rhs), 1)>; ++} ++ ++class LoadRegImmPat : ++ LoongArchPat<(ValTy (Node addrRegImm:$a)), (LoadInst addrRegImm:$a)>; ++ ++class StoreRegImmPat : ++ LoongArchPat<(Node ValTy:$v, addrRegImm:$a), (StoreInst ValTy:$v, addrRegImm:$a)>; ++ ++class LoadRegImm14Lsl2Pat : ++ LoongArchPat<(ValTy (Node addrimm14lsl2:$a)), (LoadInst addrimm14lsl2:$a)>; ++ ++class StoreRegImm14Lsl2Pat : ++ LoongArchPat<(Node ValTy:$v, addrimm14lsl2:$a), (StoreInst ValTy:$v, addrimm14lsl2:$a)>; ++ ++// Patterns for loads/stores with a reg+imm operand. ++// let AddedComplexity = 40 so that these instructions are selected instead of ++// LDX/STX which needs one more register and an ANDI instruction. ++let AddedComplexity = 40 in { ++ def : LoadRegImmPat; ++ def : LoadRegImmPat; ++ def : LoadRegImmPat; ++ def : LoadRegImmPat; ++ def : LoadRegImmPat; ++ def : LoadRegImmPat; ++ def : LoadRegImmPat; ++ def : StoreRegImmPat; ++ def : StoreRegImmPat; ++ def : StoreRegImmPat; ++ def : StoreRegImmPat; ++ ++ def : LoadRegImm14Lsl2Pat; ++ def : LoadRegImm14Lsl2Pat; ++ def : StoreRegImm14Lsl2Pat; ++ def : StoreRegImm14Lsl2Pat; ++} + + //===----------------------------------------------------------------------===// +-// Basic Floating-Point Instructions ++// Base Extension Support + //===----------------------------------------------------------------------===// + +-include "LoongArchFloat32InstrInfo.td" +-include "LoongArchFloat64InstrInfo.td" ++include "LoongArch32InstrInfo.td" ++include "LoongArchInstrInfoF.td" ++include "LoongArchLSXInstrFormats.td" ++include "LoongArchLSXInstrInfo.td" ++include "LoongArchLASXInstrFormats.td" ++include "LoongArchLASXInstrInfo.td" ++ ++defm : SeteqPats, GPR_64; ++defm : SetlePats, GPR_64; ++defm : SetgtPats, GPR_64; ++defm : SetgePats, GPR_64; ++defm : SetgeImmPats, GPR_64; ++ ++/// ++/// for relocation ++/// ++let isCodeGenOnly = 1 in { ++def PCADDU12I_ri : SI20<"pcaddu12i", GPR64Opnd, simm20>, R1_SI20<0b0001110>; ++def PCADDU12I_rii : RELOC_rii<"pcaddu12i", GPR64Opnd, simm20>, R1_SI20<0b0001110>; ++def ORI_rri : Int_Reg2_Imm12<"ori", GPR64Opnd, uimm12, or>, R2_IMM12<0b110>; ++def ORI_rrii : RELOC_rrii<"ori", GPR64Opnd, uimm12>, R2_IMM12<0b110>; ++def LU12I_W_ri : SI20<"lu12i.w", GPR64Opnd, simm20>, R1_SI20<0b0001010>; ++def LU32I_D_ri : SI20<"lu32i.d", GPR64Opnd, simm20>, R1_SI20<0b0001011>; ++def LU32I_D_rii : RELOC_rii<"lu32i.d", GPR64Opnd, simm20>, R1_SI20<0b0001011>; ++def LU52I_D_rri : Int_Reg2_Imm12<"lu52i.d", GPR64Opnd, simm12>, R2_IMM12<0b100>; ++def LU52I_D_rrii : RELOC_rrii<"lu52i.d", GPR64Opnd, simm12>, R2_IMM12<0b100>; ++def ADDI_D_rri : 
Int_Reg2_Imm12<"addi.d", GPR64Opnd, simm12, add>, R2_IMM12<0b011>; ++def ADDI_D_rrii : RELOC_rrii<"addi.d", GPR64Opnd, simm12>, R2_IMM12<0b011>; ++def LD_D_rri : Ld<"ld.d", GPR64Opnd, mem_simmptr, load>, LOAD_STORE<0b0011>; ++def LD_D_rrii : RELOC_rrii<"ld.d", GPR64Opnd, simm12>, LOAD_STORE_RRI<0b0011>; ++def ADD_D_rrr : Int_Reg3<"add.d", GPR64Opnd, add>, R3I<0b0100001>; ++def LDX_D_rrr : LDX_FT_LA<"ldx.d", GPR64Opnd, load>, ++ R3MI<0b00011000>; ++} + + //===----------------------------------------------------------------------===// +-// Privilege Instructions ++// Assembler Pseudo Instructions + //===----------------------------------------------------------------------===// +- +-// CSR Access Instructions +-def CSRRD : FmtCSR<0b0000010000000, (outs GPR:$rd), (ins uimm14:$csr_num), +- "csrrd", "$rd, $csr_num">; +-let Constraints = "$rd = $dst" in { +-def CSRWR : FmtCSR<0b0000010000001, (outs GPR:$dst), +- (ins GPR:$rd, uimm14:$csr_num), "csrwr", "$rd, $csr_num">; +-def CSRXCHG : FmtCSRXCHG<0b00000100, (outs GPR:$dst), +- (ins GPR:$rd, GPR:$rj, uimm14:$csr_num), +- "csrxchg", "$rd, $rj, $csr_num">; +-} // Constraints = "$rd = $dst" +- +-// IOCSR Access Instructions +-def IOCSRRD_B : IOCSRRD<0b0000011001001000000000, "iocsrrd.b">; +-def IOCSRRD_H : IOCSRRD<0b0000011001001000000001, "iocsrrd.h">; +-def IOCSRRD_W : IOCSRRD<0b0000011001001000000010, "iocsrrd.w">; +-def IOCSRWR_B : IOCSRWR<0b0000011001001000000100, "iocsrwr.b">; +-def IOCSRWR_H : IOCSRWR<0b0000011001001000000101, "iocsrwr.h">; +-def IOCSRWR_W : IOCSRWR<0b0000011001001000000110, "iocsrwr.w">; +-let Predicates = [IsLA64] in { +-def IOCSRRD_D : IOCSRRD<0b0000011001001000000011, "iocsrrd.d">; +-def IOCSRWR_D : IOCSRWR<0b0000011001001000000111, "iocsrwr.d">; +-} // Predicates = [IsLA64] +- +-// Cache Maintenance Instructions +-def CACOP : FmtCACOP<(outs), (ins uimm5:$op, GPR:$rj, simm12:$imm12), "cacop", +- "$op, $rj, $imm12">; +- +-// TLB Maintenance Instructions +-def TLBSRCH : FmtI32<0b00000110010010000010100000000000, "tlbsrch">; +-def TLBRD : FmtI32<0b00000110010010000010110000000000, "tlbrd">; +-def TLBWR : FmtI32<0b00000110010010000011000000000000, "tlbwr">; +-def TLBFILL : FmtI32<0b00000110010010000011010000000000, "tlbfill">; +-def TLBCLR : FmtI32<0b00000110010010000010000000000000, "tlbclr">; +-def TLBFLUSH : FmtI32<0b00000110010010000010010000000000, "tlbflush">; +-def INVTLB : FmtINVTLB<(outs), (ins GPR:$rk, GPR:$rj, uimm5:$op), "invtlb", +- "$op, $rj, $rk">; +- +-// Software Page Walking Instructions +-def LDDIR : Fmt2RI8<0b00000110010000, (outs GPR:$rd), +- (ins GPR:$rj, uimm8:$imm8), "lddir", "$rd, $rj, $imm8">; +-def LDPTE : FmtLDPTE<(outs), (ins GPR:$rj, uimm8:$seq), "ldpte", "$rj, $seq">; +- +- +-// Other Miscellaneous Instructions +-def ERTN : FmtI32<0b00000110010010000011100000000000, "ertn">; +-def DBCL : MISC_I15<0b00000000001010101, "dbcl">; +-def IDLE : MISC_I15<0b00000110010010001, "idle">; ++def LoadImm32 : LoongArchAsmPseudoInst<(outs GPR32Opnd:$rd), ++ (ins uimm32_coerced:$imm32), ++ "li.w\t$rd, $imm32">; ++def LoadImm64 : LoongArchAsmPseudoInst<(outs GPR64Opnd:$rd), ++ (ins imm64:$imm64), ++ "li.d\t$rd, $imm64">; ++// load address ++def LoadAddrLocal : LoongArchAsmPseudoInst<(outs GPR64Opnd:$rd), ++ (ins imm64:$imm64), ++ "la.local\t$rd, $imm64">; ++def : InstAlias<"la.pcrel $rd, $imm", ++ (LoadAddrLocal GPR64Opnd:$rd, imm64:$imm), 1>; ++def LoadAddrGlobal : LoongArchAsmPseudoInst<(outs GPR64Opnd:$rd), ++ (ins imm64:$imm64), ++ "la.global\t$rd, $imm64">; ++def LoadAddrGlobal_Alias : 
LoongArchAsmPseudoInst<(outs GPR64Opnd:$rd), ++ (ins imm64:$imm64), ++ "la\t$rd, $imm64">; ++def : InstAlias<"la.got $rd, $imm", ++ (LoadAddrGlobal GPR64Opnd:$rd, imm64:$imm), 1>; ++ ++def LoadAddrTLS_LE : LoongArchAsmPseudoInst<(outs GPR64Opnd:$rd), ++ (ins imm64:$imm64), ++ "la.tls.le\t$rd, $imm64">; ++def LoadAddrTLS_IE : LoongArchAsmPseudoInst<(outs GPR64Opnd:$rd), ++ (ins imm64:$imm64), ++ "la.tls.ie\t$rd, $imm64">; ++def LoadAddrTLS_GD : LoongArchAsmPseudoInst<(outs GPR64Opnd:$rd), ++ (ins imm64:$imm64), ++ "la.tls.gd\t$rd, $imm64">; ++def LoadAddrTLS_LD : LoongArchAsmPseudoInst<(outs GPR64Opnd:$rd), ++ (ins imm64:$imm64), ++ "la.tls.ld\t$rd, $imm64">; ++ ++// load address with a temp reg ++def LoadAddrLocalRR : LoongArchAsmPseudoInst<(outs GPR64Opnd:$rd), ++ (ins GPR64Opnd:$rt, imm64:$imm64), ++ "la.local\t$rd, $rt, $imm64">; ++def LoadAddrGlobalRR : LoongArchAsmPseudoInst<(outs GPR64Opnd:$rd), ++ (ins GPR64Opnd:$rt, imm64:$imm64), ++ "la.global\t$rd, $rt, $imm64">; ++def LoadAddrTLS_IE_RR : LoongArchAsmPseudoInst<(outs GPR64Opnd:$rd), ++ (ins GPR64Opnd:$rt, imm64:$imm64), ++ "la.tls.ie\t$rd, $rt, $imm64">; ++def LoadAddrTLS_GD_RR : LoongArchAsmPseudoInst<(outs GPR64Opnd:$rd), ++ (ins GPR64Opnd:$rt, imm64:$imm64), ++ "la.tls.gd\t$rd, $rt, $imm64">; ++def LoadAddrTLS_LD_RR : LoongArchAsmPseudoInst<(outs GPR64Opnd:$rd), ++ (ins GPR64Opnd:$rt, imm64:$imm64), ++ "la.tls.ld\t$rd, $rt, $imm64">; ++ ++// trap when div zero ++def PseudoTEQ : LoongArchPseudo<(outs), (ins GPR64Opnd:$rt), []>; ++ ++ ++def : LoongArchPat<(i64 (sext (i32 (add GPR32:$src, immSExt12:$imm12)))), ++ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), ++ (ADDI_W GPR32:$src, immSExt12:$imm12), sub_32)>; ++ ++def : LoongArchPat<(i64 (sext (i32 (add GPR32:$src, GPR32:$src2)))), ++ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), ++ (ADD_W GPR32:$src, GPR32:$src2), sub_32)>; ++ ++def : LoongArchPat<(i64 (sext (i32 (sub GPR32:$src, GPR32:$src2)))), ++ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), ++ (SUB_W GPR32:$src, GPR32:$src2), sub_32)>; ++ ++def : LoongArchPat<(i64 (sext (i32 (mul GPR32:$src, GPR32:$src2)))), ++ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), ++ (MUL_W GPR32:$src, GPR32:$src2), sub_32)>; ++ ++def : LoongArchPat<(i64 (sext (i32 (shl GPR32:$src, immZExt5:$imm5)))), ++ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), ++ (SLLI_W GPR32:$src, immZExt5:$imm5), sub_32)>; ++ ++def : LoongArchPat<(i64 (sext (i32 (shl GPR32:$src, GPR32:$src2)))), ++ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), ++ (SLL_W GPR32:$src, GPR32:$src2), sub_32)>; ++ ++def : LoongArchPat<(i64 (sext (i32 (srl GPR32:$src, immZExt5:$imm5)))), ++ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), ++ (SRLI_W GPR32:$src, immZExt5:$imm5), sub_32)>; ++ ++def : LoongArchPat<(i64 (sext (i32 (srl GPR32:$src, GPR32:$src2)))), ++ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), ++ (SRL_W GPR32:$src, GPR32:$src2), sub_32)>; ++ ++def : LoongArchPat<(i64 (sext (i32 (sra GPR32:$src, immZExt5:$imm5)))), ++ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), ++ (SRAI_W GPR32:$src, immZExt5:$imm5), sub_32)>; ++ ++def : LoongArchPat<(i64 (sext (i32 (sra GPR32:$src, GPR32:$src2)))), ++ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), ++ (SRA_W GPR32:$src, GPR32:$src2), sub_32)>; ++ ++ ++def : LoongArchPat<(i64 (xor GPR64:$rj, (i64 -1))), ++ (NOR ZERO_64, GPR64:$rj)>; ++ ++def : LoongArchPat<(and GPR64:$rj, (i64 (xor GPR64:$rk, (i64 -1)))), ++ (ANDN GPR64:$rj, GPR64:$rk)>; ++ ++def : LoongArchPat<(i64 (or GPR64:$rj, (xor GPR64:$rk, (i64 -1)))), ++ (ORN GPR64:$rj, GPR64:$rk)>; ++ ++def : LoongArchPat<(i64 (zext (i32 (seteq GPR64:$rj, (i64 0))))), ++ (SLTUI_64 GPR64:$rj, (i64 
1))>; ++ ++ ++def : LoongArchPat<(i64 (zext (i32 (srl GPR32:$src, immZExt5:$imm5)))), ++ (BSTRPICK_D (INSERT_SUBREG ++ (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32), ++ (i32 31), immZExt5:$imm5)>; +diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfoF.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfoF.td +new file mode 100644 +index 000000000..4df5fc88e +--- /dev/null ++++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfoF.td +@@ -0,0 +1,630 @@ ++//===- LoongArchInstrInfoF.td - Target Description for LoongArch Target -*- tablegen -*-=// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file contains the LoongArch implementation of the TargetInstrInfo class. ++// ++//===----------------------------------------------------------------------===// ++// FP immediate patterns. ++def fpimm0 : PatLeaf<(fpimm), [{ ++ return N->isExactlyValue(+0.0); ++}]>; ++ ++def fpimm0neg : PatLeaf<(fpimm), [{ ++ return N->isExactlyValue(-0.0); ++}]>; ++ ++def fpimm1 : PatLeaf<(fpimm), [{ ++ return N->isExactlyValue(+1.0); ++}]>; ++ ++def IsNotSoftFloat : Predicate<"!Subtarget->useSoftFloat()">, ++ AssemblerPredicate<(all_of FeatureSoftFloat)>; ++ ++class HARDFLOAT { list HardFloatPredicate = [IsNotSoftFloat]; } ++ ++def SDT_LoongArchTruncIntFP : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisFP<1>]>; ++ ++def LoongArchTruncIntFP : SDNode<"LoongArchISD::TruncIntFP", SDT_LoongArchTruncIntFP>; ++ ++def SDT_LoongArchFPBrcond : SDTypeProfile<0, 3, [SDTCisInt<0>, ++ SDTCisVT<1, i32>, ++ SDTCisVT<2, OtherVT>]>; ++ ++def LoongArchFPBrcond : SDNode<"LoongArchISD::FPBrcond", SDT_LoongArchFPBrcond, ++ [SDNPHasChain, SDNPOptInGlue]>; ++ ++def SDT_LoongArchCMovFP : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisVT<2, i32>, ++ SDTCisSameAs<1, 3>]>; ++ ++def LoongArchCMovFP_T : SDNode<"LoongArchISD::CMovFP_T", SDT_LoongArchCMovFP, [SDNPInGlue]>; ++ ++def LoongArchCMovFP_F : SDNode<"LoongArchISD::CMovFP_F", SDT_LoongArchCMovFP, [SDNPInGlue]>; ++ ++def SDT_LoongArchFPCmp : SDTypeProfile<0, 3, [SDTCisSameAs<0, 1>, SDTCisFP<1>, ++ SDTCisVT<2, i32>]>; ++ ++def LoongArchFPCmp : SDNode<"LoongArchISD::FPCmp", SDT_LoongArchFPCmp, [SDNPOutGlue]>; ++ ++def SDT_LoongArchFSEL : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, ++ SDTCisVT<2, i32>, ++ SDTCisSameAs<1, 3>]>; ++ ++def LoongArchFSEL : SDNode<"LoongArchISD::FSEL", SDT_LoongArchFSEL, ++ [SDNPInGlue]>; ++ ++//===---------------------------------------------------------------------===/ ++//Instruction Class Templates ++//===---------------------------------------------------------------------===/ ++ ++class Float_MOVF ++ : InstForm<(outs RO:$rd), (ins RC:$fj), ++ !strconcat(opstr, "\t$rd, $fj"), ++ [(set RO:$rd, (OpNode RC:$fj))], ++ FrmFR, opstr>, HARDFLOAT { ++ let isMoveReg = 1; ++} ++ ++class Float_MOVT ++ : InstForm<(outs RO:$fd), (ins RC:$rj), ++ !strconcat(opstr, "\t$fd, $rj"), ++ [(set RO:$fd, (OpNode RC:$rj))], ++ FrmFR, opstr>, HARDFLOAT { ++ let isMoveReg = 1; ++} ++ ++class Float_CVT ++ : InstForm<(outs RO:$fd), (ins RS:$fj), ++ !strconcat(opstr, "\t$fd, $fj"), ++ [(set RO:$fd, (OpNode RS:$fj))], ++ FrmFR, opstr>, ++ HARDFLOAT { ++ let hasSideEffects = 0; ++} ++ ++/// float mov ++class Gpr_2_Fcsr ++ : InstForm<(outs FCSROpnd:$fcsr), (ins RO:$rj), ++ !strconcat(opstr, "\t$fcsr, $rj"), ++ [(set FCSROpnd:$fcsr, (OpNode RO:$rj))], 
++ FrmR, opstr>; ++class Fcsr_2_Gpr ++ : InstForm<(outs RO:$rd), (ins FCSROpnd:$fcsr), ++ !strconcat(opstr, "\t$rd, $fcsr"), ++ [(set RO:$rd, (OpNode FCSROpnd:$fcsr))], ++ FrmR, opstr>; ++class Fgr_2_Fcfr ++ : InstForm<(outs FCFROpnd:$cd), (ins RO:$fj), ++ !strconcat(opstr, "\t$cd, $fj"), ++ [(set FCFROpnd:$cd, (OpNode RO:$fj))], ++ FrmR, opstr>; ++class Fcfr_2_Fgr ++ : InstForm<(outs RO:$fd), (ins FCFROpnd:$cj), ++ !strconcat(opstr, "\t$fd, $cj"), ++ [(set RO:$fd, (OpNode FCFROpnd:$cj))], ++ FrmR, opstr>; ++class Gpr_2_Fcfr ++ : InstForm<(outs FCFROpnd:$cd), (ins RO:$rj), ++ !strconcat(opstr, "\t$cd, $rj"), ++ [(set FCFROpnd:$cd, (OpNode RO:$rj))], ++ FrmR, opstr>; ++class Fcfr_2_Gpr ++ : InstForm<(outs RO:$rd), (ins FCFROpnd:$cj), ++ !strconcat(opstr, "\t$rd, $cj"), ++ [(set RO:$rd, (OpNode FCFROpnd:$cj))], ++ FrmR, opstr>; ++ ++class FLDX : ++ InstForm<(outs DRC:$fd), (ins PtrRC:$rj, PtrRC:$rk), ++ !strconcat(opstr, "\t$fd, $rj, $rk"), ++ [(set DRC:$fd, (OpNode (add iPTR:$rj, iPTR:$rk)))], ++ FrmR, opstr> { ++ let AddedComplexity = 20; ++} ++ ++class FSTX : ++ InstForm<(outs), (ins DRC:$fd, PtrRC:$rj, PtrRC:$rk), ++ !strconcat(opstr, "\t$fd, $rj, $rk"), ++ [(OpNode DRC:$fd, (add iPTR:$rj, iPTR:$rk))], ++ FrmR, opstr> { ++ let AddedComplexity = 20; ++} ++ ++/// f{maxa/mina}.{s/d} ++class Float_Reg3_Fmaxa ++ : InstForm<(outs RO:$fd), (ins RO:$fj, RO:$fk), ++ !strconcat(opstr, "\t$fd, $fj, $fk"), ++ [], FrmR, opstr>; ++/// frecip ++class Float_Reg2_Frecip ++ : InstForm<(outs RO:$fd), (ins RO:$fj), ++ !strconcat(opstr, "\t$fd, $fj"), ++ [(set RO:$fd, (OpNode fpimm1, RO:$fj))], ++ FrmR, opstr>; ++/// frsqrt ++class Float_Reg2_Frsqrt ++ : InstForm<(outs RO:$fd), (ins RO:$fj), ++ !strconcat(opstr, "\t$fd, $fj"), ++ [(set RO:$fd, (OpNode fpimm1, (fsqrt RO:$fj)))], ++ FrmR, opstr>; ++ ++class BceqzBr : ++ InstForm<(outs), (ins FCFROpnd:$cj, opnd:$offset), ++ !strconcat(opstr, "\t$cj, $offset"), ++ [(LoongArchFPBrcond Op, FCFROpnd:$cj, bb:$offset)], ++ FrmFI, opstr>, HARDFLOAT { ++ let isBranch = 1; ++ let isTerminator = 1; ++ let hasFCCRegOperand = 1; ++} ++ ++class FCMP_COND ++ : InstForm<(outs FCFROpnd:$cd), (ins RO:$fj, RO:$fk), ++ !strconcat("fcmp.", CondStr, ".", TypeStr, "\t$cd, $fj, $fk"), ++ [(set FCFROpnd:$cd, (OpNode RO:$fj, RO:$fk))], ++ FrmOther, ++ !strconcat("fcmp.", CondStr, ".", TypeStr)> { ++ bit isCTI = 1; // for what? 
from Mips32r6InstrInfo.td line 219 ++} ++ ++class FIELD_CMP_COND Val> { ++ bits<5> Value = Val; ++} ++def FIELD_CMP_COND_CAF : FIELD_CMP_COND<0x0>; ++def FIELD_CMP_COND_CUN : FIELD_CMP_COND<0x8>; ++def FIELD_CMP_COND_CEQ : FIELD_CMP_COND<0x4>; ++def FIELD_CMP_COND_CUEQ : FIELD_CMP_COND<0xC>; ++def FIELD_CMP_COND_CLT : FIELD_CMP_COND<0x2>; ++def FIELD_CMP_COND_CULT : FIELD_CMP_COND<0xA>; ++def FIELD_CMP_COND_CLE : FIELD_CMP_COND<0x6>; ++def FIELD_CMP_COND_CULE : FIELD_CMP_COND<0xE>; ++def FIELD_CMP_COND_CNE : FIELD_CMP_COND<0x10>; ++def FIELD_CMP_COND_COR : FIELD_CMP_COND<0x14>; ++def FIELD_CMP_COND_CUNE : FIELD_CMP_COND<0x18>; ++def FIELD_CMP_COND_SAF : FIELD_CMP_COND<0x1>; ++def FIELD_CMP_COND_SUN : FIELD_CMP_COND<0x9>; ++def FIELD_CMP_COND_SEQ : FIELD_CMP_COND<0x5>; ++def FIELD_CMP_COND_SUEQ : FIELD_CMP_COND<0xD>; ++def FIELD_CMP_COND_SLT : FIELD_CMP_COND<0x3>; ++def FIELD_CMP_COND_SULT : FIELD_CMP_COND<0xB>; ++def FIELD_CMP_COND_SLE : FIELD_CMP_COND<0x7>; ++def FIELD_CMP_COND_SULE : FIELD_CMP_COND<0xF>; ++def FIELD_CMP_COND_SNE : FIELD_CMP_COND<0x11>; ++def FIELD_CMP_COND_SOR : FIELD_CMP_COND<0x15>; ++def FIELD_CMP_COND_SUNE : FIELD_CMP_COND<0x19>; ++ ++multiclass FCMP_COND_M op, string TypeStr, ++ RegisterOperand RO> { ++ def FCMP_CAF_#NAME : FCMP_COND<"caf", TypeStr, RO>, ++ R2_COND; ++ def FCMP_CUN_#NAME : FCMP_COND<"cun", TypeStr, RO, setuo>, ++ R2_COND; ++ def FCMP_CEQ_#NAME : FCMP_COND<"ceq", TypeStr, RO, setoeq>, ++ R2_COND; ++ def FCMP_CUEQ_#NAME : FCMP_COND<"cueq", TypeStr, RO, setueq>, ++ R2_COND; ++ def FCMP_CLT_#NAME : FCMP_COND<"clt", TypeStr, RO, setolt>, ++ R2_COND; ++ def FCMP_CULT_#NAME : FCMP_COND<"cult", TypeStr, RO, setult>, ++ R2_COND; ++ def FCMP_CLE_#NAME : FCMP_COND<"cle", TypeStr, RO, setole>, ++ R2_COND; ++ def FCMP_CULE_#NAME : FCMP_COND<"cule", TypeStr, RO, setule>, ++ R2_COND; ++ def FCMP_CNE_#NAME : FCMP_COND<"cne", TypeStr, RO, setone>, ++ R2_COND; ++ def FCMP_COR_#NAME : FCMP_COND<"cor", TypeStr, RO, seto>, ++ R2_COND; ++ def FCMP_CUNE_#NAME : FCMP_COND<"cune", TypeStr, RO, setune>, ++ R2_COND; ++ ++ def FCMP_SAF_#NAME : FCMP_COND<"saf", TypeStr, RO>, ++ R2_COND; ++ def FCMP_SUN_#NAME : FCMP_COND<"sun", TypeStr, RO>, ++ R2_COND; ++ def FCMP_SEQ_#NAME : FCMP_COND<"seq", TypeStr, RO>, ++ R2_COND; ++ def FCMP_SUEQ_#NAME : FCMP_COND<"sueq", TypeStr, RO>, ++ R2_COND; ++ def FCMP_SLT_#NAME : FCMP_COND<"slt", TypeStr, RO>, ++ R2_COND; ++ def FCMP_SULT_#NAME : FCMP_COND<"sult", TypeStr, RO>, ++ R2_COND; ++ def FCMP_SLE_#NAME : FCMP_COND<"sle", TypeStr, RO>, ++ R2_COND; ++ def FCMP_SULE_#NAME : FCMP_COND<"sule", TypeStr, RO>, ++ R2_COND; ++ def FCMP_SNE_#NAME : FCMP_COND<"sne", TypeStr, RO>, ++ R2_COND; ++ def FCMP_SOR_#NAME : FCMP_COND<"sor", TypeStr, RO>, ++ R2_COND; ++ def FCMP_SUNE_#NAME : FCMP_COND<"sune", TypeStr, RO>, ++ R2_COND; ++} ++ ++//// comparisons supported via another comparison ++//multiclass FCmp_Pats { ++// def : LoongArchPat<(seteq VT:$lhs, VT:$rhs), ++// (!cast("FCMP_CEQ_"#NAME) VT:$lhs, VT:$rhs)>; ++// def : LoongArchPat<(setgt VT:$lhs, VT:$rhs), ++// (!cast("FCMP_CLE_"#NAME) VT:$rhs, VT:$lhs)>; ++// def : LoongArchPat<(setge VT:$lhs, VT:$rhs), ++// (!cast("FCMP_CLT_"#NAME) VT:$rhs, VT:$lhs)>; ++// def : LoongArchPat<(setlt VT:$lhs, VT:$rhs), ++// (!cast("FCMP_CLT_"#NAME) VT:$lhs, VT:$rhs)>; ++// def : LoongArchPat<(setle VT:$lhs, VT:$rhs), ++// (!cast("FCMP_CLE_"#NAME) VT:$lhs, VT:$rhs)>; ++// def : LoongArchPat<(setne VT:$lhs, VT:$rhs), ++// (NOROp ++// (!cast("FCMP_CEQ_"#NAME) VT:$lhs, VT:$rhs), ++// ZEROReg)>; ++//} ++ ++ ++/// ++/// R2 
++/// ++def FABS_S : Float_Reg2<"fabs.s", FGR32Opnd, fabs>, R2F<0b0100000001>; ++def FABS_D : Float_Reg2<"fabs.d", FGR64Opnd, fabs>, R2F<0b0100000010>; ++def FNEG_S : Float_Reg2<"fneg.s", FGR32Opnd, fneg>, R2F<0b0100000101>; ++def FNEG_D : Float_Reg2<"fneg.d", FGR64Opnd, fneg>, R2F<0b0100000110>; ++def FLOGB_S : Float_Reg2<"flogb.s", FGR32Opnd>, R2F<0b0100001001>; ++def FLOGB_D : Float_Reg2<"flogb.d", FGR64Opnd>, R2F<0b0100001010>; ++def FCLASS_S : Float_Reg2<"fclass.s", FGR32Opnd>, R2F<0b0100001101>; ++def FCLASS_D : Float_Reg2<"fclass.d", FGR64Opnd>, R2F<0b0100001110>; ++def FSQRT_S : Float_Reg2<"fsqrt.s", FGR32Opnd, fsqrt>, R2F<0b0100010001>; ++def FSQRT_D : Float_Reg2<"fsqrt.d", FGR64Opnd, fsqrt>, R2F<0b0100010010>; ++def FRECIP_S : Float_Reg2_Frecip<"frecip.s", FGR32Opnd, fdiv>, R2F<0b0100010101>; ++def FRECIP_D : Float_Reg2_Frecip<"frecip.d", FGR64Opnd, fdiv>, R2F<0b0100010110>; ++def FRSQRT_S : Float_Reg2_Frsqrt<"frsqrt.s", FGR32Opnd, fdiv>, R2F<0b0100011001>; ++def FRSQRT_D : Float_Reg2_Frsqrt<"frsqrt.d", FGR64Opnd, fdiv>, R2F<0b0100011010>; ++def FMOV_S : Float_Reg2<"fmov.s", FGR32Opnd>, R2F<0b0100100101>; ++def FMOV_D : Float_Reg2<"fmov.d", FGR64Opnd>, R2F<0b0100100110>; ++ ++def MOVGR2FR_W : Float_MOVT<"movgr2fr.w", FGR32Opnd, GPR32Opnd, bitconvert>, MOVFI<0b0100101001>; ++def MOVGR2FR_D : Float_MOVT<"movgr2fr.d", FGR64Opnd, GPR64Opnd, bitconvert>, MOVFI<0b0100101010>; ++def MOVGR2FRH_W : Float_MOVT<"movgr2frh.w", FGR64Opnd, GPR32Opnd>, MOVFI<0b0100101011>; //not realize ++def MOVFR2GR_S : Float_MOVF<"movfr2gr.s", GPR32Opnd, FGR32Opnd, bitconvert>, MOVIF<0b0100101101>; ++def MOVFR2GR_D : Float_MOVF<"movfr2gr.d", GPR64Opnd, FGR64Opnd, bitconvert>, MOVIF<0b0100101110>; ++def MOVFRH2GR_S : Float_MOVF<"movfrh2gr.s", GPR32Opnd, FGR32Opnd>, MOVIF<0b0100101111>; //not realize ++ ++let isCodeGenOnly = 1 in { ++ def MOVFR2GR_DS : Float_MOVF<"movfr2gr.s", GPR64Opnd, FGR32Opnd>, MOVIF<0b0100101101>; ++} ++ ++def FCVT_S_D : Float_CVT<"fcvt.s.d", FGR32Opnd, FGR64Opnd>, R2F<0b1001000110>; ++def FCVT_D_S : Float_CVT<"fcvt.d.s", FGR64Opnd, FGR32Opnd>, R2F<0b1001001001>; ++ ++def FTINTRM_W_S : Float_Reg2<"ftintrm.w.s", FGR32Opnd>, R2F<0b1010000001>; ++def FTINTRM_W_D : Float_Reg2<"ftintrm.w.d", FGR64Opnd>, R2F<0b1010000010>; ++def FTINTRM_L_S : Float_Reg2<"ftintrm.l.s", FGR32Opnd>, R2F<0b1010001001>; ++def FTINTRM_L_D : Float_Reg2<"ftintrm.l.d", FGR64Opnd>, R2F<0b1010001010>; ++def FTINTRP_W_S : Float_Reg2<"ftintrp.w.s", FGR32Opnd>, R2F<0b1010010001>; ++def FTINTRP_W_D : Float_Reg2<"ftintrp.w.d", FGR64Opnd>, R2F<0b1010010010>; ++def FTINTRP_L_S : Float_Reg2<"ftintrp.l.s", FGR32Opnd>, R2F<0b1010011001>; ++def FTINTRP_L_D : Float_Reg2<"ftintrp.l.d", FGR64Opnd>, R2F<0b1010011010>; ++def FTINTRZ_W_S : Float_Reg2<"ftintrz.w.s", FGR32Opnd>, R2F<0b1010100001>; ++def FTINTRZ_L_D : Float_Reg2<"ftintrz.l.d", FGR64Opnd>, R2F<0b1010101010>; ++def FTINTRNE_W_S : Float_Reg2<"ftintrne.w.s", FGR32Opnd>, R2F<0b1010110001>; ++def FTINTRNE_W_D : Float_Reg2<"ftintrne.w.d", FGR64Opnd>, R2F<0b1010110010>; ++def FTINTRNE_L_S : Float_Reg2<"ftintrne.l.s", FGR32Opnd>, R2F<0b1010111001>; ++def FTINTRNE_L_D : Float_Reg2<"ftintrne.l.d", FGR64Opnd>, R2F<0b1010111010>; ++ ++def FTINT_W_S : Float_CVT<"ftint.w.s", FGR32Opnd, FGR32Opnd>, R2F<0b1011000001>; ++def FTINT_W_D : Float_CVT<"ftint.w.d", FGR32Opnd, FGR64Opnd>, R2F<0b1011000010>; ++def FTINT_L_S : Float_CVT<"ftint.l.s", FGR64Opnd, FGR32Opnd>, R2F<0b1011001001>; ++def FTINT_L_D : Float_CVT<"ftint.l.d", FGR64Opnd, FGR64Opnd>, R2F<0b1011001010>; ++def FFINT_S_W : 
Float_CVT<"ffint.s.w", FGR32Opnd, FGR32Opnd>, R2F<0b1101000100>; ++def FFINT_S_L : Float_CVT<"ffint.s.l", FGR32Opnd, FGR64Opnd>, R2F<0b1101000110>; ++def FFINT_D_W : Float_CVT<"ffint.d.w", FGR64Opnd, FGR32Opnd>, R2F<0b1101001000>; ++def FFINT_D_L : Float_CVT<"ffint.d.l", FGR64Opnd, FGR64Opnd>, R2F<0b1101001010>; ++ ++def FRINT_S : Float_Reg2<"frint.s", FGR32Opnd, frint>, R2F<0b1110010001>; ++def FRINT_D : Float_Reg2<"frint.d", FGR64Opnd, frint>, R2F<0b1110010010>; ++ ++/// ++/// R3 ++/// ++def FADD_S : Float_Reg3<"fadd.s", FGR32Opnd, fadd>, R3F<0b000001>; ++def FADD_D : Float_Reg3<"fadd.d", FGR64Opnd, fadd>, R3F<0b000010>; ++def FSUB_S : Float_Reg3<"fsub.s", FGR32Opnd, fsub>, R3F<0b000101>; ++def FSUB_D : Float_Reg3<"fsub.d", FGR64Opnd, fsub>, R3F<0b000110>; ++def FMUL_S : Float_Reg3<"fmul.s", FGR32Opnd, fmul>, R3F<0b001001>; ++def FMUL_D : Float_Reg3<"fmul.d", FGR64Opnd, fmul>, R3F<0b001010>; ++def FDIV_S : Float_Reg3<"fdiv.s", FGR32Opnd, fdiv>, R3F<0b001101>; ++def FDIV_D : Float_Reg3<"fdiv.d", FGR64Opnd, fdiv>, R3F<0b001110>; ++def FMAX_S : Float_Reg3<"fmax.s", FGR32Opnd, fmaxnum_ieee>, R3F<0b010001>; ++def FMAX_D : Float_Reg3<"fmax.d", FGR64Opnd, fmaxnum_ieee>, R3F<0b010010>; ++def FMIN_S : Float_Reg3<"fmin.s", FGR32Opnd, fminnum_ieee>, R3F<0b010101>; ++def FMIN_D : Float_Reg3<"fmin.d", FGR64Opnd, fminnum_ieee>, R3F<0b010110>; ++def FMAXA_S : Float_Reg3_Fmaxa<"fmaxa.s", FGR32Opnd>, R3F<0b011001>; ++def FMAXA_D : Float_Reg3_Fmaxa<"fmaxa.d", FGR64Opnd>, R3F<0b011010>; ++def FMINA_S : Float_Reg3_Fmaxa<"fmina.s", FGR32Opnd>, R3F<0b011101>; ++def FMINA_D : Float_Reg3_Fmaxa<"fmina.d", FGR64Opnd>, R3F<0b011110>; ++def FSCALEB_S : Float_Reg3<"fscaleb.s", FGR32Opnd>, R3F<0b100001>; ++def FSCALEB_D : Float_Reg3<"fscaleb.d", FGR64Opnd>, R3F<0b100010>; ++def FCOPYSIGN_S : Float_Reg3<"fcopysign.s", FGR32Opnd, fcopysign>, R3F<0b100101>; ++def FCOPYSIGN_D : Float_Reg3<"fcopysign.d", FGR64Opnd, fcopysign>, R3F<0b100110>; ++/// ++/// R4_IMM21 ++/// ++def FMADD_S : Mul_Reg4<"fmadd.s", FGR32Opnd>, R4MUL<0b0001>; ++def FMADD_D : Mul_Reg4<"fmadd.d", FGR64Opnd>, R4MUL<0b0010>; ++def FMSUB_S : Mul_Reg4<"fmsub.s", FGR32Opnd>, R4MUL<0b0101>; ++def FMSUB_D : Mul_Reg4<"fmsub.d", FGR64Opnd>, R4MUL<0b0110>; ++def FNMADD_S : NMul_Reg4<"fnmadd.s", FGR32Opnd>, R4MUL<0b1001>; ++def FNMADD_D : NMul_Reg4<"fnmadd.d", FGR64Opnd>, R4MUL<0b1010>; ++def FNMSUB_S : NMul_Reg4<"fnmsub.s", FGR32Opnd>, R4MUL<0b1101>; ++def FNMSUB_D : NMul_Reg4<"fnmsub.d", FGR64Opnd>, R4MUL<0b1110>; ++ ++ ++// fmadd: fj * fk + fa ++def : LoongArchPat<(fma FGR64Opnd:$fj, FGR64Opnd:$fk, FGR64Opnd:$fa), ++ (FMADD_D $fj, $fk, $fa)>; ++ ++def : LoongArchPat<(fma FGR32Opnd:$fj, FGR32Opnd:$fk, FGR32Opnd:$fa), ++ (FMADD_S $fj, $fk, $fa)>; ++ ++ ++// fmsub: fj * fk - fa ++def : LoongArchPat<(fma FGR64Opnd:$fj, FGR64Opnd:$fk, (fneg FGR64Opnd:$fa)), ++ (FMSUB_D FGR64Opnd:$fj, FGR64Opnd:$fk, FGR64Opnd:$fa)>; ++ ++def : LoongArchPat<(fma FGR32Opnd:$fj, FGR32Opnd:$fk, (fneg FGR32Opnd:$fa)), ++ (FMSUB_S FGR32Opnd:$fj, FGR32Opnd:$fk, FGR32Opnd:$fa)>; ++ ++ ++// fnmadd: -(fj * fk + fa) ++def : LoongArchPat<(fma (fneg FGR64Opnd:$fj), FGR64Opnd:$fk, (fneg FGR64Opnd:$fa)), ++ (FNMADD_D FGR64Opnd:$fj, FGR64Opnd:$fk, FGR64Opnd:$fa)>; ++ ++def : LoongArchPat<(fma (fneg FGR32Opnd:$fj), FGR32Opnd:$fk, (fneg FGR32Opnd:$fa)), ++ (FNMADD_S FGR32Opnd:$fj, FGR32Opnd:$fk, FGR32Opnd:$fa)>; ++ ++// fnmsub: -(fj * fk - fa) ++def : LoongArchPat<(fma (fneg FGR64Opnd:$fj), FGR64Opnd:$fk, FGR64Opnd:$fa), ++ (FNMSUB_D FGR64Opnd:$fj, FGR64Opnd:$fk, FGR64Opnd:$fa)>; ++ ++def : 
LoongArchPat<(fma (fneg FGR32Opnd:$fj), FGR32Opnd:$fk, FGR32Opnd:$fa), ++ (FNMSUB_S FGR32Opnd:$fj, FGR32Opnd:$fk, FGR32Opnd:$fa)>; ++ ++let Pattern = [] in { ++defm S : FCMP_COND_M<0b01, "s", FGR32Opnd>; ++defm D : FCMP_COND_M<0b10, "d", FGR64Opnd>; ++} ++// ++//defm S : FCmp_Pats; ++//defm D : FCmp_Pats; ++ ++/// ++/// Float point branching ++/// ++def LoongArch_BRANCH_F : PatLeaf<(i32 0)>; ++def LoongArch_BRANCH_T : PatLeaf<(i32 1)>; ++ ++def BCEQZ : BceqzBr<"bceqz", brtarget, LoongArch_BRANCH_F>, R1_BCEQZ<0>; ++def BCNEZ : BceqzBr<"bcnez", brtarget, LoongArch_BRANCH_T>, R1_BCEQZ<1>; ++ ++/// ++/// FMOV ++/// ++def MOVGR2FCSR : Gpr_2_Fcsr<"movgr2fcsr", GPR64Opnd>, MOVGPR2FCSR; ++def MOVFCSR2GR : Fcsr_2_Gpr<"movfcsr2gr", GPR64Opnd>, MOVFCSR2GPR; ++def MOVFR2CF : Fgr_2_Fcfr<"movfr2cf", FGR64Opnd>, MOVFGR2FCFR; ++def MOVCF2FR : Fcfr_2_Fgr<"movcf2fr", FGR64Opnd>, MOVFCFR2FGR; ++def MOVGR2CF : Gpr_2_Fcfr<"movgr2cf", GPR64Opnd>, MOVGPR2FCFR; ++def MOVCF2GR : Fcfr_2_Gpr<"movcf2gr", GPR64Opnd>, MOVFCFR2GPR; ++ ++let isCodeGenOnly = 1 in { ++ def MOVFR2CF32 : Fgr_2_Fcfr<"movfr2cf", FGR32Opnd>, MOVFGR2FCFR; ++ def MOVCF2FR32 : Fcfr_2_Fgr<"movcf2fr", FGR32Opnd>, MOVFCFR2FGR; ++ def MOVGR2CF32 : Gpr_2_Fcfr<"movgr2cf", GPR32Opnd>, MOVGPR2FCFR; ++ def MOVCF2GR32 : Fcfr_2_Gpr<"movcf2gr", GPR32Opnd>, MOVFCFR2GPR; ++} ++ ++class Sel_Reg4 ++ : InstForm<(outs RO:$fd), (ins FCFROpnd:$ca, RO:$fj, RO:$fk), ++ !strconcat(opstr, "\t$fd, $fj, $fk, $ca"), ++ [(set RO:$fd, (LoongArchFSEL RO:$fj, FCFROpnd:$ca, RO:$fk))], ++ FrmR, opstr>{ ++ let Defs = [FCC0, FCC1, FCC2, FCC3, FCC4, FCC5, FCC6]; ++ let hasFCCRegOperand = 1; ++ } ++ ++def FSEL_T_S : Sel_Reg4<"fsel", FGR32Opnd>, R4SEL; ++let isCodeGenOnly = 1 in { ++ def FSEL_T_D : Sel_Reg4<"fsel", FGR64Opnd>, R4SEL; ++} ++ ++/// ++/// Mem access ++/// ++def FLD_S : FLd<"fld.s", FGR32Opnd, mem, load>, LOAD_STORE<0b1100>; ++def FST_S : FSt<"fst.s", FGR32Opnd, mem, store>, LOAD_STORE<0b1101>; ++def FLD_D : FLd<"fld.d", FGR64Opnd, mem, load>, LOAD_STORE<0b1110>; ++def FST_D : FSt<"fst.d", FGR64Opnd, mem, store>, LOAD_STORE<0b1111>; ++ ++def FLDX_S : FLDX<"fldx.s", FGR32Opnd, load>, R3MF<0b01100000>; ++def FLDX_D : FLDX<"fldx.d", FGR64Opnd, load>, R3MF<0b01101000>; ++def FSTX_S : FSTX<"fstx.s", FGR32Opnd, store>, R3MF<0b01110000>; ++def FSTX_D : FSTX<"fstx.d", FGR64Opnd, store>, R3MF<0b01111000>; ++ ++def FLDGT_S : Float_Int_Reg3<"fldgt.s", FGR32Opnd, GPR64Opnd>, R3MF<0b11101000>; ++def FLDGT_D : Float_Int_Reg3<"fldgt.d", FGR64Opnd, GPR64Opnd>, R3MF<0b11101001>; ++def FLDLE_S : Float_Int_Reg3<"fldle.s", FGR32Opnd, GPR64Opnd>, R3MF<0b11101010>; ++def FLDLE_D : Float_Int_Reg3<"fldle.d", FGR64Opnd, GPR64Opnd>, R3MF<0b11101011>; ++def FSTGT_S : Float_STGT_LE<"fstgt.s", FGR32Opnd, GPR64Opnd>, R3MF<0b11101100>; ++def FSTGT_D : Float_STGT_LE<"fstgt.d", FGR64Opnd, GPR64Opnd>, R3MF<0b11101101>; ++def FSTLE_S : Float_STGT_LE<"fstle.s", FGR32Opnd, GPR64Opnd>, R3MF<0b11101110>; ++def FSTLE_D : Float_STGT_LE<"fstle.d", FGR64Opnd, GPR64Opnd>, R3MF<0b11101111>; ++ ++let isPseudo = 1, isCodeGenOnly = 1 in { ++ def PseudoFFINT_S_W : Float_CVT<"", FGR32Opnd, GPR32Opnd>; ++ def PseudoFFINT_D_W : Float_CVT<"", FGR64Opnd, GPR32Opnd>; ++ def PseudoFFINT_S_L : Float_CVT<"", FGR64Opnd, GPR64Opnd>; ++ def PseudoFFINT_D_L : Float_CVT<"", FGR64Opnd, GPR64Opnd>; ++} ++ ++def : LoongArchPat<(f32 (fpround FGR64Opnd:$src)), ++ (FCVT_S_D FGR64Opnd:$src)>; ++def : LoongArchPat<(f64 (fpextend FGR32Opnd:$src)), ++ (FCVT_D_S FGR32Opnd:$src)>; ++ ++def : LoongArchPat<(f32 (sint_to_fp GPR32Opnd:$src)), ++ 
(PseudoFFINT_S_W GPR32Opnd:$src)>; ++def : LoongArchPat<(f64 (sint_to_fp GPR32Opnd:$src)), ++ (PseudoFFINT_D_W GPR32Opnd:$src)>; ++def : LoongArchPat<(f32 (sint_to_fp GPR64Opnd:$src)), ++ (EXTRACT_SUBREG (PseudoFFINT_S_L GPR64Opnd:$src), sub_lo)>; ++def : LoongArchPat<(f64 (sint_to_fp GPR64Opnd:$src)), ++ (PseudoFFINT_D_L GPR64Opnd:$src)>; ++ ++def : LoongArchPat<(f32 fpimm0), (MOVGR2FR_W ZERO)>; ++def : LoongArchPat<(f32 fpimm0neg), (FNEG_S (MOVGR2FR_W ZERO))>; ++def : LoongArchPat<(f32 fpimm1), (FFINT_S_W (MOVGR2FR_W (ADDI_W ZERO, 1)))>; ++def : LoongArchPat<(f64 fpimm1), (FFINT_D_L (MOVGR2FR_D (ADDI_D ZERO_64, 1)))>; ++ ++// Patterns for loads/stores with a reg+imm operand. ++let AddedComplexity = 40 in { ++ def : LoadRegImmPat; ++ def : StoreRegImmPat; ++ def : LoadRegImmPat; ++ def : StoreRegImmPat; ++} ++ ++def : LoongArchPat<(LoongArchTruncIntFP FGR32Opnd:$src), ++ (FTINTRZ_W_S FGR32Opnd:$src)>; ++ ++def : LoongArchPat<(LoongArchTruncIntFP FGR64Opnd:$src), ++ (FTINTRZ_L_D FGR64Opnd:$src)>; ++ ++def : LoongArchPat<(LoongArchTruncIntFP FGR32Opnd:$src), ++ (FCVT_D_S (FTINTRZ_W_S FGR32Opnd:$src))>; ++ ++def : LoongArchPat<(f32 (fcopysign FGR32Opnd:$lhs, FGR64Opnd:$rhs)), ++ (FCOPYSIGN_S FGR32Opnd:$lhs, (FCVT_S_D FGR64Opnd:$rhs))>; ++def : LoongArchPat<(f64 (fcopysign FGR64Opnd:$lhs, FGR32Opnd:$rhs)), ++ (FCOPYSIGN_D FGR64Opnd:$lhs, (FCVT_D_S FGR32Opnd:$rhs))>; ++ ++let PrintMethod = "printFCCOperand",EncoderMethod = "getFCMPEncoding" in ++ def condcode : Operand; ++ ++class CEQS_FT : ++ InstForm<(outs), (ins RC:$fj, RC:$fk, condcode:$cond), ++ !strconcat("fcmp.$cond.", typestr, "\t$$fcc0, $fj, $fk"), ++ [(OpNode RC:$fj, RC:$fk, imm:$cond)], FrmFR, ++ !strconcat("fcmp.$cond.", typestr)>, HARDFLOAT { ++ let Defs = [FCC0, FCC1, FCC2, FCC3, FCC4, FCC5, FCC6, FCC7]; ++ let isCodeGenOnly = 1; ++ let hasFCCRegOperand = 1; ++} ++ ++def FCMP_S32 : CEQS_FT<"s", FGR32, LoongArchFPCmp>, CEQS_FM<0b01> { ++ bits<3> cd = 0; ++} ++def FCMP_D64 : CEQS_FT<"d", FGR64, LoongArchFPCmp>, CEQS_FM<0b10>{ ++ bits<3> cd = 0; ++} ++ ++ ++//multiclass FCmp_Pats2 { ++// def : LoongArchPat<(seteq VT:$lhs, VT:$rhs), ++// (!cast("SFCMP_CEQ_"#NAME) VT:$lhs, VT:$rhs)>; ++// def : LoongArchPat<(setgt VT:$lhs, VT:$rhs), ++// (!cast("SFCMP_CLE_"#NAME) VT:$rhs, VT:$lhs)>; ++// def : LoongArchPat<(setge VT:$lhs, VT:$rhs), ++// (!cast("SFCMP_CLT_"#NAME) VT:$rhs, VT:$lhs)>; ++// def : LoongArchPat<(setlt VT:$lhs, VT:$rhs), ++// (!cast("SFCMP_CLT_"#NAME) VT:$lhs, VT:$rhs)>; ++// def : LoongArchPat<(setle VT:$lhs, VT:$rhs), ++// (!cast("SFCMP_CLE_"#NAME) VT:$lhs, VT:$rhs)>; ++// def : LoongArchPat<(setne VT:$lhs, VT:$rhs), ++// (NOROp ++// (!cast("SFCMP_CEQ_"#NAME) VT:$lhs, VT:$rhs), ++// ZEROReg)>; ++// ++// def : LoongArchPat<(seteq VT:$lhs, VT:$rhs), ++// (!cast("DFCMP_CEQ_"#NAME) VT:$lhs, VT:$rhs)>; ++// def : LoongArchPat<(setgt VT:$lhs, VT:$rhs), ++// (!cast("DFCMP_CLE_"#NAME) VT:$rhs, VT:$lhs)>; ++// def : LoongArchPat<(setge VT:$lhs, VT:$rhs), ++// (!cast("DFCMP_CLT_"#NAME) VT:$rhs, VT:$lhs)>; ++// def : LoongArchPat<(setlt VT:$lhs, VT:$rhs), ++// (!cast("DFCMP_CLT_"#NAME) VT:$lhs, VT:$rhs)>; ++// def : LoongArchPat<(setle VT:$lhs, VT:$rhs), ++// (!cast("DFCMP_CLE_"#NAME) VT:$lhs, VT:$rhs)>; ++// def : LoongArchPat<(setne VT:$lhs, VT:$rhs), ++// (NOROp ++// (!cast("DFCMP_CEQ_"#NAME) VT:$lhs, VT:$rhs), ++// ZEROReg)>; ++// } ++// ++//defm S : FCmp_Pats2; ++//defm D : FCmp_Pats2; ++ ++let usesCustomInserter = 1 in { ++ class Select_Pseudo : ++ LoongArchPseudo<(outs RC:$dst), (ins GPR32Opnd:$cond, RC:$T, RC:$F), ++ [(set 
RC:$dst, (select GPR32Opnd:$cond, RC:$T, RC:$F))]>; ++ ++ class SelectFP_Pseudo_T : ++ LoongArchPseudo<(outs RC:$dst), (ins FCFROpnd:$cond, RC:$T, RC:$F), ++ [(set RC:$dst, (LoongArchCMovFP_T RC:$T, FCFROpnd:$cond, RC:$F))]>; ++ ++ class SelectFP_Pseudo_F : ++ LoongArchPseudo<(outs RC:$dst), (ins FCFROpnd:$cond, RC:$T, RC:$F), ++ [(set RC:$dst, (LoongArchCMovFP_F RC:$T, FCFROpnd:$cond, RC:$F))]>; ++} ++ ++def PseudoSELECT_I : Select_Pseudo; ++def PseudoSELECT_I64 : Select_Pseudo; ++def PseudoSELECT_S : Select_Pseudo; ++def PseudoSELECT_D64 : Select_Pseudo; ++ ++def PseudoSELECTFP_T_I : SelectFP_Pseudo_T; ++def PseudoSELECTFP_T_I64 : SelectFP_Pseudo_T; ++ ++def PseudoSELECTFP_F_I : SelectFP_Pseudo_F; ++def PseudoSELECTFP_F_I64 : SelectFP_Pseudo_F; ++ ++class ABSS_FT : ++ InstForm<(outs DstRC:$fd), (ins SrcRC:$fj), !strconcat(opstr, "\t$fd, $fj"), ++ [(set DstRC:$fd, (OpNode SrcRC:$fj))], FrmFR, opstr>; ++ ++def TRUNC_W_D : ABSS_FT<"ftintrz.w.d", FGR32Opnd, FGR64Opnd>, R2F<0b1010100010>; ++ ++def FTINTRZ_L_S : ABSS_FT<"ftintrz.l.s", FGR64Opnd, FGR32Opnd>, R2F<0b1010101001>; ++ ++def : LoongArchPat<(LoongArchTruncIntFP FGR64Opnd:$src), ++ (TRUNC_W_D FGR64Opnd:$src)>; ++ ++def : LoongArchPat<(LoongArchTruncIntFP FGR32Opnd:$src), ++ (FTINTRZ_L_S FGR32Opnd:$src)>; ++ ++def : Pat<(fcanonicalize FGR32Opnd:$src), (FMAX_S $src, $src)>; ++def : Pat<(fcanonicalize FGR64Opnd:$src), (FMAX_D $src, $src)>; ++ ++def : LoongArchPat<(i64 (sext (i32 (bitconvert FGR32Opnd:$src)))), ++ (MOVFR2GR_DS FGR32Opnd:$src)>; +diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrFormats.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrFormats.td +new file mode 100644 +index 000000000..8e255f857 +--- /dev/null ++++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrFormats.td +@@ -0,0 +1,448 @@ ++//===- LoongArchLASXInstrFormats.td - LoongArch LASX Instruction Formats ---*- tablegen -*-===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. 
++// ++//===----------------------------------------------------------------------===// ++ ++class LASXInst : InstLA<(outs), (ins), "", [], FrmOther>, ++ EXT_LASX { ++} ++ ++class LASXCBranch : LASXInst { ++} ++ ++class LASXSpecial : LASXInst { ++} ++ ++class LASXPseudo pattern>: ++ LoongArchPseudo { ++ let Predicates = [HasLASX]; ++} ++ ++class LASX_3R op>: LASXInst { ++ bits<5> xk; ++ bits<5> xj; ++ bits<5> xd; ++ ++ let Inst{31-15} = op; ++ let Inst{14-10} = xk; ++ let Inst{9-5} = xj; ++ let Inst{4-0} = xd; ++} ++ ++class LASX_4R op>: LASXInst { ++ bits<5> xa; ++ bits<5> xk; ++ bits<5> xj; ++ bits<5> xd; ++ ++ let Inst{31-20} = op; ++ let Inst{19-15} = xa; ++ let Inst{14-10} = xk; ++ let Inst{9-5} = xj; ++ let Inst{4-0} = xd; ++} ++ ++class LASX_XVFCMP op>: LASXInst { ++ bits<5> xd; ++ bits<5> xj; ++ bits<5> xk; ++ bits<5> cond; ++ ++ let Inst{31-20} = op; ++ let Inst{19-15} = cond; ++ let Inst{14-10} = xk; ++ let Inst{9-5} = xj; ++ let Inst{4-0} = xd; ++} ++ ++class LASX_I12_S op>: LASXInst { ++ bits<5> xd; ++ bits<17> addr; ++ ++ let Inst{31-22} = op; ++ let Inst{21-10} = addr{11-0}; ++ let Inst{9-5} = addr{16-12}; ++ let Inst{4-0} = xd; ++} ++ ++class LASX_SI12_S op>: LASXInst { ++ bits<5> xd; ++ bits<17> addr; ++ ++ let Inst{31-22} = op; ++ let Inst{21-10} = addr{11-0}; ++ let Inst{9-5} = addr{16-12}; ++ let Inst{4-0} = xd; ++} ++ ++class LASX_SI11_S op>: LASXInst { ++ bits<5> xd; ++ bits<16> addr; ++ ++ let Inst{31-21} = op; ++ let Inst{20-10} = addr{10-0}; ++ let Inst{9-5} = addr{15-11}; ++ let Inst{4-0} = xd; ++} ++ ++class LASX_SI10_S op>: LASXInst { ++ bits<5> xd; ++ bits<15> addr; ++ ++ let Inst{31-20} = op; ++ let Inst{19-10} = addr{9-0}; ++ let Inst{9-5} = addr{14-10}; ++ let Inst{4-0} = xd; ++} ++ ++class LASX_SI9_S op>: LASXInst { ++ bits<5> xd; ++ bits<14> addr; ++ ++ let Inst{31-19} = op; ++ let Inst{18-10} = addr{8-0}; ++ let Inst{9-5} = addr{13-9}; ++ let Inst{4-0} = xd; ++} ++ ++class LASX_SI8_idx5 op>: LASXInst { ++ bits<5> xd; ++ bits<5> rj; ++ bits<8> si8; ++ bits<5> idx; ++ ++ let Inst{31-23} = op; ++ let Inst{22-18} = idx; ++ let Inst{17-10} = si8; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = xd; ++} ++ ++class LASX_SI8_idx2 op>: LASXInst { ++ bits<5> xd; ++ bits<5> rj; ++ bits<8> si8; ++ bits<2> idx; ++ ++ let Inst{31-20} = op; ++ let Inst{19-18} = idx; ++ let Inst{17-10} = si8; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = xd; ++} ++ ++class LASX_SI8_idx3 op>: LASXInst { ++ bits<5> xd; ++ bits<5> rj; ++ bits<8> si8; ++ bits<3> idx; ++ ++ let Inst{31-21} = op; ++ let Inst{20-18} = idx; ++ let Inst{17-10} = si8; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = xd; ++} ++ ++class LASX_SI8_idx4 op>: LASXInst { ++ bits<5> xd; ++ bits<5> rj; ++ bits<8> si8; ++ bits<4> idx; ++ ++ let Inst{31-22} = op; ++ let Inst{21-18} = idx; ++ let Inst{17-10} = si8; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = xd; ++} ++ ++class LASX_3R_2GP op>: LASXInst { ++ bits<5> rk; ++ bits<5> rj; ++ bits<5> xd; ++ ++ let Inst{31-15} = op; ++ let Inst{14-10} = rk; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = xd; ++} ++ ++class LASX_3R_1GP op>: LASXInst { ++ bits<5> rk; ++ bits<5> xj; ++ bits<5> xd; ++ ++ let Inst{31-15} = op; ++ let Inst{14-10} = rk; ++ let Inst{9-5} = xj; ++ let Inst{4-0} = xd; ++} ++ ++class LASX_I5 op>: LASXInst { ++ bits<5> xd; ++ bits<5> xj; ++ bits<5> si5; ++ ++ let Inst{31-15} = op; ++ let Inst{14-10} = si5; ++ let Inst{9-5} = xj; ++ let Inst{4-0} = xd; ++} ++ ++class LASX_I5_U op>: LASXInst { ++ bits<5> xd; ++ bits<5> xj; ++ bits<5> ui5; ++ ++ let Inst{31-15} = op; ++ let Inst{14-10} = 
ui5; ++ let Inst{9-5} = xj; ++ let Inst{4-0} = xd; ++} ++ ++class LASX_I5_mode_U op>: LASXInst { ++ bits<5> xd; ++ bits<5> mode; ++ bits<5> ui5; ++ ++ let Inst{31-15} = op; ++ let Inst{14-10} = ui5; ++ let Inst{9-5} = mode; ++ let Inst{4-0} = xd; ++} ++ ++class LASX_2R op>: LASXInst { ++ bits<5> xj; ++ bits<5> xd; ++ ++ let Inst{31-10} = op; ++ let Inst{9-5} = xj; ++ let Inst{4-0} = xd; ++} ++ ++class LASX_SET op>: LASXInst { ++ bits<5> xj; ++ bits<3> cd; ++ ++ let Inst{31-10} = op; ++ let Inst{9-5} = xj; ++ let Inst{4-3} = 0b00; ++ let Inst{2-0} = cd; ++} ++ ++class LASX_2R_1GP op>: LASXInst { ++ bits<5> rj; ++ bits<5> xd; ++ ++ let Inst{31-10} = op; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = xd; ++} ++ ++class LASX_I3_U op>: LASXInst { ++ bits<5> xd; ++ bits<5> xj; ++ bits<3> ui3; ++ ++ let Inst{31-13} = op; ++ let Inst{12-10} = ui3; ++ let Inst{9-5} = xj; ++ let Inst{4-0} = xd; ++} ++ ++class LASX_I4_U op>: LASXInst { ++ bits<5> xd; ++ bits<5> xj; ++ bits<4> ui4; ++ ++ let Inst{31-14} = op; ++ let Inst{13-10} = ui4; ++ let Inst{9-5} = xj; ++ let Inst{4-0} = xd; ++} ++ ++class LASX_I6_U op>: LASXInst { ++ bits<5> xd; ++ bits<5> xj; ++ bits<6> ui6; ++ ++ let Inst{31-16} = op; ++ let Inst{15-10} = ui6; ++ let Inst{9-5} = xj; ++ let Inst{4-0} = xd; ++} ++ ++class LASX_I2_R_U op>: LASXInst { ++ bits<5> xd; ++ bits<5> rj; ++ bits<2> ui2; ++ ++ let Inst{31-12} = op; ++ let Inst{11-10} = ui2; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = xd; ++} ++ ++class LASX_I3_R_U op>: LASXInst { ++ bits<5> xd; ++ bits<5> rj; ++ bits<3> ui3; ++ ++ let Inst{31-13} = op; ++ let Inst{12-10} = ui3; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = xd; ++} ++ ++class LASX_ELM_COPY_U3 op>: LASXInst { ++ bits<5> rd; ++ bits<5> xj; ++ bits<3> ui3; ++ ++ let Inst{31-13} = op; ++ let Inst{12-10} = ui3; ++ let Inst{9-5} = xj; ++ let Inst{4-0} = rd; ++} ++ ++class LASX_ELM_COPY_U2 op>: LASXInst { ++ bits<5> rd; ++ bits<5> xj; ++ bits<2> ui2; ++ ++ let Inst{31-12} = op; ++ let Inst{11-10} = ui2; ++ let Inst{9-5} = xj; ++ let Inst{4-0} = rd; ++} ++ ++class LASX_I1_U op>: LASXInst { ++ bits<5> xd; ++ bits<5> xj; ++ bits<1> ui1; ++ ++ let Inst{31-11} = op; ++ let Inst{10} = ui1; ++ let Inst{9-5} = xj; ++ let Inst{4-0} = xd; ++} ++ ++class LASX_I2_U op>: LASXInst { ++ bits<5> xd; ++ bits<5> xj; ++ bits<2> ui2; ++ ++ let Inst{31-12} = op; ++ let Inst{11-10} = ui2; ++ let Inst{9-5} = xj; ++ let Inst{4-0} = xd; ++} ++ ++class LASX_I7_U op>: LASXInst { ++ bits<5> xd; ++ bits<5> xj; ++ bits<7> ui7; ++ ++ let Inst{31-17} = op; ++ let Inst{16-10} = ui7; ++ let Inst{9-5} = xj; ++ let Inst{4-0} = xd; ++} ++ ++class LASX_1R_I13 op>: LASXInst { ++ bits<13> i13; ++ bits<5> xd; ++ ++ let Inst{31-18} = op; ++ let Inst{17-5} = i13; ++ let Inst{4-0} = xd; ++} ++ ++class LASX_I8_U op>: LASXInst { ++ bits<5> xd; ++ bits<5> xj; ++ bits<8> ui8; ++ ++ let Inst{31-18} = op; ++ let Inst{17-10} = ui8; ++ let Inst{9-5} = xj; ++ let Inst{4-0} = xd; ++} ++ ++ ++////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// ++class LASX_I1_R_U op>: LASXInst { ++ bits<5> xd; ++ bits<5> rj; ++ bits<1> ui1; ++ ++ let Inst{31-11} = op; ++ let Inst{10} = ui1; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = xd; ++} ++ ++class LASX_I4_R_U op>: LASXInst { ++ bits<5> xd; ++ bits<5> rj; ++ bits<4> ui4; ++ ++ let Inst{31-14} = op; ++ let Inst{13-10} = ui4; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = xd; ++} ++ ++class LASX_ELM_COPY_B op>: LASXInst { ++ bits<5> rd; ++ bits<5> xj; ++ bits<4> ui4; ++ ++ let Inst{31-14} = op; 
++ let Inst{13-10} = ui4; ++ let Inst{9-5} = xj; ++ let Inst{4-0} = rd; ++} ++ ++class LASX_ELM_COPY_D op>: LASXInst { ++ bits<5> rd; ++ bits<5> xj; ++ bits<1> ui1; ++ ++ let Inst{31-11} = op; ++ let Inst{10} = ui1; ++ let Inst{9-5} = xj; ++ let Inst{4-0} = rd; ++} ++ ++class LASX_Addr_SI8_idx1 op>: LASXInst { ++ bits<5> xd; ++ bits<13> addr; ++ bits<1> idx; ++ ++ let Inst{31-19} = op; ++ let Inst{18-11} = addr{7-0}; ++ let Inst{10} = idx; ++ let Inst{9-5} = addr{12-8}; ++ let Inst{4-0} = xd; ++} ++ ++class LASX_1R_I13_I10 op>: LASXInst { ++ bits<10> i10; ++ bits<5> xd; ++ ++ let Inst{31-15} = op; ++ let Inst{14-5} = i10; ++ let Inst{4-0} = xd; ++} ++ ++ ++ ++ ++ ++ +diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +new file mode 100644 +index 000000000..2677a79fa +--- /dev/null ++++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +@@ -0,0 +1,5673 @@ ++//===- LoongArchLASXInstrInfo.td - loongson LASX instructions -*- tablegen ------------*-=// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++// ++// This file describes loongson ASX instructions. ++// ++//===----------------------------------------------------------------------===// ++def SDT_XVPERMI : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisInt<0>, ++ SDTCisSameAs<0, 1>, ++ SDTCisVT<2, i32>]>; ++def SDT_XVSHFI : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisVec<0>, ++ SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, ++ SDTCisVT<3, i32>]>; ++def SDT_XVBROADCAST : SDTypeProfile<1, 1, [SDTCisVec<0>]>; ++ ++def SDT_INSVE : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>, ++ SDTCisSameAs<1, 2>, ++ SDTCisVT<3, i32>]>; ++ ++def SDT_XVPICKVE : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>, ++ SDTCisSameAs<1, 2>, ++ SDTCisVT<3, i32>]>; ++ ++def SDT_XVSHUF4I : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisInt<0>, ++ SDTCisSameAs<0, 1>, ++ SDTCisSameAs<0, 2>, ++ SDTCisVT<3, i32>]>; ++ ++def LoongArchXVSHUFI : SDNode<"LoongArchISD::XVSHFI", SDT_XVSHFI>; ++ ++def LoongArchXVSELI : SDNode<"LoongArchISD::XVSELI", SDT_XVSHFI>; ++ ++def LoongArchXVPERMI : SDNode<"LoongArchISD::XVPERMI", SDT_XVPERMI>; ++ ++def LoongArchXVBROADCAST : SDNode<"LoongArchISD::XVBROADCAST", SDT_XVBROADCAST>; ++ ++def LoongArchINSVE : SDNode<"LoongArchISD::INSVE", SDT_INSVE>; ++ ++def LoongArchXVSHUF4I : SDNode<"LoongArchISD::XVSHUF4I", SDT_XVSHUF4I>; ++ ++def LoongArchXVPICKVE : SDNode<"LoongArchISD::XVPICKVE", SDT_INSVE>; ++ ++def xvbroadcast_v32i8 : PatFrag<(ops node:$v1), ++ (v32i8 (LoongArchXVBROADCAST node:$v1))>; ++def xvbroadcast_v16i16 : PatFrag<(ops node:$v1), ++ (v16i16 (LoongArchXVBROADCAST node:$v1))>; ++def xvbroadcast_v8i32 : PatFrag<(ops node:$v1), ++ (v8i32 (LoongArchXVBROADCAST node:$v1))>; ++def xvbroadcast_v4i64 : PatFrag<(ops node:$v1), ++ (v4i64 (LoongArchXVBROADCAST node:$v1))>; ++ ++ ++def vfseteq_v8f32 : vfsetcc_type; ++def vfseteq_v4f64 : vfsetcc_type; ++def vfsetge_v8f32 : vfsetcc_type; ++def vfsetge_v4f64 : vfsetcc_type; ++def vfsetgt_v8f32 : vfsetcc_type; ++def vfsetgt_v4f64 : vfsetcc_type; ++def vfsetle_v8f32 : vfsetcc_type; ++def vfsetle_v4f64 : vfsetcc_type; ++def vfsetlt_v8f32 : vfsetcc_type; ++def vfsetlt_v4f64 : vfsetcc_type; ++def vfsetne_v8f32 : vfsetcc_type; ++def vfsetne_v4f64 : vfsetcc_type; ++def vfsetoeq_v8f32 : vfsetcc_type; ++def vfsetoeq_v4f64 : vfsetcc_type; 
++def vfsetoge_v8f32 : vfsetcc_type; ++def vfsetoge_v4f64 : vfsetcc_type; ++def vfsetogt_v8f32 : vfsetcc_type; ++def vfsetogt_v4f64 : vfsetcc_type; ++def vfsetole_v8f32 : vfsetcc_type; ++def vfsetole_v4f64 : vfsetcc_type; ++def vfsetolt_v8f32 : vfsetcc_type; ++def vfsetolt_v4f64 : vfsetcc_type; ++def vfsetone_v8f32 : vfsetcc_type; ++def vfsetone_v4f64 : vfsetcc_type; ++def vfsetord_v8f32 : vfsetcc_type; ++def vfsetord_v4f64 : vfsetcc_type; ++def vfsetun_v8f32 : vfsetcc_type; ++def vfsetun_v4f64 : vfsetcc_type; ++def vfsetueq_v8f32 : vfsetcc_type; ++def vfsetueq_v4f64 : vfsetcc_type; ++def vfsetuge_v8f32 : vfsetcc_type; ++def vfsetuge_v4f64 : vfsetcc_type; ++def vfsetugt_v8f32 : vfsetcc_type; ++def vfsetugt_v4f64 : vfsetcc_type; ++def vfsetule_v8f32 : vfsetcc_type; ++def vfsetule_v4f64 : vfsetcc_type; ++def vfsetult_v8f32 : vfsetcc_type; ++def vfsetult_v4f64 : vfsetcc_type; ++def vfsetune_v8f32 : vfsetcc_type; ++def vfsetune_v4f64 : vfsetcc_type; ++ ++def xvsplati8 : PatFrag<(ops node:$e0), ++ (v32i8 (build_vector ++ node:$e0, node:$e0, node:$e0, node:$e0, ++ node:$e0, node:$e0, node:$e0, node:$e0, ++ node:$e0, node:$e0, node:$e0, node:$e0, ++ node:$e0, node:$e0, node:$e0, node:$e0, ++ node:$e0, node:$e0, node:$e0, node:$e0, ++ node:$e0, node:$e0, node:$e0, node:$e0, ++ node:$e0, node:$e0, node:$e0, node:$e0, ++ node:$e0, node:$e0, node:$e0, node:$e0))>; ++def xvsplati16 : PatFrag<(ops node:$e0), ++ (v16i16 (build_vector ++ node:$e0, node:$e0, node:$e0, node:$e0, ++ node:$e0, node:$e0, node:$e0, node:$e0, ++ node:$e0, node:$e0, node:$e0, node:$e0, ++ node:$e0, node:$e0, node:$e0, node:$e0))>; ++def xvsplati32 : PatFrag<(ops node:$e0), ++ (v8i32 (build_vector ++ node:$e0, node:$e0, node:$e0, node:$e0, ++ node:$e0, node:$e0, node:$e0, node:$e0))>; ++def xvsplati64 : PatFrag<(ops node:$e0), ++ (v4i64 (build_vector ++ node:$e0, node:$e0, node:$e0, node:$e0))>; ++def xvsplatf32 : PatFrag<(ops node:$e0), ++ (v8f32 (build_vector node:$e0, node:$e0, ++ node:$e0, node:$e0))>; ++def xvsplatf64 : PatFrag<(ops node:$e0), ++ (v4f64 (build_vector node:$e0, node:$e0))>; ++ ++def xvsplati8_uimm3 : SplatComplexPattern; ++def xvsplati16_uimm4 : SplatComplexPattern; ++ ++def xvsplati64_uimm6 : SplatComplexPattern; ++ ++def xvsplati8_simm5 : SplatComplexPattern; ++def xvsplati16_simm5 : SplatComplexPattern; ++def xvsplati32_simm5 : SplatComplexPattern; ++def xvsplati64_simm5 : SplatComplexPattern; ++ ++def xvsplat_imm_eq_1 : PatLeaf<(build_vector), [{ ++ APInt Imm; ++ EVT EltTy = N->getValueType(0).getVectorElementType(); ++ return selectVSplat(N, Imm, EltTy.getSizeInBits()) && ++ Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 1; ++}]>; ++ ++def xvsplati64_imm_eq_1 : PatLeaf<(bitconvert (v8i32 (build_vector))), [{ ++ APInt Imm; ++ SDNode *BV = N->getOperand(0).getNode(); ++ EVT EltTy = N->getValueType(0).getVectorElementType(); ++ ++ return selectVSplat(BV, Imm, EltTy.getSizeInBits()) && ++ Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 1; ++}]>; ++ ++def xvbitclr_b : PatFrag<(ops node:$xk, node:$xa), ++ (and node:$xk, (xor (shl vsplat_imm_eq_1, node:$xa), ++ immAllOnesV))>; ++def xvbitclr_h : PatFrag<(ops node:$xk, node:$xa), ++ (and node:$xk, (xor (shl vsplat_imm_eq_1, node:$xa), ++ immAllOnesV))>; ++def xvbitclr_w : PatFrag<(ops node:$xk, node:$xa), ++ (and node:$xk, (xor (shl vsplat_imm_eq_1, node:$xa), ++ immAllOnesV))>; ++def xvbitclr_d : PatFrag<(ops node:$xk, node:$xa), ++ (and node:$xk, (xor (shl (v4i64 vsplati64_imm_eq_1), ++ node:$xa), ++ (bitconvert (v8i32 immAllOnesV))))>; ++ ++ ++ 
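++// The xvbitclr_* fragments above match the per-element "clear one bit" idiom:
++// AND the first operand with the complement of (1 << shift-amount), where the
++// complement is written as an XOR against an all-ones vector.  A scalar sketch
++// of the intended per-element behaviour for the .d variant (illustrative only;
++// the helper name and the modulo-width masking of the shift amount are
++// assumptions, not taken from this patch):
++//
++//   static inline unsigned long long bitclr_d_lane(unsigned long long xk,
++//                                                  unsigned long long xa) {
++//     return xk & ~(1ULL << (xa & 63));  /* clear bit (xa % 64) of xk */
++//   }
++//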
++def xvsplati8_uimm5 : SplatComplexPattern; ++def xvsplati16_uimm5 : SplatComplexPattern; ++def xvsplati32_uimm5 : SplatComplexPattern; ++def xvsplati64_uimm5 : SplatComplexPattern; ++def xvsplati8_uimm8 : SplatComplexPattern; ++def xvsplati16_uimm8 : SplatComplexPattern; ++def xvsplati32_uimm8 : SplatComplexPattern; ++def xvsplati64_uimm8 : SplatComplexPattern; ++ ++ ++ ++def xvsplati8_uimm4 : SplatComplexPattern; ++def xvsplati16_uimm3 : SplatComplexPattern; ++def xvsplati32_uimm2 : SplatComplexPattern; ++def xvsplati64_uimm1 : SplatComplexPattern; ++ ++ ++// Patterns. ++class LASXPat pred = [HasLASX]> : ++ Pat, Requires; ++ ++class LASX_4RF { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, ROXK:$xk, ROXA:$xa); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $xk, $xa"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, ROXK:$xk, ROXA:$xa))]; ++} ++ ++class LASX_3RF { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, ROXK:$xk); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $xk"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, ROXK:$xk))]; ++} ++ ++class LASX_3R_SETCC_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, ROXK:$xk); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $xk"); ++ list Pattern = [(set ROXD:$xd, (VT (vsetcc ROXJ:$xj, ROXK:$xk, CC)))]; ++} ++ ++class LASX_LD { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins MemOpnd:$addr); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $addr"); ++ list Pattern = [(set ROXD:$xd, (TyNode (OpNode Addr:$addr)))]; ++ string DecoderMethod = "DecodeLASX256Mem"; ++} ++ ++class LASX_ST { ++ dag OutOperandList = (outs); ++ dag InOperandList = (ins ROXD:$xd, MemOpnd:$addr); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $addr"); ++ list Pattern = [(OpNode (TyNode ROXD:$xd), Addr:$addr)]; ++ string DecoderMethod = "DecodeLASX256Mem"; ++} ++ ++class LASX_I8_U5_DESC_BASE { ++ dag OutOperandList = (outs); ++ dag InOperandList = (ins ROXD:$xd, PtrRC:$rj, ImmOp:$si8, uimm5:$idx); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $rj, $si8, $idx"); ++ list Pattern = [(OpNode ROXD:$xd, iPTR:$rj, Imm:$si8, immZExt5:$idx)]; ++ string DecoderMethod = "DecodeLASX256memstl"; ++} ++ ++class LASX_I8_U2_DESC_BASE { ++ dag OutOperandList = (outs); ++ dag InOperandList = (ins ROXD:$xd, PtrRC:$rj, ImmOp:$si8, uimm2:$idx); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $rj, $si8, $idx"); ++ list Pattern = [(OpNode ROXD:$xd, iPTR:$rj, Imm:$si8, immZExt2:$idx)]; ++ string DecoderMethod = "DecodeLASX256memstl"; ++} ++ ++class LASX_I8_U3_DESC_BASE { ++ dag OutOperandList = (outs); ++ dag InOperandList = (ins ROXD:$xd, PtrRC:$rj, ImmOp:$si8, uimm3:$idx); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $rj, $si8, $idx"); ++ list Pattern = [(OpNode ROXD:$xd, iPTR:$rj, Imm:$si8, immZExt3:$idx)]; ++ string DecoderMethod = "DecodeLASX256memstl"; ++} ++ ++class LASX_I8_U4_DESC_BASE { ++ dag OutOperandList = (outs); ++ dag InOperandList = (ins ROXD:$xd, PtrRC:$rj, ImmOp:$si8, uimm4:$idx); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $rj, $si8, $idx"); ++ list Pattern = [(OpNode ROXD:$xd, iPTR:$rj, Imm:$si8, immZExt4:$idx)]; ++ string DecoderMethod = "DecodeLASX256memstl"; ++} ++ ++class LASX_SDX_LA { ++ dag OutOperandList = (outs); ++ dag InOperandList = (ins ROXD:$xd, PtrRC:$rj, RORK:$rk); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $rj, $rk"); ++ list Pattern = [(OpNode ROXD:$xd, 
iPTR:$rj, RORK:$rk)]; ++} ++ ++class LASX_3R_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, ROXK:$xk); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $xk"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, ROXK:$xk))]; ++} ++ ++class LASX_LDX_LA { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins PtrRC:$rj, RORK:$rk); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $rj, $rk"); ++ list Pattern = [(set ROXD:$xd, (OpNode iPTR:$rj, RORK:$rk))]; ++} ++ ++class LASX_3R_4R_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXD:$xd_in, ROXJ:$xj, ROXK:$xk); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $xk"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXD:$xd_in, ROXJ:$xj, ++ ROXK:$xk))]; ++ string Constraints = "$xd = $xd_in"; ++} ++ ++ ++class LASX_3R_VREPLVE_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, GPR32Opnd:$rk); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $rk"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, GPR32Opnd:$rk))]; ++} ++ ++ ++class LASX_3R_VREPLVE_DESC_BASE_N { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, GPR64Opnd:$rk); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $rk"); ++ list Pattern = []; ++} ++ ++ ++class LASX_VEC_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, ROXK:$xk); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $xk"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, ROXK:$xk))]; ++} ++ ++ ++ ++class LASX_3RF_DESC_BASE : ++ LASX_3R_DESC_BASE; ++ ++ ++class LASX_3R_DESC_BASE1 { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, ROXK:$xk); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $xk"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xk, ROXK:$xj))]; ++} ++ ++class LASX_3RF_DESC_BASE1 : ++ LASX_3R_DESC_BASE1; ++ ++ ++ ++class LASX_3R_VSHF_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXD:$xd_in, ROXJ:$xj, ROXK:$xk); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $xk"); ++ list Pattern = [(set ROXD:$xd, (LoongArchVSHF ROXD:$xd_in, ROXJ:$xj, ++ ROXK:$xk))]; ++ string Constraints = "$xd = $xd_in"; ++} ++ ++class LASX_I5_SETCC_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, SplatImm.OpClass:$si5); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $si5"); ++ list Pattern = [(set ROXD:$xd, (VT (vsetcc ROXJ:$xj, SplatImm:$si5, CC)))]; ++} ++ ++class LASX_I5_SETCC_DESC_BASE_Intrinsic { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, ImmOp:$si5); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $si5"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, Imm:$si5))]; ++} ++ ++ ++class LASX_I5_U_SETCC_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, SplatImm.OpClass:$ui5); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui5"); ++ list Pattern = [(set ROXD:$xd, (VT (vsetcc ROXJ:$xj, SplatImm:$ui5, CC)))]; ++} ++ ++class LASX_I5_U_SETCC_DESC_BASE_Intrinsic { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, ImmOp:$ui5); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui5"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, Imm:$ui5))]; ++} ++ ++class LASX_VEC_PSEUDO_BASE : ++ LASXPseudo<(outs ROXD:$xd), (ins ROXJ:$xj, ROXK:$xk), ++ 
[(set ROXD:$xd, (OpNode ROXJ:$xj, ROXK:$xk))]>; ++ ++ ++class LASX_I5_U_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, SplatImm.OpClass:$ui5); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui5"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, SplatImm:$ui5))]; ++} ++ ++ ++class LASX_I5_U_DESC_BASE_Intrinsic { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, ImmOp:$ui5); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui5"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, Imm:$ui5))]; ++} ++ ++class LASX_U5_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, uimm5:$ui5); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui5"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, immZExt5:$ui5))]; ++} ++ ++class LASX_U5N_DESC_BASE : ++ LASX_U5_DESC_BASE; ++ ++class LASX_U5_4R_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXD:$xd_in, ROXJ:$xj, uimm5:$ui5); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui5"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXD:$xd_in, ROXJ:$xj, immZExt5:$ui5))]; ++ string Constraints = "$xd = $xd_in"; ++} ++ ++class LASX_2R_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj))]; ++} ++ ++class LASX_SET_DESC_BASE { ++ dag OutOperandList = (outs FCFROpnd:$cd); ++ dag InOperandList = (ins ROXD:$xj); ++ string AsmString = !strconcat(instr_asm, "\t$cd, $xj"); ++ list Pattern = []; ++} ++ ++class LASX_2RF_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj))]; ++} ++ ++class LASX_I5_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, SplatImm.OpClass:$si5); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $si5"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, SplatImm:$si5))]; ++} ++ ++class LASX_I5_DESC_BASE_Intrinsic { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, ImmOp:$si5); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $si5"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, Imm:$si5))]; ++} ++ ++ ++class LASX_2R_REPL_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROS:$rj); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $rj"); ++ list Pattern = [(set ROXD:$xd, (VT (OpNode ROS:$rj)))]; ++} ++ ++class LASX_XVEXTEND_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj"); ++ list Pattern = [(set ROXD:$xd, (DTy (OpNode (STy ROXJ:$xj))))]; ++} ++ ++class LASX_RORI_U3_DESC_BASE_Intrinsic { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, ImmOp:$ui3); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui3"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, Imm:$ui3))]; ++} ++ ++class LASX_RORI_U4_DESC_BASE_Intrinsic { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, ImmOp:$ui4); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui4"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, Imm:$ui4))]; ++} ++ ++class LASX_RORI_U5_DESC_BASE_Intrinsic { ++ dag OutOperandList = (outs 
ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, ImmOp:$ui5); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui5"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, Imm:$ui5))]; ++} ++ ++class LASX_RORI_U6_DESC_BASE_Intrinsic { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, ImmOp:$ui6); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui6"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, Imm:$ui6))]; ++} ++ ++class LASX_BIT_3_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, ImmOp:$ui3); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui3"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, Imm:$ui3))]; ++} ++ ++class LASX_BIT_4_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, ImmOp:$ui4); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui4"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, Imm:$ui4))]; ++} ++ ++class LASX_BIT_5_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, ImmOp:$ui5); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui5"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, Imm:$ui5))]; ++} ++ ++class LASX_BIT_6_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, ImmOp:$ui6); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui6"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, Imm:$ui6))]; ++} ++ ++class LASX_BIT_2_4O_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXD:$xd_in, ROXJ:$xj, ImmOp:$ui2); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui2"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXD:$xd_in, ROXJ:$xj, Imm:$ui2))]; ++ string Constraints = "$xd = $xd_in"; ++} ++ ++class LASX_BIT_2_4ON : ++ LASX_BIT_2_4O_DESC_BASE; ++ ++class LASX_BIT_3_4O_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXD:$xd_in, ROXJ:$xj, ImmOp:$ui3); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui3"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXD:$xd_in, ROXJ:$xj, Imm:$ui3))]; ++ string Constraints = "$xd = $xd_in"; ++} ++ ++class LASX_BIT_3_4ON : ++ LASX_BIT_3_4O_DESC_BASE; ++ ++class LASX_INSERT_U3_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXD:$xd_in, ROS:$rj, ImmOp:$ui3); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $rj, $ui3"); ++ list Pattern = [(set ROXD:$xd, (VTy (insertelt (VTy ROXD:$xd_in), ROS:$rj, Imm:$ui3)))]; ++ string Constraints = "$xd = $xd_in"; ++} ++ ++class LASX_INSERT_U2_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXD:$xd_in, ROS:$rj, ImmOp:$ui2); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $rj, $ui2"); ++ list Pattern = [(set ROXD:$xd, (VTy (insertelt (VTy ROXD:$xd_in), ROS:$rj, Imm:$ui2)))]; ++ string Constraints = "$xd = $xd_in"; ++} ++ ++class LASX_COPY_U2_DESC_BASE { ++ dag OutOperandList = (outs ROD:$rd); ++ dag InOperandList = (ins ROXJ:$xj, ImmOp:$ui2); ++ string AsmString = !strconcat(instr_asm, "\t$rd, $xj, $ui2"); ++ list Pattern = [(set ROD:$rd, (OpNode (VecTy ROXJ:$xj), Imm:$ui2))]; ++} ++ ++class LASX_COPY_U3_DESC_BASE { ++ dag OutOperandList = (outs ROD:$rd); ++ dag InOperandList = (ins ROXJ:$xj, ImmOp:$ui3); ++ string AsmString = !strconcat(instr_asm, "\t$rd, $xj, $ui3"); ++ list Pattern = [(set ROD:$rd, (OpNode (VecTy ROXJ:$xj), Imm:$ui3))]; ++} ++ ++class LASX_ELM_U4_VREPLVE_DESC_BASE_Intrinsic { 
++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, uimm4:$ui4); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui4"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, immZExt4:$ui4))]; ++} ++ ++class LASX_ELM_U3_VREPLVE_DESC_BASE_Intrinsic { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, uimm3:$ui3); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui3"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, immZExt3:$ui3))]; ++} ++ ++class LASX_ELM_U2_VREPLVE_DESC_BASE_Intrinsic { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, uimm2:$ui2); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui2"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, immZExt2:$ui2))]; ++} ++ ++class LASX_ELM_U1_VREPLVE_DESC_BASE_Intrinsic { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, uimm1:$ui1); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui1"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, immZExt1:$ui1))]; ++} ++ ++class LASX_XVBROADCAST_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj"); ++ list Pattern = [(set ROXD:$xd, (OpNode (TyNode ROXJ:$xj)))]; ++} ++ ++class LASX_2R_U3_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, uimm3:$ui3); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui3"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, immZExt3:$ui3))]; ++} ++ ++class LASX_2R_U4_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, uimm4:$ui4); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui4"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, immZExt4:$ui4))]; ++} ++ ++class LASX_2R_U5_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, uimm5:$ui5); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui5"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, immZExt5:$ui5))]; ++} ++ ++class LASX_2R_U6_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, uimm6:$ui6); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui6"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, immZExt6:$ui6))]; ++} ++ ++class LASX_BIT_U3_VREPLVE_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, SplatImm.OpClass:$ui3); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui3"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, SplatImm:$ui3))]; ++} ++ ++class LASX_BIT_U4_VREPLVE_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, SplatImm.OpClass:$ui4); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui4"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, SplatImm:$ui4))]; ++} ++ ++class LASX_BIT_U5_VREPLVE_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, SplatImm.OpClass:$ui5); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui5"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, SplatImm:$ui5))]; ++} ++ ++class LASX_BIT_U6_VREPLVE_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, SplatImm.OpClass:$ui6); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui6"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, SplatImm:$ui6))]; ++} ++ ++class 
LASX_BIT_U3_VREPLVE_DESC_BASE_Intrinsic { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, ImmOp:$ui3); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui3"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, Imm:$ui3))]; ++} ++ ++class LASX_BIT_U4_VREPLVE_DESC_BASE_Intrinsic { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, ImmOp:$ui4); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui4"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, Imm:$ui4))]; ++} ++ ++class LASX_BIT_U5_VREPLVE_DESC_BASE_Intrinsic { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, ImmOp:$ui5); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui5"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, Imm:$ui5))]; ++} ++ ++class LASX_BIT_U6_VREPLVE_DESC_BASE_Intrinsic { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, ImmOp:$ui6); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui6"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, Imm:$ui6))]; ++} ++ ++class LASX_U4_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXD:$xd_in,ROXJ:$xj, ImmOp:$ui4); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui4"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXD:$xd_in,ROXJ:$xj, Imm:$ui4))]; ++ string Constraints = "$xd = $xd_in"; ++} ++ ++class LASX_N4_U5_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXD:$xd_in, ROXJ:$xj, ImmOp:$ui5); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui5"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXD:$xd_in, ROXJ:$xj, Imm:$ui5))]; ++ string Constraints = "$xd = $xd_in"; ++} ++ ++class LASX_U6_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXD:$xd_in, ROXJ:$xj, ImmOp:$ui6); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui6"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXD:$xd_in, ROXJ:$xj, Imm:$ui6))]; ++ string Constraints = "$xd = $xd_in"; ++} ++ ++class LASX_D_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXD:$xd_in, ROXJ:$xj, uimm7:$ui7); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui7"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXD:$xd_in, ROXJ:$xj, immZExt7:$ui7))]; ++ string Constraints = "$xd = $xd_in"; ++} ++ ++class LASX_2R_3R_U4_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXD:$xd_in, ROXJ:$xj, uimm4:$ui4); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui4"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXD:$xd_in, ROXJ:$xj, immZExt4:$ui4))]; ++ string Constraints = "$xd = $xd_in"; ++} ++ ++class LASX_2R_3R_U5_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXD:$xd_in, ROXJ:$xj, uimm5:$ui5); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui5"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXD:$xd_in, ROXJ:$xj, immZExt5:$ui5))]; ++ string Constraints = "$xd = $xd_in"; ++} ++ ++class LASX_2R_3R_U6_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXD:$xd_in, ROXJ:$xj, uimm6:$ui6); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui6"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXD:$xd_in, ROXJ:$xj, immZExt6:$ui6))]; ++ string Constraints = "$xd = $xd_in"; ++} ++ ++class LASX_2R_3R_U7_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXD:$xd_in, ROXJ:$xj, uimm7:$ui7); ++ 
string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui7"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXD:$xd_in, ROXJ:$xj, immZExt7:$ui7))]; ++ string Constraints = "$xd = $xd_in"; ++} ++ ++class LASX_2R_3R_U8_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXD:$xd_in, ROXJ:$xj, uimm8:$ui8); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui8"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXD:$xd_in, ROXJ:$xj, immZExt8:$ui8))]; ++ string Constraints = "$xd = $xd_in"; ++} ++ ++class LASX_2R_3R_U8_SELECT { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXD:$xd_in, ROXJ:$xj, vsplat_uimm8:$ui8); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui8"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXD:$xd_in, xvsplati8_uimm8:$ui8, ROXJ:$xj))]; ++ string Constraints = "$xd = $xd_in"; ++} ++ ++class LASX_I8_O4_SHF_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXD:$xd_in, ROXJ:$xj, uimm8:$ui8); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui8"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXD:$xd_in, ROXJ:$xj, immZExt8:$ui8))]; ++ string Constraints = "$xd = $xd_in"; ++} ++ ++class LASX_I8_SHF_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, uimm8:$ui8); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui8"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, immZExt8:$ui8))]; ++} ++ ++class LASX_2R_U8_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, uimm8:$ui8); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui8"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, immZExt8:$ui8))]; ++} ++ ++class LASX_I13_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins immOp:$i13); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $i13"); ++ list Pattern = [(set ROXD:$xd, (OpNode (Ty simm13:$i13)))]; ++ string DecoderMethod = "DecodeLASX256Mem13"; ++} ++ ++class LASX_I13_DESC_BASE_10 { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ImmOp:$i10); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $i10"); ++ bit hasSideEffects = 0; ++ string DecoderMethod = "DecodeLASX256Mem10"; ++ list Pattern = [(set ROXD:$xd, (OpNode Imm:$i10))]; ++ } ++ ++class LASX_BIT_U8_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, SplatImm.OpClass:$ui8); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui8"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, SplatImm:$ui8))]; ++} ++ ++class LASX_2RN_3R_U8_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXD:$xd_in, ROXJ:$xj, uimm8:$ui8); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui8"); ++ list Pattern = []; ++ string Constraints = "$xd = $xd_in"; ++} ++ ++ ++//encoding ++ ++def XVFMADD_S : LASX_4R<0b000010100001>, ++ LASX_4RF<"xvfmadd.s", int_loongarch_lasx_xvfmadd_s, LASX256WOpnd>; ++ ++def XVFMADD_D : LASX_4R<0b000010100010>, ++ LASX_4RF<"xvfmadd.d", int_loongarch_lasx_xvfmadd_d, LASX256DOpnd>; ++ ++ ++def XVFMSUB_S : LASX_4R<0b000010100101>, ++ LASX_4RF<"xvfmsub.s", int_loongarch_lasx_xvfmsub_s, LASX256WOpnd>; ++ ++def XVFMSUB_D : LASX_4R<0b000010100110>, ++ LASX_4RF<"xvfmsub.d", int_loongarch_lasx_xvfmsub_d, LASX256DOpnd>; ++ ++ ++def XVFNMADD_S : LASX_4R<0b000010101001>, ++ LASX_4RF<"xvfnmadd.s", int_loongarch_lasx_xvfnmadd_s, LASX256WOpnd>; ++ ++def XVFNMADD_D : LASX_4R<0b000010101010>, ++ 
LASX_4RF<"xvfnmadd.d", int_loongarch_lasx_xvfnmadd_d, LASX256DOpnd>; ++ ++ ++def XVFNMSUB_S : LASX_4R<0b000010101101>, ++ LASX_4RF<"xvfnmsub.s", int_loongarch_lasx_xvfnmsub_s, LASX256WOpnd>; ++ ++def XVFNMSUB_D : LASX_4R<0b000010101110>, ++ LASX_4RF<"xvfnmsub.d", int_loongarch_lasx_xvfnmsub_d, LASX256DOpnd>; ++ ++ ++// xvfmadd: xj * xk + xa ++def : LASXPat<(fma v4f64:$xj, v4f64:$xk, v4f64:$xa), ++ (XVFMADD_D $xj, $xk, $xa)>; ++ ++def : LASXPat<(fma v8f32:$xj, v8f32:$xk, v8f32:$xa), ++ (XVFMADD_S $xj, $xk, $xa)>; ++ ++ ++// xvfmsub: xj * xk - xa ++def : LASXPat<(fma v4f64:$xj, v4f64:$xk, (fneg v4f64:$xa)), ++ (XVFMSUB_D v4f64:$xj, v4f64:$xk, v4f64:$xa)>; ++ ++def : LASXPat<(fma v8f32:$xj, v8f32:$xk, (fneg v8f32:$xa)), ++ (XVFMSUB_S v8f32:$xj, v8f32:$xk, v8f32:$xa)>; ++ ++ ++// xvfnmadd: -(xj * xk + xa) ++def : LASXPat<(fma (fneg v4f64:$xj), v4f64:$xk, (fneg v4f64:$xa)), ++ (XVFNMADD_D v4f64:$xj, v4f64:$xk, v4f64:$xa)>; ++ ++def : LASXPat<(fma (fneg v8f32:$xj), v8f32:$xk, (fneg v8f32:$xa)), ++ (XVFNMADD_S v8f32:$xj, v8f32:$xk, v8f32:$xa)>; ++ ++// xvfnmsub: -(xj * xk - xa) ++def : LASXPat<(fma (fneg v4f64:$xj), v4f64:$xk, v4f64:$xa), ++ (XVFNMSUB_D v4f64:$xj, v4f64:$xk, v4f64:$xa)>; ++ ++def : LASXPat<(fma (fneg v8f32:$xj), v8f32:$xk, v8f32:$xa), ++ (XVFNMSUB_S v8f32:$xj, v8f32:$xk, v8f32:$xa)>; ++ ++ ++def XVFCMP_CAF_S : LASX_XVFCMP<0b000011001001>, ++ LASX_3RF<"xvfcmp.caf.s", int_loongarch_lasx_xvfcmp_caf_s, LASX256WOpnd>{ ++ bits<5> cond=0x0; ++ } ++ ++def XVFCMP_CAF_D : LASX_XVFCMP<0b000011001010>, ++ LASX_3RF<"xvfcmp.caf.d", int_loongarch_lasx_xvfcmp_caf_d, LASX256DOpnd>{ ++ bits<5> cond=0x0; ++ } ++ ++def XVFCMP_COR_S : LASX_XVFCMP<0b000011001001>, ++ LASX_3RF<"xvfcmp.cor.s", vfsetord_v8f32, LASX256WOpnd>{ ++ bits<5> cond=0x14; ++ } ++ ++def XVFCMP_COR_D : LASX_XVFCMP<0b000011001010>, ++ LASX_3RF<"xvfcmp.cor.d", vfsetord_v4f64, LASX256DOpnd>{ ++ bits<5> cond=0x14; ++ } ++ ++def XVFCMP_CUN_S : LASX_XVFCMP<0b000011001001>, ++ LASX_3RF<"xvfcmp.cun.s", vfsetun_v8f32, LASX256WOpnd>{ ++ bits<5> cond=0x8; ++ } ++ ++def XVFCMP_CUN_D : LASX_XVFCMP<0b000011001010>, ++ LASX_3RF<"xvfcmp.cun.d", vfsetun_v4f64, LASX256DOpnd>{ ++ bits<5> cond=0x8; ++ } ++ ++def XVFCMP_CUNE_S : LASX_XVFCMP<0b000011001001>, ++ LASX_3RF<"xvfcmp.cune.s", vfsetune_v8f32, LASX256WOpnd>{ ++ bits<5> cond=0x18; ++ } ++ ++def XVFCMP_CUNE_D : LASX_XVFCMP<0b000011001010>, ++ LASX_3RF<"xvfcmp.cune.d", vfsetune_v4f64, LASX256DOpnd>{ ++ bits<5> cond=0x18; ++ } ++ ++def XVFCMP_CUEQ_S : LASX_XVFCMP<0b000011001001>, ++ LASX_3RF<"xvfcmp.cueq.s", vfsetueq_v8f32, LASX256WOpnd>{ ++ bits<5> cond=0xc; ++ } ++ ++def XVFCMP_CUEQ_D : LASX_XVFCMP<0b000011001010>, ++ LASX_3RF<"xvfcmp.cueq.d", vfsetueq_v4f64, LASX256DOpnd>{ ++ bits<5> cond=0xc; ++ } ++ ++def XVFCMP_CEQ_S : LASX_XVFCMP<0b000011001001>, ++ LASX_3RF<"xvfcmp.ceq.s", vfsetoeq_v8f32, LASX256WOpnd>{ ++ bits<5> cond=0x4; ++ } ++ ++def XVFCMP_CEQ_D : LASX_XVFCMP<0b000011001010>, ++ LASX_3RF<"xvfcmp.ceq.d", vfsetoeq_v4f64, LASX256DOpnd>{ ++ bits<5> cond=0x4; ++ } ++ ++def XVFCMP_CNE_S : LASX_XVFCMP<0b000011001001>, ++ LASX_3RF<"xvfcmp.cne.s", vfsetone_v8f32, LASX256WOpnd>{ ++ bits<5> cond=0x10; ++ } ++ ++def XVFCMP_CNE_D : LASX_XVFCMP<0b000011001010>, ++ LASX_3RF<"xvfcmp.cne.d", vfsetone_v4f64, LASX256DOpnd>{ ++ bits<5> cond=0x10; ++ } ++ ++def XVFCMP_CLT_S : LASX_XVFCMP<0b000011001001>, ++ LASX_3RF<"xvfcmp.clt.s", vfsetolt_v8f32, LASX256WOpnd>{ ++ bits<5> cond=0x2; ++ } ++ ++def XVFCMP_CLT_D : LASX_XVFCMP<0b000011001010>, ++ LASX_3RF<"xvfcmp.clt.d", vfsetolt_v4f64, LASX256DOpnd>{ ++ 
bits<5> cond=0x2; ++ } ++ ++def XVFCMP_CULT_S : LASX_XVFCMP<0b000011001001>, ++ LASX_3RF<"xvfcmp.cult.s", vfsetult_v8f32, LASX256WOpnd>{ ++ bits<5> cond=0xa; ++ } ++ ++def XVFCMP_CULT_D : LASX_XVFCMP<0b000011001010>, ++ LASX_3RF<"xvfcmp.cult.d", vfsetult_v4f64, LASX256DOpnd>{ ++ bits<5> cond=0xa; ++ } ++ ++def XVFCMP_CLE_S : LASX_XVFCMP<0b000011001001>, ++ LASX_3RF<"xvfcmp.cle.s", vfsetole_v8f32, LASX256WOpnd>{ ++ bits<5> cond=0x6; ++ } ++ ++def XVFCMP_CLE_D : LASX_XVFCMP<0b000011001010>, ++ LASX_3RF<"xvfcmp.cle.d", vfsetole_v4f64, LASX256DOpnd>{ ++ bits<5> cond=0x6; ++ } ++ ++def XVFCMP_CULE_S : LASX_XVFCMP<0b000011001001>, ++ LASX_3RF<"xvfcmp.cule.s", vfsetule_v8f32, LASX256WOpnd>{ ++ bits<5> cond=0xe; ++ } ++ ++def XVFCMP_CULE_D : LASX_XVFCMP<0b000011001010>, ++ LASX_3RF<"xvfcmp.cule.d", vfsetule_v4f64, LASX256DOpnd>{ ++ bits<5> cond=0xe; ++ } ++ ++def XVFCMP_SAF_S : LASX_XVFCMP<0b000011001001>, ++ LASX_3RF<"xvfcmp.saf.s", int_loongarch_lasx_xvfcmp_saf_s, LASX256WOpnd>{ ++ bits<5> cond=0x1; ++ } ++ ++def XVFCMP_SAF_D : LASX_XVFCMP<0b000011001010>, ++ LASX_3RF<"xvfcmp.saf.d", int_loongarch_lasx_xvfcmp_saf_d, LASX256DOpnd>{ ++ bits<5> cond=0x1; ++ } ++ ++def XVFCMP_SOR_S : LASX_XVFCMP<0b000011001001>, ++ LASX_3RF<"xvfcmp.sor.s", int_loongarch_lasx_xvfcmp_sor_s, LASX256WOpnd>{ ++ bits<5> cond=0x15; ++ } ++ ++def XVFCMP_SOR_D : LASX_XVFCMP<0b000011001010>, ++ LASX_3RF<"xvfcmp.sor.d", int_loongarch_lasx_xvfcmp_sor_d, LASX256DOpnd>{ ++ bits<5> cond=0x15; ++ } ++ ++def XVFCMP_SUN_S : LASX_XVFCMP<0b000011001001>, ++ LASX_3RF<"xvfcmp.sun.s", int_loongarch_lasx_xvfcmp_sun_s, LASX256WOpnd>{ ++ bits<5> cond=0x9; ++ } ++ ++def XVFCMP_SUN_D : LASX_XVFCMP<0b000011001010>, ++ LASX_3RF<"xvfcmp.sun.d", int_loongarch_lasx_xvfcmp_sun_d, LASX256DOpnd>{ ++ bits<5> cond=0x9; ++ } ++ ++def XVFCMP_SUNE_S : LASX_XVFCMP<0b000011001001>, ++ LASX_3RF<"xvfcmp.sune.s", int_loongarch_lasx_xvfcmp_sune_s, LASX256WOpnd>{ ++ bits<5> cond=0x19; ++ } ++ ++def XVFCMP_SUNE_D : LASX_XVFCMP<0b000011001010>, ++ LASX_3RF<"xvfcmp.sune.d", int_loongarch_lasx_xvfcmp_sune_d, LASX256DOpnd>{ ++ bits<5> cond=0x19; ++ } ++ ++def XVFCMP_SUEQ_S : LASX_XVFCMP<0b000011001001>, ++ LASX_3RF<"xvfcmp.sueq.s", int_loongarch_lasx_xvfcmp_sueq_s, LASX256WOpnd>{ ++ bits<5> cond=0xd; ++ } ++ ++def XVFCMP_SUEQ_D : LASX_XVFCMP<0b000011001010>, ++ LASX_3RF<"xvfcmp.sueq.d", int_loongarch_lasx_xvfcmp_sueq_d, LASX256DOpnd>{ ++ bits<5> cond=0xd; ++ } ++ ++def XVFCMP_SEQ_S : LASX_XVFCMP<0b000011001001>, ++ LASX_3RF<"xvfcmp.seq.s", int_loongarch_lasx_xvfcmp_seq_s, LASX256WOpnd>{ ++ bits<5> cond=0x5; ++ } ++ ++def XVFCMP_SEQ_D : LASX_XVFCMP<0b000011001010>, ++ LASX_3RF<"xvfcmp.seq.d", int_loongarch_lasx_xvfcmp_seq_d, LASX256DOpnd>{ ++ bits<5> cond=0x5; ++ } ++ ++def XVFCMP_SNE_S : LASX_XVFCMP<0b000011001001>, ++ LASX_3RF<"xvfcmp.sne.s", int_loongarch_lasx_xvfcmp_sne_s, LASX256WOpnd>{ ++ bits<5> cond=0x11; ++ } ++ ++def XVFCMP_SNE_D : LASX_XVFCMP<0b000011001010>, ++ LASX_3RF<"xvfcmp.sne.d", int_loongarch_lasx_xvfcmp_sne_d, LASX256DOpnd>{ ++ bits<5> cond=0x11; ++ } ++ ++def XVFCMP_SLT_S : LASX_XVFCMP<0b000011001001>, ++ LASX_3RF<"xvfcmp.slt.s", int_loongarch_lasx_xvfcmp_slt_s, LASX256WOpnd>{ ++ bits<5> cond=0x3; ++ } ++ ++def XVFCMP_SLT_D : LASX_XVFCMP<0b000011001010>, ++ LASX_3RF<"xvfcmp.slt.d", int_loongarch_lasx_xvfcmp_slt_d, LASX256DOpnd>{ ++ bits<5> cond=0x3; ++ } ++ ++def XVFCMP_SULT_S : LASX_XVFCMP<0b000011001001>, ++ LASX_3RF<"xvfcmp.sult.s", int_loongarch_lasx_xvfcmp_sult_s, LASX256WOpnd>{ ++ bits<5> cond=0xb; ++ } ++ ++def XVFCMP_SULT_D : 
LASX_XVFCMP<0b000011001010>, ++ LASX_3RF<"xvfcmp.sult.d", int_loongarch_lasx_xvfcmp_sult_d, LASX256DOpnd>{ ++ bits<5> cond=0xb; ++ } ++ ++def XVFCMP_SLE_S : LASX_XVFCMP<0b000011001001>, ++ LASX_3RF<"xvfcmp.sle.s", int_loongarch_lasx_xvfcmp_sle_s, LASX256WOpnd>{ ++ bits<5> cond=0x7; ++ } ++ ++def XVFCMP_SLE_D : LASX_XVFCMP<0b000011001010>, ++ LASX_3RF<"xvfcmp.sle.d", int_loongarch_lasx_xvfcmp_sle_d, LASX256DOpnd>{ ++ bits<5> cond=0x7; ++ } ++ ++def XVFCMP_SULE_S : LASX_XVFCMP<0b000011001001>, ++ LASX_3RF<"xvfcmp.sule.s", int_loongarch_lasx_xvfcmp_sule_s, LASX256WOpnd>{ ++ bits<5> cond=0xf; ++ } ++ ++def XVFCMP_SULE_D : LASX_XVFCMP<0b000011001010>, ++ LASX_3RF<"xvfcmp.sule.d", int_loongarch_lasx_xvfcmp_sule_d, LASX256DOpnd>{ ++ bits<5> cond=0xf; ++ } ++ ++ ++def XVBITSEL_V : LASX_4R<0b000011010010>, ++ LASX_4RF<"xvbitsel.v", int_loongarch_lasx_xvbitsel_v, LASX256BOpnd>; ++ ++class LASX_BSEL_PSEUDO_BASE : ++ LASXPseudo<(outs RO:$xd), (ins RO:$xd_in, RO:$xs, RO:$xt), ++ [(set RO:$xd, (Ty (vselect RO:$xd_in, RO:$xt, RO:$xs)))]>, ++ PseudoInstExpansion<(XVBITSEL_V LASX256BOpnd:$xd, LASX256BOpnd:$xs, ++ LASX256BOpnd:$xt, LASX256BOpnd:$xd_in)> { ++ let Constraints = "$xd_in = $xd"; ++} ++ ++def XBSEL_B_PSEUDO : LASX_BSEL_PSEUDO_BASE; ++def XBSEL_H_PSEUDO : LASX_BSEL_PSEUDO_BASE; ++def XBSEL_W_PSEUDO : LASX_BSEL_PSEUDO_BASE; ++def XBSEL_D_PSEUDO : LASX_BSEL_PSEUDO_BASE; ++def XBSEL_FW_PSEUDO : LASX_BSEL_PSEUDO_BASE; ++def XBSEL_FD_PSEUDO : LASX_BSEL_PSEUDO_BASE; ++ ++ ++ ++def XVSHUF_B : LASX_4R<0b000011010110>, ++ LASX_4RF<"xvshuf.b", int_loongarch_lasx_xvshuf_b, LASX256BOpnd>; ++ ++ ++def XVLD : LASX_I12_S<0b0010110010>, ++ LASX_LD<"xvld", load, v32i8, LASX256BOpnd, mem>; ++ ++def XVST : LASX_I12_S<0b0010110011>, ++ LASX_ST<"xvst", store, v32i8, LASX256BOpnd, mem_simm12>; ++ ++ ++class LASX_LD_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins MemOpnd:$addr); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $addr"); ++ list Pattern = [(set ROXD:$xd, (OpNode (TyNode (load Addr:$addr))))]; ++ string DecoderMethod = "DecodeLASX256memlsl"; ++} ++ ++ ++def XVLDREPL_B : LASX_SI12_S<0b0011001010>, ++ LASX_LD_DESC_BASE<"xvldrepl.b", xvbroadcast_v32i8, v32i8, LASX256BOpnd>; ++ ++def XVLDREPL_H : LASX_SI11_S<0b00110010010>, ++ LASX_LD_DESC_BASE<"xvldrepl.h", xvbroadcast_v16i16, v16i16, LASX256HOpnd, mem_simm11_lsl1, addrimm11lsl1>; ++ ++def XVLDREPL_W : LASX_SI10_S<0b001100100010>, ++ LASX_LD_DESC_BASE<"xvldrepl.w", xvbroadcast_v8i32, v8i32, LASX256WOpnd, mem_simm10_lsl2, addrimm10lsl2>; ++ ++def XVLDREPL_D : LASX_SI9_S<0b0011001000010>, ++ LASX_LD_DESC_BASE<"xvldrepl.d", xvbroadcast_v4i64, v4i64, LASX256DOpnd, mem_simm9_lsl3, addrimm9lsl3>; ++ ++ ++def XVSTELM_B : LASX_SI8_idx5<0b001100111>, ++ LASX_I8_U5_DESC_BASE<"xvstelm.b", int_loongarch_lasx_xvstelm_b, simm8_32, immSExt8, LASX256BOpnd, GPR32Opnd>; ++ ++def XVSTELM_H : LASX_SI8_idx4<0b0011001101>, ++ LASX_I8_U4_DESC_BASE<"xvstelm.h", int_loongarch_lasx_xvstelm_h, immSExt8_1_O, immSExt8, LASX256HOpnd, GPR32Opnd>; ++ ++def XVSTELM_W : LASX_SI8_idx3<0b00110011001>, ++ LASX_I8_U3_DESC_BASE<"xvstelm.w", int_loongarch_lasx_xvstelm_w, immSExt8_2_O, immSExt8, LASX256WOpnd, GPR32Opnd>; ++ ++def XVSTELM_D : LASX_SI8_idx2<0b001100110001>, ++ LASX_I8_U2_DESC_BASE<"xvstelm.d", int_loongarch_lasx_xvstelm_d, immSExt8_3_O, immSExt8, LASX256DOpnd, GPR32Opnd>; ++ ++let mayLoad = 1, canFoldAsLoad = 1 in { ++ def XVLDX : LASX_3R_2GP<0b00111000010010000>, ++ LASX_LDX_LA<"xvldx", int_loongarch_lasx_xvldx, GPR64Opnd, LASX256BOpnd>; ++} ++ 
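++// XVLDX above and XVSTX below are the register+register indexed forms of the
++// 256-bit vector load/store: the effective address is rj + rk (a GPR64 index)
++// rather than the rj + si12 immediate form used by XVLD/XVST.  In these
++// definitions they are only selected through the int_loongarch_lasx_xvldx and
++// int_loongarch_lasx_xvstx intrinsics.  Assembly syntax, per the AsmString in
++// LASX_LDX_LA/LASX_SDX_LA:  xvldx/xvstx  $xd, $rj, $rk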
++let mayStore = 1 in{ ++ def XVSTX : LASX_3R_2GP<0b00111000010011000>, ++ LASX_SDX_LA<"xvstx", int_loongarch_lasx_xvstx, GPR64Opnd, LASX256BOpnd>; ++} ++ ++ ++def XVSEQ_B : LASX_3R<0b01110100000000000>, IsCommutable, ++ LASX_3R_SETCC_DESC_BASE<"xvseq.b", SETEQ, v32i8, LASX256BOpnd>; ++ ++def XVSEQ_H : LASX_3R<0b01110100000000001>, IsCommutable, ++ LASX_3R_SETCC_DESC_BASE<"xvseq.h", SETEQ, v16i16, LASX256HOpnd>; ++ ++def XVSEQ_W : LASX_3R<0b01110100000000010>, IsCommutable, ++ LASX_3R_SETCC_DESC_BASE<"xvseq.w", SETEQ, v8i32, LASX256WOpnd> ; ++ ++def XVSEQ_D : LASX_3R<0b01110100000000011>, IsCommutable, ++ LASX_3R_SETCC_DESC_BASE<"xvseq.d", SETEQ, v4i64, LASX256DOpnd>; ++ ++ ++def XVSLE_B : LASX_3R<0b01110100000000100>, ++ LASX_3R_SETCC_DESC_BASE<"xvsle.b", SETLE, v32i8, LASX256BOpnd>; ++ ++def XVSLE_H : LASX_3R<0b01110100000000101>, ++ LASX_3R_SETCC_DESC_BASE<"xvsle.h", SETLE, v16i16, LASX256HOpnd>; ++ ++def XVSLE_W : LASX_3R<0b01110100000000110>, ++ LASX_3R_SETCC_DESC_BASE<"xvsle.w", SETLE, v8i32, LASX256WOpnd>; ++ ++def XVSLE_D : LASX_3R<0b01110100000000111>, ++ LASX_3R_SETCC_DESC_BASE<"xvsle.d", SETLE, v4i64, LASX256DOpnd>; ++ ++ ++def XVSLE_BU : LASX_3R<0b01110100000001000>, ++ LASX_3R_SETCC_DESC_BASE<"xvsle.bu", SETULE, v32i8, LASX256BOpnd>; ++ ++def XVSLE_HU : LASX_3R<0b01110100000001001>, ++ LASX_3R_SETCC_DESC_BASE<"xvsle.hu", SETULE, v16i16, LASX256HOpnd>; ++ ++def XVSLE_WU : LASX_3R<0b01110100000001010>, ++ LASX_3R_SETCC_DESC_BASE<"xvsle.wu", SETULE, v8i32, LASX256WOpnd>; ++ ++def XVSLE_DU : LASX_3R<0b01110100000001011>, ++ LASX_3R_SETCC_DESC_BASE<"xvsle.du", SETULE, v4i64, LASX256DOpnd>; ++ ++ ++def XVSLT_B : LASX_3R<0b01110100000001100>, ++ LASX_3R_SETCC_DESC_BASE<"xvslt.b", SETLT, v32i8, LASX256BOpnd>; ++ ++def XVSLT_H : LASX_3R<0b01110100000001101>, ++ LASX_3R_SETCC_DESC_BASE<"xvslt.h", SETLT, v16i16, LASX256HOpnd>; ++ ++def XVSLT_W : LASX_3R<0b01110100000001110>, ++ LASX_3R_SETCC_DESC_BASE<"xvslt.w", SETLT, v8i32, LASX256WOpnd>; ++ ++def XVSLT_D : LASX_3R<0b01110100000001111>, ++ LASX_3R_SETCC_DESC_BASE<"xvslt.d", SETLT, v4i64, LASX256DOpnd>; ++ ++ ++def XVSLT_BU : LASX_3R<0b01110100000010000>, ++ LASX_3R_SETCC_DESC_BASE<"xvslt.bu", SETULT, v32i8, LASX256BOpnd>; ++ ++def XVSLT_HU : LASX_3R<0b01110100000010001>, ++ LASX_3R_SETCC_DESC_BASE<"xvslt.hu", SETULT, v16i16, LASX256HOpnd>; ++ ++def XVSLT_WU : LASX_3R<0b01110100000010010>, ++ LASX_3R_SETCC_DESC_BASE<"xvslt.wu", SETULT, v8i32, LASX256WOpnd>; ++ ++def XVSLT_DU : LASX_3R<0b01110100000010011>, ++ LASX_3R_SETCC_DESC_BASE<"xvslt.du", SETULT, v4i64, LASX256DOpnd>; ++ ++ ++def XVADD_B : LASX_3R<0b01110100000010100>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvadd.b", add, LASX256BOpnd>; ++ ++def XVADD_H : LASX_3R<0b01110100000010101>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvadd.h", add, LASX256HOpnd>; ++ ++def XVADD_W : LASX_3R<0b01110100000010110>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvadd.w", add, LASX256WOpnd>; ++ ++def XVADD_D : LASX_3R<0b01110100000010111>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvadd.d", add, LASX256DOpnd>; ++ ++ ++def XVSUB_B : LASX_3R<0b01110100000011000>, ++ LASX_3R_DESC_BASE<"xvsub.b", sub, LASX256BOpnd>; ++ ++def XVSUB_H : LASX_3R<0b01110100000011001>, ++ LASX_3R_DESC_BASE<"xvsub.h", sub, LASX256HOpnd>; ++ ++def XVSUB_W : LASX_3R<0b01110100000011010>, ++ LASX_3R_DESC_BASE<"xvsub.w", sub, LASX256WOpnd>; ++ ++def XVSUB_D : LASX_3R<0b01110100000011011>, ++ LASX_3R_DESC_BASE<"xvsub.d", sub, LASX256DOpnd>; ++ ++ ++def XVADDWEV_H_B : LASX_3R<0b01110100000111100>, ++ LASX_3R_DESC_BASE<"xvaddwev.h.b", 
int_loongarch_lasx_xvaddwev_h_b, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVADDWEV_W_H : LASX_3R<0b01110100000111101>, ++ LASX_3R_DESC_BASE<"xvaddwev.w.h", int_loongarch_lasx_xvaddwev_w_h, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVADDWEV_D_W : LASX_3R<0b01110100000111110>, ++ LASX_3R_DESC_BASE<"xvaddwev.d.w", int_loongarch_lasx_xvaddwev_d_w, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd> ; ++ ++def XVADDWEV_Q_D : LASX_3R<0b01110100000111111>, ++ LASX_3R_DESC_BASE<"xvaddwev.q.d", int_loongarch_lasx_xvaddwev_q_d, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVSUBWEV_H_B : LASX_3R<0b01110100001000000>, ++ LASX_3R_DESC_BASE<"xvsubwev.h.b", int_loongarch_lasx_xvsubwev_h_b, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVSUBWEV_W_H : LASX_3R<0b01110100001000001>, ++ LASX_3R_DESC_BASE<"xvsubwev.w.h", int_loongarch_lasx_xvsubwev_w_h, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVSUBWEV_D_W : LASX_3R<0b01110100001000010>, ++ LASX_3R_DESC_BASE<"xvsubwev.d.w", int_loongarch_lasx_xvsubwev_d_w, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd> ; ++ ++def XVSUBWEV_Q_D : LASX_3R<0b01110100001000011>, ++ LASX_3R_DESC_BASE<"xvsubwev.q.d", int_loongarch_lasx_xvsubwev_q_d, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVADDWOD_H_B : LASX_3R<0b01110100001000100>, ++ LASX_3R_DESC_BASE<"xvaddwod.h.b", int_loongarch_lasx_xvaddwod_h_b, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVADDWOD_W_H : LASX_3R<0b01110100001000101>, ++ LASX_3R_DESC_BASE<"xvaddwod.w.h", int_loongarch_lasx_xvaddwod_w_h, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVADDWOD_D_W : LASX_3R<0b01110100001000110>, ++ LASX_3R_DESC_BASE<"xvaddwod.d.w", int_loongarch_lasx_xvaddwod_d_w, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd> ; ++ ++def XVADDWOD_Q_D : LASX_3R<0b01110100001000111>, ++ LASX_3R_DESC_BASE<"xvaddwod.q.d", int_loongarch_lasx_xvaddwod_q_d, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVSUBWOD_H_B : LASX_3R<0b01110100001001000>, ++ LASX_3R_DESC_BASE<"xvsubwod.h.b", int_loongarch_lasx_xvsubwod_h_b, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVSUBWOD_W_H : LASX_3R<0b01110100001001001>, ++ LASX_3R_DESC_BASE<"xvsubwod.w.h", int_loongarch_lasx_xvsubwod_w_h, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVSUBWOD_D_W : LASX_3R<0b01110100001001010>, ++ LASX_3R_DESC_BASE<"xvsubwod.d.w", int_loongarch_lasx_xvsubwod_d_w, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd> ; ++ ++def XVSUBWOD_Q_D : LASX_3R<0b01110100001001011>, ++ LASX_3R_DESC_BASE<"xvsubwod.q.d", int_loongarch_lasx_xvsubwod_q_d, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVADDWEV_H_BU : LASX_3R<0b01110100001011100>, ++ LASX_3R_DESC_BASE<"xvaddwev.h.bu", int_loongarch_lasx_xvaddwev_h_bu, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVADDWEV_W_HU : LASX_3R<0b01110100001011101>, ++ LASX_3R_DESC_BASE<"xvaddwev.w.hu", int_loongarch_lasx_xvaddwev_w_hu, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVADDWEV_D_WU : LASX_3R<0b01110100001011110>, ++ LASX_3R_DESC_BASE<"xvaddwev.d.wu", int_loongarch_lasx_xvaddwev_d_wu, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd> ; ++ ++def XVADDWEV_Q_DU : LASX_3R<0b01110100001011111>, ++ LASX_3R_DESC_BASE<"xvaddwev.q.du", int_loongarch_lasx_xvaddwev_q_du, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVSUBWEV_H_BU : LASX_3R<0b01110100001100000>, ++ LASX_3R_DESC_BASE<"xvsubwev.h.bu", int_loongarch_lasx_xvsubwev_h_bu, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVSUBWEV_W_HU : LASX_3R<0b01110100001100001>, ++ LASX_3R_DESC_BASE<"xvsubwev.w.hu", 
int_loongarch_lasx_xvsubwev_w_hu, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVSUBWEV_D_WU : LASX_3R<0b01110100001100010>, ++ LASX_3R_DESC_BASE<"xvsubwev.d.wu", int_loongarch_lasx_xvsubwev_d_wu, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd> ; ++ ++def XVSUBWEV_Q_DU : LASX_3R<0b01110100001100011>, ++ LASX_3R_DESC_BASE<"xvsubwev.q.du", int_loongarch_lasx_xvsubwev_q_du, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVADDWOD_H_BU : LASX_3R<0b01110100001100100>, ++ LASX_3R_DESC_BASE<"xvaddwod.h.bu", int_loongarch_lasx_xvaddwod_h_bu, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVADDWOD_W_HU : LASX_3R<0b01110100001100101>, ++ LASX_3R_DESC_BASE<"xvaddwod.w.hu", int_loongarch_lasx_xvaddwod_w_hu, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVADDWOD_D_WU : LASX_3R<0b01110100001100110>, ++ LASX_3R_DESC_BASE<"xvaddwod.d.wu", int_loongarch_lasx_xvaddwod_d_wu, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd> ; ++ ++def XVADDWOD_Q_DU : LASX_3R<0b01110100001100111>, ++ LASX_3R_DESC_BASE<"xvaddwod.q.du", int_loongarch_lasx_xvaddwod_q_du, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVSUBWOD_H_BU : LASX_3R<0b01110100001101000>, ++ LASX_3R_DESC_BASE<"xvsubwod.h.bu", int_loongarch_lasx_xvsubwod_h_bu, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVSUBWOD_W_HU : LASX_3R<0b01110100001101001>, ++ LASX_3R_DESC_BASE<"xvsubwod.w.hu", int_loongarch_lasx_xvsubwod_w_hu, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVSUBWOD_D_WU : LASX_3R<0b01110100001101010>, ++ LASX_3R_DESC_BASE<"xvsubwod.d.wu", int_loongarch_lasx_xvsubwod_d_wu, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd> ; ++ ++def XVSUBWOD_Q_DU : LASX_3R<0b01110100001101011>, ++ LASX_3R_DESC_BASE<"xvsubwod.q.du", int_loongarch_lasx_xvsubwod_q_du, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVADDWEV_H_BU_B : LASX_3R<0b01110100001111100>, ++ LASX_3R_DESC_BASE<"xvaddwev.h.bu.b", int_loongarch_lasx_xvaddwev_h_bu_b, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVADDWEV_W_HU_H : LASX_3R<0b01110100001111101>, ++ LASX_3R_DESC_BASE<"xvaddwev.w.hu.h", int_loongarch_lasx_xvaddwev_w_hu_h, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVADDWEV_D_WU_W : LASX_3R<0b01110100001111110>, ++ LASX_3R_DESC_BASE<"xvaddwev.d.wu.w", int_loongarch_lasx_xvaddwev_d_wu_w, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd> ; ++ ++def XVADDWEV_Q_DU_D : LASX_3R<0b01110100001111111>, ++ LASX_3R_DESC_BASE<"xvaddwev.q.du.d", int_loongarch_lasx_xvaddwev_q_du_d, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVADDWOD_H_BU_B : LASX_3R<0b01110100010000000>, ++ LASX_3R_DESC_BASE<"xvaddwod.h.bu.b", int_loongarch_lasx_xvaddwod_h_bu_b, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVADDWOD_W_HU_H : LASX_3R<0b01110100010000001>, ++ LASX_3R_DESC_BASE<"xvaddwod.w.hu.h", int_loongarch_lasx_xvaddwod_w_hu_h, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVADDWOD_D_WU_W : LASX_3R<0b01110100010000010>, ++ LASX_3R_DESC_BASE<"xvaddwod.d.wu.w", int_loongarch_lasx_xvaddwod_d_wu_w, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd> ; ++ ++def XVADDWOD_Q_DU_D : LASX_3R<0b01110100010000011>, ++ LASX_3R_DESC_BASE<"xvaddwod.q.du.d", int_loongarch_lasx_xvaddwod_q_du_d, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVSADD_B : LASX_3R<0b01110100010001100>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvsadd.b", saddsat, LASX256BOpnd>; ++ ++def XVSADD_H : LASX_3R<0b01110100010001101>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvsadd.h", saddsat, LASX256HOpnd>; ++ ++def XVSADD_W : LASX_3R<0b01110100010001110>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvsadd.w", saddsat, 
LASX256WOpnd>; ++ ++def XVSADD_D : LASX_3R<0b01110100010001111>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvsadd.d", saddsat, LASX256DOpnd>; ++ ++ ++def XVSSUB_B : LASX_3R<0b01110100010010000>, ++ LASX_3R_DESC_BASE<"xvssub.b", ssubsat, LASX256BOpnd>; ++ ++def XVSSUB_H : LASX_3R<0b01110100010010001>, ++ LASX_3R_DESC_BASE<"xvssub.h", ssubsat, LASX256HOpnd>; ++ ++def XVSSUB_W : LASX_3R<0b01110100010010010>, ++ LASX_3R_DESC_BASE<"xvssub.w", ssubsat, LASX256WOpnd>; ++ ++def XVSSUB_D : LASX_3R<0b01110100010010011>, ++ LASX_3R_DESC_BASE<"xvssub.d", ssubsat, LASX256DOpnd>; ++ ++ ++def XVSADD_BU : LASX_3R<0b01110100010010100>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvsadd.bu", uaddsat, LASX256BOpnd>; ++ ++def XVSADD_HU : LASX_3R<0b01110100010010101>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvsadd.hu", uaddsat, LASX256HOpnd>; ++ ++def XVSADD_WU : LASX_3R<0b01110100010010110>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvsadd.wu", uaddsat, LASX256WOpnd>; ++ ++def XVSADD_DU : LASX_3R<0b01110100010010111>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvsadd.du", uaddsat, LASX256DOpnd>; ++ ++ ++def XVSSUB_BU : LASX_3R<0b01110100010011000>, ++ LASX_3R_DESC_BASE<"xvssub.bu", usubsat, LASX256BOpnd>; ++ ++def XVSSUB_HU : LASX_3R<0b01110100010011001>, ++ LASX_3R_DESC_BASE<"xvssub.hu", usubsat, LASX256HOpnd>; ++ ++def XVSSUB_WU : LASX_3R<0b01110100010011010>, ++ LASX_3R_DESC_BASE<"xvssub.wu", usubsat, LASX256WOpnd>; ++ ++def XVSSUB_DU : LASX_3R<0b01110100010011011>, ++ LASX_3R_DESC_BASE<"xvssub.du", usubsat, LASX256DOpnd>; ++ ++ ++def XVHADDW_H_B : LASX_3R<0b01110100010101000>, ++ LASX_3R_DESC_BASE<"xvhaddw.h.b", int_loongarch_lasx_xvhaddw_h_b, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVHADDW_W_H : LASX_3R<0b01110100010101001>, ++ LASX_3R_DESC_BASE<"xvhaddw.w.h", int_loongarch_lasx_xvhaddw_w_h, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVHADDW_D_W : LASX_3R<0b01110100010101010>, ++ LASX_3R_DESC_BASE<"xvhaddw.d.w", int_loongarch_lasx_xvhaddw_d_w, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd>; ++ ++def XVHADDW_Q_D : LASX_3R<0b01110100010101011>, ++ LASX_3R_DESC_BASE<"xvhaddw.q.d", int_loongarch_lasx_xvhaddw_q_d, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++ ++def XVHSUBW_H_B : LASX_3R<0b01110100010101100>, ++ LASX_3R_DESC_BASE<"xvhsubw.h.b", int_loongarch_lasx_xvhsubw_h_b, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVHSUBW_W_H : LASX_3R<0b01110100010101101>, ++ LASX_3R_DESC_BASE<"xvhsubw.w.h", int_loongarch_lasx_xvhsubw_w_h, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVHSUBW_D_W : LASX_3R<0b01110100010101110>, ++ LASX_3R_DESC_BASE<"xvhsubw.d.w", int_loongarch_lasx_xvhsubw_d_w, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd>; ++ ++def XVHSUBW_Q_D : LASX_3R<0b01110100010101111>, ++ LASX_3R_DESC_BASE<"xvhsubw.q.d", int_loongarch_lasx_xvhsubw_q_d, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVHADDW_HU_BU : LASX_3R<0b01110100010110000>, ++ LASX_3R_DESC_BASE<"xvhaddw.hu.bu", int_loongarch_lasx_xvhaddw_hu_bu, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVHADDW_WU_HU : LASX_3R<0b01110100010110001>, ++ LASX_3R_DESC_BASE<"xvhaddw.wu.hu", int_loongarch_lasx_xvhaddw_wu_hu, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVHADDW_DU_WU : LASX_3R<0b01110100010110010>, ++ LASX_3R_DESC_BASE<"xvhaddw.du.wu", int_loongarch_lasx_xvhaddw_du_wu, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd>; ++ ++def XVHADDW_QU_DU : LASX_3R<0b01110100010110011>, ++ LASX_3R_DESC_BASE<"xvhaddw.qu.du", int_loongarch_lasx_xvhaddw_qu_du, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++ ++def XVHSUBW_HU_BU : 
LASX_3R<0b01110100010110100>, ++ LASX_3R_DESC_BASE<"xvhsubw.hu.bu", int_loongarch_lasx_xvhsubw_hu_bu, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVHSUBW_WU_HU : LASX_3R<0b01110100010110101>, ++ LASX_3R_DESC_BASE<"xvhsubw.wu.hu", int_loongarch_lasx_xvhsubw_wu_hu, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVHSUBW_DU_WU : LASX_3R<0b01110100010110110>, ++ LASX_3R_DESC_BASE<"xvhsubw.du.wu", int_loongarch_lasx_xvhsubw_du_wu, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd>; ++ ++def XVHSUBW_QU_DU : LASX_3R<0b01110100010110111>, ++ LASX_3R_DESC_BASE<"xvhsubw.qu.du", int_loongarch_lasx_xvhsubw_qu_du, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVADDA_B : LASX_3R<0b01110100010111000>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvadda.b", int_loongarch_lasx_xvadda_b, LASX256BOpnd>; ++ ++def XVADDA_H : LASX_3R<0b01110100010111001>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvadda.h", int_loongarch_lasx_xvadda_h, LASX256HOpnd>; ++ ++def XVADDA_W : LASX_3R<0b01110100010111010>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvadda.w", int_loongarch_lasx_xvadda_w, LASX256WOpnd>; ++ ++def XVADDA_D : LASX_3R<0b01110100010111011>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvadda.d", int_loongarch_lasx_xvadda_d, LASX256DOpnd>; ++ ++ ++def XVABSD_B : LASX_3R<0b01110100011000000>, ++ LASX_3R_DESC_BASE<"xvabsd.b", int_loongarch_lasx_xvabsd_b, LASX256BOpnd>; ++ ++def XVABSD_H : LASX_3R<0b01110100011000001>, ++ LASX_3R_DESC_BASE<"xvabsd.h", int_loongarch_lasx_xvabsd_h, LASX256HOpnd>; ++ ++def XVABSD_W : LASX_3R<0b01110100011000010>, ++ LASX_3R_DESC_BASE<"xvabsd.w", int_loongarch_lasx_xvabsd_w, LASX256WOpnd>; ++ ++def XVABSD_D : LASX_3R<0b01110100011000011>, ++ LASX_3R_DESC_BASE<"xvabsd.d", int_loongarch_lasx_xvabsd_d, LASX256DOpnd>; ++ ++ ++def XVABSD_BU : LASX_3R<0b01110100011000100>, ++ LASX_3R_DESC_BASE<"xvabsd.bu", int_loongarch_lasx_xvabsd_bu, LASX256BOpnd>; ++ ++def XVABSD_HU : LASX_3R<0b01110100011000101>, ++ LASX_3R_DESC_BASE<"xvabsd.hu", int_loongarch_lasx_xvabsd_hu, LASX256HOpnd>; ++ ++def XVABSD_WU : LASX_3R<0b01110100011000110>, ++ LASX_3R_DESC_BASE<"xvabsd.wu", int_loongarch_lasx_xvabsd_wu, LASX256WOpnd>; ++ ++def XVABSD_DU : LASX_3R<0b01110100011000111>, ++ LASX_3R_DESC_BASE<"xvabsd.du", int_loongarch_lasx_xvabsd_du, LASX256DOpnd>; ++ ++ ++def XVAVG_B : LASX_3R<0b01110100011001000>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvavg.b", int_loongarch_lasx_xvavg_b, LASX256BOpnd>; ++ ++def XVAVG_H : LASX_3R<0b01110100011001001>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvavg.h", int_loongarch_lasx_xvavg_h, LASX256HOpnd>; ++ ++def XVAVG_W : LASX_3R<0b01110100011001010>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvavg.w", int_loongarch_lasx_xvavg_w, LASX256WOpnd>; ++ ++def XVAVG_D : LASX_3R<0b01110100011001011>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvavg.d", int_loongarch_lasx_xvavg_d, LASX256DOpnd>; ++ ++ ++def XVAVG_BU : LASX_3R<0b01110100011001100>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvavg.bu", int_loongarch_lasx_xvavg_bu, LASX256BOpnd>; ++ ++def XVAVG_HU : LASX_3R<0b01110100011001101>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvavg.hu", int_loongarch_lasx_xvavg_hu, LASX256HOpnd>; ++ ++def XVAVG_WU : LASX_3R<0b01110100011001110>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvavg.wu", int_loongarch_lasx_xvavg_wu, LASX256WOpnd>; ++ ++def XVAVG_DU : LASX_3R<0b01110100011001111>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvavg.du", int_loongarch_lasx_xvavg_du, LASX256DOpnd>; ++ ++ ++def XVAVGR_B : LASX_3R<0b01110100011010000>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvavgr.b", int_loongarch_lasx_xvavgr_b, LASX256BOpnd>; ++ ++def XVAVGR_H : 
LASX_3R<0b01110100011010001>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvavgr.h", int_loongarch_lasx_xvavgr_h, LASX256HOpnd>; ++ ++def XVAVGR_W : LASX_3R<0b01110100011010010>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvavgr.w", int_loongarch_lasx_xvavgr_w, LASX256WOpnd>; ++ ++def XVAVGR_D : LASX_3R<0b01110100011010011>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvavgr.d", int_loongarch_lasx_xvavgr_d, LASX256DOpnd>; ++ ++ ++def XVAVGR_BU : LASX_3R<0b01110100011010100>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvavgr.bu", int_loongarch_lasx_xvavgr_bu, LASX256BOpnd>; ++ ++def XVAVGR_HU : LASX_3R<0b01110100011010101>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvavgr.hu", int_loongarch_lasx_xvavgr_hu, LASX256HOpnd>; ++ ++def XVAVGR_WU : LASX_3R<0b01110100011010110>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvavgr.wu", int_loongarch_lasx_xvavgr_wu, LASX256WOpnd>; ++ ++def XVAVGR_DU : LASX_3R<0b01110100011010111>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvavgr.du", int_loongarch_lasx_xvavgr_du, LASX256DOpnd>; ++ ++ ++def XVMAX_B : LASX_3R<0b01110100011100000>, ++ LASX_3R_DESC_BASE<"xvmax.b", smax, LASX256BOpnd>; ++ ++def XVMAX_H : LASX_3R<0b01110100011100001>, ++ LASX_3R_DESC_BASE<"xvmax.h", smax, LASX256HOpnd>; ++ ++def XVMAX_W : LASX_3R<0b01110100011100010>, ++ LASX_3R_DESC_BASE<"xvmax.w", smax, LASX256WOpnd>; ++ ++def XVMAX_D : LASX_3R<0b01110100011100011>, ++ LASX_3R_DESC_BASE<"xvmax.d", smax, LASX256DOpnd>; ++ ++ ++def XVMIN_B : LASX_3R<0b01110100011100100>, ++ LASX_3R_DESC_BASE<"xvmin.b", smin, LASX256BOpnd>; ++ ++def XVMIN_H : LASX_3R<0b01110100011100101>, ++ LASX_3R_DESC_BASE<"xvmin.h", smin, LASX256HOpnd>; ++ ++def XVMIN_W : LASX_3R<0b01110100011100110>, ++ LASX_3R_DESC_BASE<"xvmin.w", smin, LASX256WOpnd>; ++ ++def XVMIN_D : LASX_3R<0b01110100011100111>, ++ LASX_3R_DESC_BASE<"xvmin.d", smin, LASX256DOpnd>; ++ ++ ++def XVMAX_BU : LASX_3R<0b01110100011101000>, ++ LASX_3R_DESC_BASE<"xvmax.bu", umax, LASX256BOpnd>; ++ ++def XVMAX_HU : LASX_3R<0b01110100011101001>, ++ LASX_3R_DESC_BASE<"xvmax.hu", umax, LASX256HOpnd>; ++ ++def XVMAX_WU : LASX_3R<0b01110100011101010>, ++ LASX_3R_DESC_BASE<"xvmax.wu", umax, LASX256WOpnd>; ++ ++def XVMAX_DU : LASX_3R<0b01110100011101011>, ++ LASX_3R_DESC_BASE<"xvmax.du", umax, LASX256DOpnd>; ++ ++ ++def XVMIN_BU : LASX_3R<0b01110100011101100>, ++ LASX_3R_DESC_BASE<"xvmin.bu", umin, LASX256BOpnd>; ++ ++def XVMIN_HU : LASX_3R<0b01110100011101101>, ++ LASX_3R_DESC_BASE<"xvmin.hu", umin, LASX256HOpnd>; ++ ++def XVMIN_WU : LASX_3R<0b01110100011101110>, ++ LASX_3R_DESC_BASE<"xvmin.wu", umin, LASX256WOpnd>; ++ ++def XVMIN_DU : LASX_3R<0b01110100011101111>, ++ LASX_3R_DESC_BASE<"xvmin.du", umin, LASX256DOpnd>; ++ ++ ++def XVMUL_B : LASX_3R<0b01110100100001000>, ++ LASX_3R_DESC_BASE<"xvmul.b", mul, LASX256BOpnd>, IsCommutable; ++ ++def XVMUL_H : LASX_3R<0b01110100100001001>, ++ LASX_3R_DESC_BASE<"xvmul.h", mul, LASX256HOpnd>, IsCommutable; ++ ++def XVMUL_W : LASX_3R<0b01110100100001010>, ++ LASX_3R_DESC_BASE<"xvmul.w", mul, LASX256WOpnd>, IsCommutable; ++ ++def XVMUL_D : LASX_3R<0b01110100100001011>, ++ LASX_3R_DESC_BASE<"xvmul.d", mul, LASX256DOpnd>, IsCommutable; ++ ++ ++def XVMUH_B : LASX_3R<0b01110100100001100>, ++ LASX_3R_DESC_BASE<"xvmuh.b", int_loongarch_lasx_xvmuh_b, LASX256BOpnd, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVMUH_H : LASX_3R<0b01110100100001101>, ++ LASX_3R_DESC_BASE<"xvmuh.h", int_loongarch_lasx_xvmuh_h, LASX256HOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVMUH_W : LASX_3R<0b01110100100001110>, ++ LASX_3R_DESC_BASE<"xvmuh.w", int_loongarch_lasx_xvmuh_w, LASX256WOpnd, LASX256WOpnd, 
LASX256WOpnd> ; ++ ++def XVMUH_D : LASX_3R<0b01110100100001111>, ++ LASX_3R_DESC_BASE<"xvmuh.d", int_loongarch_lasx_xvmuh_d, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVMUH_BU : LASX_3R<0b01110100100010000>, ++ LASX_3R_DESC_BASE<"xvmuh.bu", int_loongarch_lasx_xvmuh_bu, LASX256BOpnd, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVMUH_HU : LASX_3R<0b01110100100010001>, ++ LASX_3R_DESC_BASE<"xvmuh.hu", int_loongarch_lasx_xvmuh_hu, LASX256HOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVMUH_WU : LASX_3R<0b01110100100010010>, ++ LASX_3R_DESC_BASE<"xvmuh.wu", int_loongarch_lasx_xvmuh_wu, LASX256WOpnd, LASX256WOpnd, LASX256WOpnd> ; ++ ++def XVMUH_DU : LASX_3R<0b01110100100010011>, ++ LASX_3R_DESC_BASE<"xvmuh.du", int_loongarch_lasx_xvmuh_du, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVMULWEV_H_B : LASX_3R<0b01110100100100000>, ++ LASX_3R_DESC_BASE<"xvmulwev.h.b", int_loongarch_lasx_xvmulwev_h_b, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVMULWEV_W_H : LASX_3R<0b01110100100100001>, ++ LASX_3R_DESC_BASE<"xvmulwev.w.h", int_loongarch_lasx_xvmulwev_w_h, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVMULWEV_D_W : LASX_3R<0b01110100100100010>, ++ LASX_3R_DESC_BASE<"xvmulwev.d.w", int_loongarch_lasx_xvmulwev_d_w, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd> ; ++ ++def XVMULWEV_Q_D : LASX_3R<0b01110100100100011>, ++ LASX_3R_DESC_BASE<"xvmulwev.q.d", int_loongarch_lasx_xvmulwev_q_d, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVMULWOD_H_B : LASX_3R<0b01110100100100100>, ++ LASX_3R_DESC_BASE<"xvmulwod.h.b", int_loongarch_lasx_xvmulwod_h_b, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVMULWOD_W_H : LASX_3R<0b01110100100100101>, ++ LASX_3R_DESC_BASE<"xvmulwod.w.h", int_loongarch_lasx_xvmulwod_w_h, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVMULWOD_D_W : LASX_3R<0b01110100100100110>, ++ LASX_3R_DESC_BASE<"xvmulwod.d.w", int_loongarch_lasx_xvmulwod_d_w, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd> ; ++ ++def XVMULWOD_Q_D : LASX_3R<0b01110100100100111>, ++ LASX_3R_DESC_BASE<"xvmulwod.q.d", int_loongarch_lasx_xvmulwod_q_d, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVMULWEV_H_BU : LASX_3R<0b01110100100110000>, ++ LASX_3R_DESC_BASE<"xvmulwev.h.bu", int_loongarch_lasx_xvmulwev_h_bu, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVMULWEV_W_HU : LASX_3R<0b01110100100110001>, ++ LASX_3R_DESC_BASE<"xvmulwev.w.hu", int_loongarch_lasx_xvmulwev_w_hu, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVMULWEV_D_WU : LASX_3R<0b01110100100110010>, ++ LASX_3R_DESC_BASE<"xvmulwev.d.wu", int_loongarch_lasx_xvmulwev_d_wu, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd> ; ++ ++def XVMULWEV_Q_DU : LASX_3R<0b01110100100110011>, ++ LASX_3R_DESC_BASE<"xvmulwev.q.du", int_loongarch_lasx_xvmulwev_q_du, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVMULWOD_H_BU : LASX_3R<0b01110100100110100>, ++ LASX_3R_DESC_BASE<"xvmulwod.h.bu", int_loongarch_lasx_xvmulwod_h_bu, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVMULWOD_W_HU : LASX_3R<0b01110100100110101>, ++ LASX_3R_DESC_BASE<"xvmulwod.w.hu", int_loongarch_lasx_xvmulwod_w_hu, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVMULWOD_D_WU : LASX_3R<0b01110100100110110>, ++ LASX_3R_DESC_BASE<"xvmulwod.d.wu", int_loongarch_lasx_xvmulwod_d_wu, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd> ; ++ ++def XVMULWOD_Q_DU : LASX_3R<0b01110100100110111>, ++ LASX_3R_DESC_BASE<"xvmulwod.q.du", int_loongarch_lasx_xvmulwod_q_du, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVMULWEV_H_BU_B : 
LASX_3R<0b01110100101000000>, ++ LASX_3R_DESC_BASE<"xvmulwev.h.bu.b", int_loongarch_lasx_xvmulwev_h_bu_b, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVMULWEV_W_HU_H : LASX_3R<0b01110100101000001>, ++ LASX_3R_DESC_BASE<"xvmulwev.w.hu.h", int_loongarch_lasx_xvmulwev_w_hu_h, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVMULWEV_D_WU_W : LASX_3R<0b01110100101000010>, ++ LASX_3R_DESC_BASE<"xvmulwev.d.wu.w", int_loongarch_lasx_xvmulwev_d_wu_w, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd> ; ++ ++def XVMULWEV_Q_DU_D : LASX_3R<0b01110100101000011>, ++ LASX_3R_DESC_BASE<"xvmulwev.q.du.d", int_loongarch_lasx_xvmulwev_q_du_d, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVMULWOD_H_BU_B : LASX_3R<0b01110100101000100>, ++ LASX_3R_DESC_BASE<"xvmulwod.h.bu.b", int_loongarch_lasx_xvmulwod_h_bu_b, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVMULWOD_W_HU_H : LASX_3R<0b01110100101000101>, ++ LASX_3R_DESC_BASE<"xvmulwod.w.hu.h", int_loongarch_lasx_xvmulwod_w_hu_h, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVMULWOD_D_WU_W : LASX_3R<0b01110100101000110>, ++ LASX_3R_DESC_BASE<"xvmulwod.d.wu.w", int_loongarch_lasx_xvmulwod_d_wu_w, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd> ; ++ ++def XVMULWOD_Q_DU_D : LASX_3R<0b01110100101000111>, ++ LASX_3R_DESC_BASE<"xvmulwod.q.du.d", int_loongarch_lasx_xvmulwod_q_du_d, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVMADD_B : LASX_3R<0b01110100101010000>, ++ LASX_3R_4R_DESC_BASE<"xvmadd.b", muladd, LASX256BOpnd>; ++ ++def XVMADD_H : LASX_3R<0b01110100101010001>, ++ LASX_3R_4R_DESC_BASE<"xvmadd.h", muladd, LASX256HOpnd>; ++ ++def XVMADD_W : LASX_3R<0b01110100101010010>, ++ LASX_3R_4R_DESC_BASE<"xvmadd.w", muladd, LASX256WOpnd>; ++ ++def XVMADD_D : LASX_3R<0b01110100101010011>, ++ LASX_3R_4R_DESC_BASE<"xvmadd.d", muladd, LASX256DOpnd>; ++ ++ ++def XVMSUB_B : LASX_3R<0b01110100101010100>, ++ LASX_3R_4R_DESC_BASE<"xvmsub.b", mulsub, LASX256BOpnd>; ++ ++def XVMSUB_H : LASX_3R<0b01110100101010101>, ++ LASX_3R_4R_DESC_BASE<"xvmsub.h", mulsub, LASX256HOpnd>; ++ ++def XVMSUB_W : LASX_3R<0b01110100101010110>, ++ LASX_3R_4R_DESC_BASE<"xvmsub.w", mulsub, LASX256WOpnd>; ++ ++def XVMSUB_D : LASX_3R<0b01110100101010111>, ++ LASX_3R_4R_DESC_BASE<"xvmsub.d", mulsub, LASX256DOpnd>; ++ ++ ++def XVMADDWEV_H_B : LASX_3R<0b01110100101011000>, ++ LASX_3R_4R_DESC_BASE<"xvmaddwev.h.b", int_loongarch_lasx_xvmaddwev_h_b, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVMADDWEV_W_H : LASX_3R<0b01110100101011001>, ++ LASX_3R_4R_DESC_BASE<"xvmaddwev.w.h", int_loongarch_lasx_xvmaddwev_w_h, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVMADDWEV_D_W : LASX_3R<0b01110100101011010>, ++ LASX_3R_4R_DESC_BASE<"xvmaddwev.d.w", int_loongarch_lasx_xvmaddwev_d_w, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd>; ++ ++def XVMADDWEV_Q_D : LASX_3R<0b01110100101011011>, ++ LASX_3R_4R_DESC_BASE<"xvmaddwev.q.d", int_loongarch_lasx_xvmaddwev_q_d, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVMADDWOD_H_B : LASX_3R<0b01110100101011100>, ++ LASX_3R_4R_DESC_BASE<"xvmaddwod.h.b", int_loongarch_lasx_xvmaddwod_h_b, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVMADDWOD_W_H : LASX_3R<0b01110100101011101>, ++ LASX_3R_4R_DESC_BASE<"xvmaddwod.w.h", int_loongarch_lasx_xvmaddwod_w_h, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVMADDWOD_D_W : LASX_3R<0b01110100101011110>, ++ LASX_3R_4R_DESC_BASE<"xvmaddwod.d.w", int_loongarch_lasx_xvmaddwod_d_w, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd>; ++ ++def XVMADDWOD_Q_D : LASX_3R<0b01110100101011111>, ++ 
LASX_3R_4R_DESC_BASE<"xvmaddwod.q.d", int_loongarch_lasx_xvmaddwod_q_d, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVMADDWEV_H_BU : LASX_3R<0b01110100101101000>, ++ LASX_3R_4R_DESC_BASE<"xvmaddwev.h.bu", int_loongarch_lasx_xvmaddwev_h_bu, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVMADDWEV_W_HU : LASX_3R<0b01110100101101001>, ++ LASX_3R_4R_DESC_BASE<"xvmaddwev.w.hu", int_loongarch_lasx_xvmaddwev_w_hu, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVMADDWEV_D_WU : LASX_3R<0b01110100101101010>, ++ LASX_3R_4R_DESC_BASE<"xvmaddwev.d.wu", int_loongarch_lasx_xvmaddwev_d_wu, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd>; ++ ++def XVMADDWEV_Q_DU : LASX_3R<0b01110100101101011>, ++ LASX_3R_4R_DESC_BASE<"xvmaddwev.q.du", int_loongarch_lasx_xvmaddwev_q_du, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVMADDWOD_H_BU : LASX_3R<0b01110100101101100>, ++ LASX_3R_4R_DESC_BASE<"xvmaddwod.h.bu", int_loongarch_lasx_xvmaddwod_h_bu, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVMADDWOD_W_HU : LASX_3R<0b01110100101101101>, ++ LASX_3R_4R_DESC_BASE<"xvmaddwod.w.hu", int_loongarch_lasx_xvmaddwod_w_hu, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVMADDWOD_D_WU : LASX_3R<0b01110100101101110>, ++ LASX_3R_4R_DESC_BASE<"xvmaddwod.d.wu", int_loongarch_lasx_xvmaddwod_d_wu, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd>; ++ ++def XVMADDWOD_Q_DU : LASX_3R<0b01110100101101111>, ++ LASX_3R_4R_DESC_BASE<"xvmaddwod.q.du", int_loongarch_lasx_xvmaddwod_q_du, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVMADDWEV_H_BU_B : LASX_3R<0b01110100101111000>, ++ LASX_3R_4R_DESC_BASE<"xvmaddwev.h.bu.b", int_loongarch_lasx_xvmaddwev_h_bu_b, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVMADDWEV_W_HU_H : LASX_3R<0b01110100101111001>, ++ LASX_3R_4R_DESC_BASE<"xvmaddwev.w.hu.h", int_loongarch_lasx_xvmaddwev_w_hu_h, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVMADDWEV_D_WU_W : LASX_3R<0b01110100101111010>, ++ LASX_3R_4R_DESC_BASE<"xvmaddwev.d.wu.w", int_loongarch_lasx_xvmaddwev_d_wu_w, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd>; ++ ++def XVMADDWEV_Q_DU_D : LASX_3R<0b01110100101111011>, ++ LASX_3R_4R_DESC_BASE<"xvmaddwev.q.du.d", int_loongarch_lasx_xvmaddwev_q_du_d, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVMADDWOD_H_BU_B : LASX_3R<0b01110100101111100>, ++ LASX_3R_4R_DESC_BASE<"xvmaddwod.h.bu.b", int_loongarch_lasx_xvmaddwod_h_bu_b, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVMADDWOD_W_HU_H : LASX_3R<0b01110100101111101>, ++ LASX_3R_4R_DESC_BASE<"xvmaddwod.w.hu.h", int_loongarch_lasx_xvmaddwod_w_hu_h, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVMADDWOD_D_WU_W : LASX_3R<0b01110100101111110>, ++ LASX_3R_4R_DESC_BASE<"xvmaddwod.d.wu.w", int_loongarch_lasx_xvmaddwod_d_wu_w, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd>; ++ ++def XVMADDWOD_Q_DU_D : LASX_3R<0b01110100101111111>, ++ LASX_3R_4R_DESC_BASE<"xvmaddwod.q.du.d", int_loongarch_lasx_xvmaddwod_q_du_d, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVDIV_B : LASX_3R<0b01110100111000000>, ++ LASX_3R_DESC_BASE<"xvdiv.b", sdiv, LASX256BOpnd>; ++ ++def XVDIV_H : LASX_3R<0b01110100111000001>, ++ LASX_3R_DESC_BASE<"xvdiv.h", sdiv, LASX256HOpnd>; ++ ++def XVDIV_W : LASX_3R<0b01110100111000010>, ++ LASX_3R_DESC_BASE<"xvdiv.w", sdiv, LASX256WOpnd>; ++ ++def XVDIV_D : LASX_3R<0b01110100111000011>, ++ LASX_3R_DESC_BASE<"xvdiv.d", sdiv, LASX256DOpnd>; ++ ++ ++def XVMOD_B : LASX_3R<0b01110100111000100>, ++ LASX_3R_DESC_BASE<"xvmod.b", srem, LASX256BOpnd>; ++ ++def XVMOD_H : 
LASX_3R<0b01110100111000101>, ++ LASX_3R_DESC_BASE<"xvmod.h", srem, LASX256HOpnd>; ++ ++def XVMOD_W : LASX_3R<0b01110100111000110>, ++ LASX_3R_DESC_BASE<"xvmod.w", srem, LASX256WOpnd>; ++ ++def XVMOD_D : LASX_3R<0b01110100111000111>, ++ LASX_3R_DESC_BASE<"xvmod.d", srem, LASX256DOpnd>; ++ ++ ++def XVDIV_BU : LASX_3R<0b01110100111001000>, ++ LASX_3R_DESC_BASE<"xvdiv.bu", udiv, LASX256BOpnd>; ++ ++def XVDIV_HU : LASX_3R<0b01110100111001001>, ++ LASX_3R_DESC_BASE<"xvdiv.hu", udiv, LASX256HOpnd>; ++ ++def XVDIV_WU : LASX_3R<0b01110100111001010>, ++ LASX_3R_DESC_BASE<"xvdiv.wu", udiv, LASX256WOpnd>; ++ ++def XVDIV_DU : LASX_3R<0b01110100111001011>, ++ LASX_3R_DESC_BASE<"xvdiv.du", udiv, LASX256DOpnd>; ++ ++ ++def XVMOD_BU : LASX_3R<0b01110100111001100>, ++ LASX_3R_DESC_BASE<"xvmod.bu", urem, LASX256BOpnd>; ++ ++def XVMOD_HU : LASX_3R<0b01110100111001101>, ++ LASX_3R_DESC_BASE<"xvmod.hu", urem, LASX256HOpnd>; ++ ++def XVMOD_WU : LASX_3R<0b01110100111001110>, ++ LASX_3R_DESC_BASE<"xvmod.wu", urem, LASX256WOpnd>; ++ ++def XVMOD_DU : LASX_3R<0b01110100111001111>, ++ LASX_3R_DESC_BASE<"xvmod.du", urem, LASX256DOpnd>; ++ ++ ++def XVSLL_B : LASX_3R<0b01110100111010000>, ++ LASX_3R_DESC_BASE<"xvsll.b", shl, LASX256BOpnd>; ++ ++def XVSLL_H : LASX_3R<0b01110100111010001>, ++ LASX_3R_DESC_BASE<"xvsll.h", shl, LASX256HOpnd>; ++ ++def XVSLL_W : LASX_3R<0b01110100111010010>, ++ LASX_3R_DESC_BASE<"xvsll.w", shl, LASX256WOpnd>; ++ ++def XVSLL_D : LASX_3R<0b01110100111010011>, ++ LASX_3R_DESC_BASE<"xvsll.d", shl, LASX256DOpnd>; ++ ++ ++def XVSRL_B : LASX_3R<0b01110100111010100>, ++ LASX_3R_DESC_BASE<"xvsrl.b", srl, LASX256BOpnd>; ++ ++def XVSRL_H : LASX_3R<0b01110100111010101>, ++ LASX_3R_DESC_BASE<"xvsrl.h", srl, LASX256HOpnd>; ++ ++def XVSRL_W : LASX_3R<0b01110100111010110>, ++ LASX_3R_DESC_BASE<"xvsrl.w", srl, LASX256WOpnd>; ++ ++def XVSRL_D : LASX_3R<0b01110100111010111>, ++ LASX_3R_DESC_BASE<"xvsrl.d", srl, LASX256DOpnd>; ++ ++ ++def XVSRA_B : LASX_3R<0b01110100111011000>, ++ LASX_3R_DESC_BASE<"xvsra.b", sra, LASX256BOpnd>; ++ ++def XVSRA_H : LASX_3R<0b01110100111011001>, ++ LASX_3R_DESC_BASE<"xvsra.h", sra, LASX256HOpnd>; ++ ++def XVSRA_W : LASX_3R<0b01110100111011010>, ++ LASX_3R_DESC_BASE<"xvsra.w", sra, LASX256WOpnd>; ++ ++def XVSRA_D : LASX_3R<0b01110100111011011>, ++ LASX_3R_DESC_BASE<"xvsra.d", sra, LASX256DOpnd>; ++ ++ ++def XVROTR_B : LASX_3R<0b01110100111011100>, ++ LASX_3R_DESC_BASE<"xvrotr.b", int_loongarch_lasx_xvrotr_b, LASX256BOpnd>; ++ ++def XVROTR_H : LASX_3R<0b01110100111011101>, ++ LASX_3R_DESC_BASE<"xvrotr.h", int_loongarch_lasx_xvrotr_h, LASX256HOpnd>; ++ ++def XVROTR_W : LASX_3R<0b01110100111011110>, ++ LASX_3R_DESC_BASE<"xvrotr.w", int_loongarch_lasx_xvrotr_w, LASX256WOpnd>; ++ ++def XVROTR_D : LASX_3R<0b01110100111011111>, ++ LASX_3R_DESC_BASE<"xvrotr.d", int_loongarch_lasx_xvrotr_d, LASX256DOpnd>; ++ ++ ++def XVSRLR_B : LASX_3R<0b01110100111100000>, ++ LASX_3R_DESC_BASE<"xvsrlr.b", int_loongarch_lasx_xvsrlr_b, LASX256BOpnd>; ++ ++def XVSRLR_H : LASX_3R<0b01110100111100001>, ++ LASX_3R_DESC_BASE<"xvsrlr.h", int_loongarch_lasx_xvsrlr_h, LASX256HOpnd>; ++ ++def XVSRLR_W : LASX_3R<0b01110100111100010>, ++ LASX_3R_DESC_BASE<"xvsrlr.w", int_loongarch_lasx_xvsrlr_w, LASX256WOpnd>; ++ ++def XVSRLR_D : LASX_3R<0b01110100111100011>, ++ LASX_3R_DESC_BASE<"xvsrlr.d", int_loongarch_lasx_xvsrlr_d, LASX256DOpnd>; ++ ++ ++def XVSRAR_B : LASX_3R<0b01110100111100100>, ++ LASX_3R_DESC_BASE<"xvsrar.b", int_loongarch_lasx_xvsrar_b, LASX256BOpnd>; ++ ++def XVSRAR_H : LASX_3R<0b01110100111100101>, ++ 
LASX_3R_DESC_BASE<"xvsrar.h", int_loongarch_lasx_xvsrar_h, LASX256HOpnd>; ++ ++def XVSRAR_W : LASX_3R<0b01110100111100110>, ++ LASX_3R_DESC_BASE<"xvsrar.w", int_loongarch_lasx_xvsrar_w, LASX256WOpnd>; ++ ++def XVSRAR_D : LASX_3R<0b01110100111100111>, ++ LASX_3R_DESC_BASE<"xvsrar.d", int_loongarch_lasx_xvsrar_d, LASX256DOpnd>; ++ ++ ++def XVSRLN_B_H : LASX_3R<0b01110100111101001>, ++ LASX_3R_DESC_BASE<"xvsrln.b.h", int_loongarch_lasx_xvsrln_b_h, LASX256BOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVSRLN_H_W : LASX_3R<0b01110100111101010>, ++ LASX_3R_DESC_BASE<"xvsrln.h.w", int_loongarch_lasx_xvsrln_h_w, LASX256HOpnd, LASX256WOpnd, LASX256WOpnd>; ++ ++def XVSRLN_W_D : LASX_3R<0b01110100111101011>, ++ LASX_3R_DESC_BASE<"xvsrln.w.d", int_loongarch_lasx_xvsrln_w_d, LASX256WOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVSRAN_B_H : LASX_3R<0b01110100111101101>, ++ LASX_3R_DESC_BASE<"xvsran.b.h", int_loongarch_lasx_xvsran_b_h, LASX256BOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVSRAN_H_W : LASX_3R<0b01110100111101110>, ++ LASX_3R_DESC_BASE<"xvsran.h.w", int_loongarch_lasx_xvsran_h_w, LASX256HOpnd, LASX256WOpnd, LASX256WOpnd>; ++ ++def XVSRAN_W_D : LASX_3R<0b01110100111101111>, ++ LASX_3R_DESC_BASE<"xvsran.w.d", int_loongarch_lasx_xvsran_w_d, LASX256WOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVSRLRN_B_H : LASX_3R<0b01110100111110001>, ++ LASX_3R_DESC_BASE<"xvsrlrn.b.h", int_loongarch_lasx_xvsrlrn_b_h, LASX256BOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVSRLRN_H_W : LASX_3R<0b01110100111110010>, ++ LASX_3R_DESC_BASE<"xvsrlrn.h.w", int_loongarch_lasx_xvsrlrn_h_w, LASX256HOpnd, LASX256WOpnd, LASX256WOpnd>; ++ ++def XVSRLRN_W_D : LASX_3R<0b01110100111110011>, ++ LASX_3R_DESC_BASE<"xvsrlrn.w.d", int_loongarch_lasx_xvsrlrn_w_d, LASX256WOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVSRARN_B_H : LASX_3R<0b01110100111110101>, ++ LASX_3R_DESC_BASE<"xvsrarn.b.h", int_loongarch_lasx_xvsrarn_b_h, LASX256BOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVSRARN_H_W : LASX_3R<0b01110100111110110>, ++ LASX_3R_DESC_BASE<"xvsrarn.h.w", int_loongarch_lasx_xvsrarn_h_w, LASX256HOpnd, LASX256WOpnd, LASX256WOpnd>; ++ ++def XVSRARN_W_D : LASX_3R<0b01110100111110111>, ++ LASX_3R_DESC_BASE<"xvsrarn.w.d", int_loongarch_lasx_xvsrarn_w_d, LASX256WOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVSSRLN_B_H : LASX_3R<0b01110100111111001>, ++ LASX_3R_DESC_BASE<"xvssrln.b.h", int_loongarch_lasx_xvssrln_b_h, LASX256BOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVSSRLN_H_W : LASX_3R<0b01110100111111010>, ++ LASX_3R_DESC_BASE<"xvssrln.h.w", int_loongarch_lasx_xvssrln_h_w, LASX256HOpnd, LASX256WOpnd, LASX256WOpnd>; ++ ++def XVSSRLN_W_D : LASX_3R<0b01110100111111011>, ++ LASX_3R_DESC_BASE<"xvssrln.w.d", int_loongarch_lasx_xvssrln_w_d, LASX256WOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVSSRAN_B_H : LASX_3R<0b01110100111111101>, ++ LASX_3R_DESC_BASE<"xvssran.b.h", int_loongarch_lasx_xvssran_b_h, LASX256BOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVSSRAN_H_W : LASX_3R<0b01110100111111110>, ++ LASX_3R_DESC_BASE<"xvssran.h.w", int_loongarch_lasx_xvssran_h_w, LASX256HOpnd, LASX256WOpnd, LASX256WOpnd>; ++ ++def XVSSRAN_W_D : LASX_3R<0b01110100111111111>, ++ LASX_3R_DESC_BASE<"xvssran.w.d", int_loongarch_lasx_xvssran_w_d, LASX256WOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVSSRLRN_B_H : LASX_3R<0b01110101000000001>, ++ LASX_3R_DESC_BASE<"xvssrlrn.b.h", int_loongarch_lasx_xvssrlrn_b_h, LASX256BOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVSSRLRN_H_W : LASX_3R<0b01110101000000010>, ++ LASX_3R_DESC_BASE<"xvssrlrn.h.w", 
int_loongarch_lasx_xvssrlrn_h_w, LASX256HOpnd, LASX256WOpnd, LASX256WOpnd>; ++ ++def XVSSRLRN_W_D : LASX_3R<0b01110101000000011>, ++ LASX_3R_DESC_BASE<"xvssrlrn.w.d", int_loongarch_lasx_xvssrlrn_w_d, LASX256WOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVSSRARN_B_H : LASX_3R<0b01110101000000101>, ++ LASX_3R_DESC_BASE<"xvssrarn.b.h", int_loongarch_lasx_xvssrarn_b_h, LASX256BOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVSSRARN_H_W : LASX_3R<0b01110101000000110>, ++ LASX_3R_DESC_BASE<"xvssrarn.h.w", int_loongarch_lasx_xvssrarn_h_w, LASX256HOpnd, LASX256WOpnd, LASX256WOpnd>; ++ ++def XVSSRARN_W_D : LASX_3R<0b01110101000000111>, ++ LASX_3R_DESC_BASE<"xvssrarn.w.d", int_loongarch_lasx_xvssrarn_w_d, LASX256WOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVSSRLN_BU_H : LASX_3R<0b01110101000001001>, ++ LASX_3R_DESC_BASE<"xvssrln.bu.h", int_loongarch_lasx_xvssrln_bu_h, LASX256BOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVSSRLN_HU_W : LASX_3R<0b01110101000001010>, ++ LASX_3R_DESC_BASE<"xvssrln.hu.w", int_loongarch_lasx_xvssrln_hu_w, LASX256HOpnd, LASX256WOpnd, LASX256WOpnd>; ++ ++def XVSSRLN_WU_D : LASX_3R<0b01110101000001011>, ++ LASX_3R_DESC_BASE<"xvssrln.wu.d", int_loongarch_lasx_xvssrln_wu_d, LASX256WOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVSSRAN_BU_H : LASX_3R<0b01110101000001101>, ++ LASX_3R_DESC_BASE<"xvssran.bu.h", int_loongarch_lasx_xvssran_bu_h, LASX256BOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVSSRAN_HU_W : LASX_3R<0b01110101000001110>, ++ LASX_3R_DESC_BASE<"xvssran.hu.w", int_loongarch_lasx_xvssran_hu_w, LASX256HOpnd, LASX256WOpnd, LASX256WOpnd>; ++ ++def XVSSRAN_WU_D : LASX_3R<0b01110101000001111>, ++ LASX_3R_DESC_BASE<"xvssran.wu.d", int_loongarch_lasx_xvssran_wu_d, LASX256WOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVSSRLRN_BU_H : LASX_3R<0b01110101000010001>, ++ LASX_3R_DESC_BASE<"xvssrlrn.bu.h", int_loongarch_lasx_xvssrlrn_bu_h, LASX256BOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVSSRLRN_HU_W : LASX_3R<0b01110101000010010>, ++ LASX_3R_DESC_BASE<"xvssrlrn.hu.w", int_loongarch_lasx_xvssrlrn_hu_w, LASX256HOpnd, LASX256WOpnd, LASX256WOpnd>; ++ ++def XVSSRLRN_WU_D : LASX_3R<0b01110101000010011>, ++ LASX_3R_DESC_BASE<"xvssrlrn.wu.d", int_loongarch_lasx_xvssrlrn_wu_d, LASX256WOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVSSRARN_BU_H : LASX_3R<0b01110101000010101>, ++ LASX_3R_DESC_BASE<"xvssrarn.bu.h", int_loongarch_lasx_xvssrarn_bu_h, LASX256BOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVSSRARN_HU_W : LASX_3R<0b01110101000010110>, ++ LASX_3R_DESC_BASE<"xvssrarn.hu.w", int_loongarch_lasx_xvssrarn_hu_w, LASX256HOpnd, LASX256WOpnd, LASX256WOpnd>; ++ ++def XVSSRARN_WU_D : LASX_3R<0b01110101000010111>, ++ LASX_3R_DESC_BASE<"xvssrarn.wu.d", int_loongarch_lasx_xvssrarn_wu_d, LASX256WOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVBITCLR_B : LASX_3R<0b01110101000011000>, ++ LASX_3R_DESC_BASE<"xvbitclr.b", xvbitclr_b, LASX256BOpnd>; ++ ++def XVBITCLR_H : LASX_3R<0b01110101000011001>, ++ LASX_3R_DESC_BASE<"xvbitclr.h", xvbitclr_h, LASX256HOpnd>; ++ ++def XVBITCLR_W : LASX_3R<0b01110101000011010>, ++ LASX_3R_DESC_BASE<"xvbitclr.w", xvbitclr_w, LASX256WOpnd>; ++ ++def XVBITCLR_D : LASX_3R<0b01110101000011011>, ++ LASX_3R_DESC_BASE<"xvbitclr.d", xvbitclr_d, LASX256DOpnd>; ++ ++ ++def XVBITSET_B : LASX_3R<0b01110101000011100>, ++ LASX_3R_DESC_BASE<"xvbitset.b", int_loongarch_lasx_xvbitset_b, LASX256BOpnd>; ++ ++def XVBITSET_H : LASX_3R<0b01110101000011101>, ++ LASX_3R_DESC_BASE<"xvbitset.h", int_loongarch_lasx_xvbitset_h, LASX256HOpnd>; ++ ++def XVBITSET_W : 
LASX_3R<0b01110101000011110>, ++ LASX_3R_DESC_BASE<"xvbitset.w", int_loongarch_lasx_xvbitset_w, LASX256WOpnd>; ++ ++def XVBITSET_D : LASX_3R<0b01110101000011111>, ++ LASX_3R_DESC_BASE<"xvbitset.d", int_loongarch_lasx_xvbitset_d, LASX256DOpnd>; ++ ++ ++def XVBITREV_B : LASX_3R<0b01110101000100000>, ++ LASX_3R_DESC_BASE<"xvbitrev.b", int_loongarch_lasx_xvbitrev_b, LASX256BOpnd>; ++ ++def XVBITREV_H : LASX_3R<0b01110101000100001>, ++ LASX_3R_DESC_BASE<"xvbitrev.h", int_loongarch_lasx_xvbitrev_h, LASX256HOpnd>; ++ ++def XVBITREV_W : LASX_3R<0b01110101000100010>, ++ LASX_3R_DESC_BASE<"xvbitrev.w", int_loongarch_lasx_xvbitrev_w, LASX256WOpnd>; ++ ++def XVBITREV_D : LASX_3R<0b01110101000100011>, ++ LASX_3R_DESC_BASE<"xvbitrev.d", int_loongarch_lasx_xvbitrev_d, LASX256DOpnd>; ++ ++ ++def XVPACKEV_B : LASX_3R<0b01110101000101100>, ++ LASX_3R_DESC_BASE<"xvpackev.b", LoongArchVPACKEV, LASX256BOpnd>; ++ ++def XVPACKEV_H : LASX_3R<0b01110101000101101>, ++ LASX_3R_DESC_BASE<"xvpackev.h", LoongArchVPACKEV, LASX256HOpnd>; ++ ++def XVPACKEV_W : LASX_3R<0b01110101000101110>, ++ LASX_3R_DESC_BASE<"xvpackev.w", LoongArchVPACKEV, LASX256WOpnd>; ++ ++def XVPACKEV_D : LASX_3R<0b01110101000101111>, ++ LASX_3R_DESC_BASE<"xvpackev.d", LoongArchVPACKEV, LASX256DOpnd>; ++ ++ ++def XVPACKOD_B : LASX_3R<0b01110101000110000>, ++ LASX_3R_DESC_BASE<"xvpackod.b", LoongArchVPACKOD, LASX256BOpnd>; ++ ++def XVPACKOD_H : LASX_3R<0b01110101000110001>, ++ LASX_3R_DESC_BASE<"xvpackod.h", LoongArchVPACKOD, LASX256HOpnd>; ++ ++def XVPACKOD_W : LASX_3R<0b01110101000110010>, ++ LASX_3R_DESC_BASE<"xvpackod.w", LoongArchVPACKOD, LASX256WOpnd>; ++ ++def XVPACKOD_D : LASX_3R<0b01110101000110011>, ++ LASX_3R_DESC_BASE<"xvpackod.d", LoongArchVPACKOD, LASX256DOpnd>; ++ ++ ++def XVILVL_B : LASX_3R<0b01110101000110100>, ++ LASX_3R_DESC_BASE<"xvilvl.b", LoongArchVILVL, LASX256BOpnd>; ++ ++def XVILVL_H : LASX_3R<0b01110101000110101>, ++ LASX_3R_DESC_BASE<"xvilvl.h", LoongArchVILVL, LASX256HOpnd>; ++ ++def XVILVL_W : LASX_3R<0b01110101000110110>, ++ LASX_3R_DESC_BASE<"xvilvl.w", LoongArchVILVL, LASX256WOpnd>; ++ ++def XVILVL_D : LASX_3R<0b01110101000110111>, ++ LASX_3R_DESC_BASE<"xvilvl.d", LoongArchVILVL, LASX256DOpnd>; ++ ++ ++def XVILVH_B : LASX_3R<0b01110101000111000>, ++ LASX_3R_DESC_BASE<"xvilvh.b", LoongArchVILVH, LASX256BOpnd>; ++ ++def XVILVH_H : LASX_3R<0b01110101000111001>, ++ LASX_3R_DESC_BASE<"xvilvh.h", LoongArchVILVH, LASX256HOpnd>; ++ ++def XVILVH_W : LASX_3R<0b01110101000111010>, ++ LASX_3R_DESC_BASE<"xvilvh.w", LoongArchVILVH, LASX256WOpnd>; ++ ++def XVILVH_D : LASX_3R<0b01110101000111011>, ++ LASX_3R_DESC_BASE<"xvilvh.d", LoongArchVILVH, LASX256DOpnd>; ++ ++ ++def XVPICKEV_B : LASX_3R<0b01110101000111100>, ++ LASX_3R_DESC_BASE<"xvpickev.b", LoongArchVPICKEV, LASX256BOpnd>; ++ ++def XVPICKEV_H : LASX_3R<0b01110101000111101>, ++ LASX_3R_DESC_BASE<"xvpickev.h", LoongArchVPICKEV, LASX256HOpnd>; ++ ++def XVPICKEV_W : LASX_3R<0b01110101000111110>, ++ LASX_3R_DESC_BASE<"xvpickev.w", LoongArchVPICKEV, LASX256WOpnd>; ++ ++def XVPICKEV_D : LASX_3R<0b01110101000111111>, ++ LASX_3R_DESC_BASE<"xvpickev.d", LoongArchVPICKEV, LASX256DOpnd>; ++ ++ ++def XVPICKOD_B : LASX_3R<0b01110101001000000>, ++ LASX_3R_DESC_BASE<"xvpickod.b", LoongArchVPICKOD, LASX256BOpnd>; ++ ++def XVPICKOD_H : LASX_3R<0b01110101001000001>, ++ LASX_3R_DESC_BASE<"xvpickod.h", LoongArchVPICKOD, LASX256HOpnd>; ++ ++def XVPICKOD_W : LASX_3R<0b01110101001000010>, ++ LASX_3R_DESC_BASE<"xvpickod.w", LoongArchVPICKOD, LASX256WOpnd>; ++ ++def XVPICKOD_D : 
LASX_3R<0b01110101001000011>, ++ LASX_3R_DESC_BASE<"xvpickod.d", LoongArchVPICKOD, LASX256DOpnd>; ++ ++ ++def XVREPLVE_B : LASX_3R_1GP<0b01110101001000100>, ++ LASX_3R_VREPLVE_DESC_BASE<"xvreplve.b", int_loongarch_lasx_xvreplve_b, LASX256BOpnd>; ++ ++def XVREPLVE_H : LASX_3R_1GP<0b01110101001000101>, ++ LASX_3R_VREPLVE_DESC_BASE<"xvreplve.h", int_loongarch_lasx_xvreplve_h, LASX256HOpnd>; ++ ++def XVREPLVE_W : LASX_3R_1GP<0b01110101001000110>, ++ LASX_3R_VREPLVE_DESC_BASE<"xvreplve.w", int_loongarch_lasx_xvreplve_w, LASX256WOpnd>; ++ ++def XVREPLVE_D : LASX_3R_1GP<0b01110101001000111>, ++ LASX_3R_VREPLVE_DESC_BASE<"xvreplve.d", int_loongarch_lasx_xvreplve_d, LASX256DOpnd>; ++ ++ ++def XVAND_V : LASX_3R<0b01110101001001100>, ++ LASX_VEC_DESC_BASE<"xvand.v", and, LASX256BOpnd>; ++class XAND_V_H_PSEUDO_DESC : LASX_VEC_PSEUDO_BASE; ++class XAND_V_W_PSEUDO_DESC : LASX_VEC_PSEUDO_BASE; ++class XAND_V_D_PSEUDO_DESC : LASX_VEC_PSEUDO_BASE; ++ ++def XAND_V_H_PSEUDO : XAND_V_H_PSEUDO_DESC, ++ PseudoInstExpansion<(XVAND_V LASX256BOpnd:$xd, ++ LASX256BOpnd:$xj, ++ LASX256BOpnd:$xk)>; ++def XAND_V_W_PSEUDO : XAND_V_W_PSEUDO_DESC, ++ PseudoInstExpansion<(XVAND_V LASX256BOpnd:$xd, ++ LASX256BOpnd:$xj, ++ LASX256BOpnd:$xk)>; ++def XAND_V_D_PSEUDO : XAND_V_D_PSEUDO_DESC, ++ PseudoInstExpansion<(XVAND_V LASX256BOpnd:$xd, ++ LASX256BOpnd:$xj, ++ LASX256BOpnd:$xk)>; ++ ++ ++def XVOR_V : LASX_3R<0b01110101001001101>, ++ LASX_VEC_DESC_BASE<"xvor.v", or, LASX256BOpnd>; ++class X_OR_V_H_PSEUDO_DESC : LASX_VEC_PSEUDO_BASE; ++class X_OR_V_W_PSEUDO_DESC : LASX_VEC_PSEUDO_BASE; ++class X_OR_V_D_PSEUDO_DESC : LASX_VEC_PSEUDO_BASE; ++ ++def X_OR_V_H_PSEUDO : X_OR_V_H_PSEUDO_DESC, ++ PseudoInstExpansion<(XVOR_V LASX256BOpnd:$xd, ++ LASX256BOpnd:$xj, ++ LASX256BOpnd:$xk)>; ++def X_OR_V_W_PSEUDO : X_OR_V_W_PSEUDO_DESC, ++ PseudoInstExpansion<(XVOR_V LASX256BOpnd:$xd, ++ LASX256BOpnd:$xj, ++ LASX256BOpnd:$xk)>; ++def X_OR_V_D_PSEUDO : X_OR_V_D_PSEUDO_DESC, ++ PseudoInstExpansion<(XVOR_V LASX256BOpnd:$xd, ++ LASX256BOpnd:$xj, ++ LASX256BOpnd:$xk)>; ++ ++ ++def XVXOR_V : LASX_3R<0b01110101001001110>, ++ LASX_VEC_DESC_BASE<"xvxor.v", xor, LASX256BOpnd>; ++class XXOR_V_H_PSEUDO_DESC : LASX_VEC_PSEUDO_BASE; ++class XXOR_V_W_PSEUDO_DESC : LASX_VEC_PSEUDO_BASE; ++class XXOR_V_D_PSEUDO_DESC : LASX_VEC_PSEUDO_BASE; ++ ++def XXOR_V_H_PSEUDO : XXOR_V_H_PSEUDO_DESC, ++ PseudoInstExpansion<(XVXOR_V LASX256BOpnd:$xd, ++ LASX256BOpnd:$xj, ++ LASX256BOpnd:$xk)>; ++def XXOR_V_W_PSEUDO : XXOR_V_W_PSEUDO_DESC, ++ PseudoInstExpansion<(XVXOR_V LASX256BOpnd:$xd, ++ LASX256BOpnd:$xj, ++ LASX256BOpnd:$xk)>; ++def XXOR_V_D_PSEUDO : XXOR_V_D_PSEUDO_DESC, ++ PseudoInstExpansion<(XVXOR_V LASX256BOpnd:$xd, ++ LASX256BOpnd:$xj, ++ LASX256BOpnd:$xk)>; ++ ++ ++def XVNOR_V : LASX_3R<0b01110101001001111>, ++ LASX_VEC_DESC_BASE<"xvnor.v", LoongArchVNOR, LASX256BOpnd>; ++ ++class XNOR_V_H_PSEUDO_DESC : LASX_VEC_PSEUDO_BASE; ++class XNOR_V_W_PSEUDO_DESC : LASX_VEC_PSEUDO_BASE; ++class XNOR_V_D_PSEUDO_DESC : LASX_VEC_PSEUDO_BASE; ++ ++def XNOR_V_H_PSEUDO : XNOR_V_H_PSEUDO_DESC, ++ PseudoInstExpansion<(XVNOR_V LASX256BOpnd:$xd, ++ LASX256BOpnd:$xj, ++ LASX256BOpnd:$xk)>; ++def XNOR_V_W_PSEUDO : XNOR_V_W_PSEUDO_DESC, ++ PseudoInstExpansion<(XVNOR_V LASX256BOpnd:$xd, ++ LASX256BOpnd:$xj, ++ LASX256BOpnd:$xk)>; ++def XNOR_V_D_PSEUDO : XNOR_V_D_PSEUDO_DESC, ++ PseudoInstExpansion<(XVNOR_V LASX256BOpnd:$xd, ++ LASX256BOpnd:$xj, ++ LASX256BOpnd:$xk)>; ++ ++ ++def XVANDN_V : LASX_3R<0b01110101001010000>, ++ LASX_3R_DESC_BASE<"xvandn.v", int_loongarch_lasx_xvandn_v, 
LASX256BOpnd>; ++ ++ ++class LASX_ANDN_PSEUDO_BASE : ++ LASXPseudo<(outs RO:$xd), (ins RO:$xj, RO:$xk), ++ []>, ++ PseudoInstExpansion<(XVANDN_V LASX256BOpnd:$xd, ++ LASX256BOpnd:$xj, ++ LASX256BOpnd:$xk)>; ++ ++def XVANDN_H_PSEUDO : LASX_ANDN_PSEUDO_BASE; ++def XVANDN_W_PSEUDO : LASX_ANDN_PSEUDO_BASE; ++def XVANDN_D_PSEUDO : LASX_ANDN_PSEUDO_BASE; ++ ++ ++def XVORN_V : LASX_3R<0b01110101001010001>, ++ LASX_3R_DESC_BASE<"xvorn.v", int_loongarch_lasx_xvorn_v, LASX256BOpnd>; ++ ++ ++class LASX_ORN_PSEUDO_BASE : ++ LASXPseudo<(outs RO:$xd), (ins RO:$xj, RO:$xk), ++ []>, ++ PseudoInstExpansion<(XVORN_V LASX256BOpnd:$xd, ++ LASX256BOpnd:$xj, ++ LASX256BOpnd:$xk)>; ++ ++def XVORN_H_PSEUDO : LASX_ORN_PSEUDO_BASE; ++def XVORN_W_PSEUDO : LASX_ORN_PSEUDO_BASE; ++def XVORN_D_PSEUDO : LASX_ORN_PSEUDO_BASE; ++ ++ ++def XVFRSTP_B : LASX_3R<0b01110101001010110>, ++ LASX_3R_4R_DESC_BASE<"xvfrstp.b", int_loongarch_lasx_xvfrstp_b, LASX256BOpnd>; ++ ++def XVFRSTP_H : LASX_3R<0b01110101001010111>, ++ LASX_3R_4R_DESC_BASE<"xvfrstp.h", int_loongarch_lasx_xvfrstp_h, LASX256HOpnd>; ++ ++ ++def XVADD_Q : LASX_3R<0b01110101001011010>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvadd.q", int_loongarch_lasx_xvadd_q, LASX256DOpnd>; ++ ++def XVSUB_Q : LASX_3R<0b01110101001011011>, ++ LASX_3R_DESC_BASE<"xvsub.q", int_loongarch_lasx_xvsub_q, LASX256DOpnd>; ++ ++ ++def XVSIGNCOV_B : LASX_3R<0b01110101001011100>, ++ LASX_3R_DESC_BASE<"xvsigncov.b", int_loongarch_lasx_xvsigncov_b, LASX256BOpnd>; ++ ++def XVSIGNCOV_H : LASX_3R<0b01110101001011101>, ++ LASX_3R_DESC_BASE<"xvsigncov.h", int_loongarch_lasx_xvsigncov_h, LASX256HOpnd>; ++ ++def XVSIGNCOV_W : LASX_3R<0b01110101001011110>, ++ LASX_3R_DESC_BASE<"xvsigncov.w", int_loongarch_lasx_xvsigncov_w, LASX256WOpnd>; ++ ++def XVSIGNCOV_D : LASX_3R<0b01110101001011111>, ++ LASX_3R_DESC_BASE<"xvsigncov.d", int_loongarch_lasx_xvsigncov_d, LASX256DOpnd>; ++ ++ ++def XVFADD_S : LASX_3R<0b01110101001100001>, IsCommutable, ++ LASX_3RF_DESC_BASE<"xvfadd.s", fadd, LASX256WOpnd>; ++ ++def XVFADD_D : LASX_3R<0b01110101001100010>, IsCommutable, ++ LASX_3RF_DESC_BASE<"xvfadd.d", fadd, LASX256DOpnd>; ++ ++ ++def XVFSUB_S : LASX_3R<0b01110101001100101>, ++ LASX_3RF_DESC_BASE<"xvfsub.s", fsub, LASX256WOpnd>; ++ ++def XVFSUB_D : LASX_3R<0b01110101001100110>, ++ LASX_3RF_DESC_BASE<"xvfsub.d", fsub, LASX256DOpnd>; ++ ++ ++def XVFMUL_S : LASX_3R<0b01110101001110001>, ++ LASX_3RF_DESC_BASE<"xvfmul.s", fmul, LASX256WOpnd>; ++ ++def XVFMUL_D : LASX_3R<0b01110101001110010>, ++ LASX_3RF_DESC_BASE<"xvfmul.d", fmul, LASX256DOpnd>; ++ ++ ++def XVFDIV_S : LASX_3R<0b01110101001110101>, ++ LASX_3RF_DESC_BASE<"xvfdiv.s", fdiv, LASX256WOpnd>; ++ ++def XVFDIV_D : LASX_3R<0b01110101001110110>, ++ LASX_3RF_DESC_BASE<"xvfdiv.d", fdiv, LASX256DOpnd>; ++ ++ ++def XVFMAX_S : LASX_3R<0b01110101001111001>, ++ LASX_3RF_DESC_BASE<"xvfmax.s", int_loongarch_lasx_xvfmax_s, LASX256WOpnd>; ++ ++def XVFMAX_D : LASX_3R<0b01110101001111010>, ++ LASX_3RF_DESC_BASE<"xvfmax.d", int_loongarch_lasx_xvfmax_d, LASX256DOpnd>; ++ ++ ++def XVFMIN_S : LASX_3R<0b01110101001111101>, ++ LASX_3RF_DESC_BASE<"xvfmin.s", int_loongarch_lasx_xvfmin_s, LASX256WOpnd>; ++ ++def XVFMIN_D : LASX_3R<0b01110101001111110>, ++ LASX_3RF_DESC_BASE<"xvfmin.d", int_loongarch_lasx_xvfmin_d, LASX256DOpnd>; ++ ++ ++def XVFMAXA_S : LASX_3R<0b01110101010000001>, ++ LASX_3RF_DESC_BASE<"xvfmaxa.s", int_loongarch_lasx_xvfmaxa_s, LASX256WOpnd>; ++ ++def XVFMAXA_D : LASX_3R<0b01110101010000010>, ++ LASX_3RF_DESC_BASE<"xvfmaxa.d", int_loongarch_lasx_xvfmaxa_d, LASX256DOpnd>; ++ 
++ ++def XVFMINA_S : LASX_3R<0b01110101010000101>, ++ LASX_3RF_DESC_BASE<"xvfmina.s", int_loongarch_lasx_xvfmina_s, LASX256WOpnd>; ++ ++def XVFMINA_D : LASX_3R<0b01110101010000110>, ++ LASX_3RF_DESC_BASE<"xvfmina.d", int_loongarch_lasx_xvfmina_d, LASX256DOpnd>; ++ ++ ++def XVFCVT_H_S : LASX_3R<0b01110101010001100>, ++ LASX_3RF_DESC_BASE<"xvfcvt.h.s", int_loongarch_lasx_xvfcvt_h_s, LASX256HOpnd, LASX256WOpnd, LASX256WOpnd>; ++ ++def XVFCVT_S_D : LASX_3R<0b01110101010001101>, ++ LASX_3RF_DESC_BASE1<"xvfcvt.s.d", int_loongarch_lasx_xvfcvt_s_d, LASX256WOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVFFINT_S_L : LASX_3R<0b01110101010010000>, ++ LASX_3RF_DESC_BASE<"xvffint.s.l", int_loongarch_lasx_xvffint_s_l, LASX256WOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++def XVFTINT_W_D : LASX_3R<0b01110101010010011>, ++ LASX_3RF_DESC_BASE<"xvftint.w.d", int_loongarch_lasx_xvftint_w_d, LASX256WOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVFTINTRM_W_D : LASX_3R<0b01110101010010100>, ++ LASX_3RF_DESC_BASE<"xvftintrm.w.d", int_loongarch_lasx_xvftintrm_w_d, LASX256WOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++def XVFTINTRP_W_D : LASX_3R<0b01110101010010101>, ++ LASX_3RF_DESC_BASE<"xvftintrp.w.d", int_loongarch_lasx_xvftintrp_w_d, LASX256WOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++def XVFTINTRZ_W_D : LASX_3R<0b01110101010010110>, ++ LASX_3RF_DESC_BASE<"xvftintrz.w.d", int_loongarch_lasx_xvftintrz_w_d, LASX256WOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++def XVFTINTRNE_W_D : LASX_3R<0b01110101010010111>, ++ LASX_3RF_DESC_BASE<"xvftintrne.w.d", int_loongarch_lasx_xvftintrne_w_d, LASX256WOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVSHUF_H : LASX_3R<0b01110101011110101>, ++ LASX_3R_VSHF_DESC_BASE<"xvshuf.h", LASX256HOpnd>; ++ ++def XVSHUF_W : LASX_3R<0b01110101011110110>, ++ LASX_3R_VSHF_DESC_BASE<"xvshuf.w", LASX256WOpnd>; ++ ++def XVSHUF_D : LASX_3R<0b01110101011110111>, ++ LASX_3R_VSHF_DESC_BASE<"xvshuf.d", LASX256DOpnd>; ++ ++ ++def XVPERM_W : LASX_3R<0b01110101011111010>, ++ LASX_3R_DESC_BASE<"xvperm.w", int_loongarch_lasx_xvperm_w, LASX256WOpnd>; ++ ++ ++def XVSEQI_B : LASX_I5<0b01110110100000000>, ++ LASX_I5_SETCC_DESC_BASE_Intrinsic<"xvseqi.b", int_loongarch_lasx_xvseqi_b, simm5_32, immSExt5, LASX256BOpnd>; ++ ++def XVSEQI_H : LASX_I5<0b01110110100000001>, ++ LASX_I5_SETCC_DESC_BASE_Intrinsic<"xvseqi.h", int_loongarch_lasx_xvseqi_h, simm5_32, immSExt5, LASX256HOpnd>; ++ ++def XVSEQI_W : LASX_I5<0b01110110100000010>, ++ LASX_I5_SETCC_DESC_BASE_Intrinsic<"xvseqi.w", int_loongarch_lasx_xvseqi_w, simm5_32, immSExt5, LASX256WOpnd>; ++ ++def XVSEQI_D : LASX_I5<0b01110110100000011>, ++ LASX_I5_SETCC_DESC_BASE_Intrinsic<"xvseqi.d", int_loongarch_lasx_xvseqi_d, simm5_32, immSExt5, LASX256DOpnd>; ++ ++ ++def XVSLEI_B : LASX_I5<0b01110110100000100>, ++ LASX_I5_SETCC_DESC_BASE_Intrinsic<"xvslei.b", int_loongarch_lasx_xvslei_b, simm5_32, immSExt5, LASX256BOpnd>; ++ ++def XVSLEI_H : LASX_I5<0b01110110100000101>, ++ LASX_I5_SETCC_DESC_BASE_Intrinsic<"xvslei.h", int_loongarch_lasx_xvslei_h, simm5_32, immSExt5, LASX256HOpnd>; ++ ++def XVSLEI_W : LASX_I5<0b01110110100000110>, ++ LASX_I5_SETCC_DESC_BASE_Intrinsic<"xvslei.w", int_loongarch_lasx_xvslei_w, simm5_32, immSExt5, LASX256WOpnd>; ++ ++def XVSLEI_D : LASX_I5<0b01110110100000111>, ++ LASX_I5_SETCC_DESC_BASE_Intrinsic<"xvslei.d", int_loongarch_lasx_xvslei_d, simm5_32, immSExt5, LASX256DOpnd>; ++ ++ ++def XVSLEI_BU : LASX_I5_U<0b01110110100001000>, ++ LASX_I5_U_SETCC_DESC_BASE_Intrinsic<"xvslei.bu", int_loongarch_lasx_xvslei_bu, uimm5, immZExt5, LASX256BOpnd>; ++ ++def XVSLEI_HU 
: LASX_I5_U<0b01110110100001001>, ++ LASX_I5_U_SETCC_DESC_BASE_Intrinsic<"xvslei.hu", int_loongarch_lasx_xvslei_hu, uimm5, immZExt5, LASX256HOpnd>; ++ ++def XVSLEI_WU : LASX_I5_U<0b01110110100001010>, ++ LASX_I5_U_SETCC_DESC_BASE_Intrinsic<"xvslei.wu", int_loongarch_lasx_xvslei_wu, uimm5, immZExt5, LASX256WOpnd>; ++ ++def XVSLEI_DU : LASX_I5_U<0b01110110100001011>, ++ LASX_I5_U_SETCC_DESC_BASE_Intrinsic<"xvslei.du", int_loongarch_lasx_xvslei_du, uimm5, immZExt5, LASX256DOpnd>; ++ ++ ++def XVSLTI_B : LASX_I5<0b01110110100001100>, ++ LASX_I5_SETCC_DESC_BASE_Intrinsic<"xvslti.b", int_loongarch_lasx_xvslti_b, simm5_32, immSExt5, LASX256BOpnd>; ++ ++def XVSLTI_H : LASX_I5<0b01110110100001101>, ++ LASX_I5_SETCC_DESC_BASE_Intrinsic<"xvslti.h", int_loongarch_lasx_xvslti_h, simm5_32, immSExt5, LASX256HOpnd>; ++ ++def XVSLTI_W : LASX_I5<0b01110110100001110>, ++ LASX_I5_SETCC_DESC_BASE_Intrinsic<"xvslti.w", int_loongarch_lasx_xvslti_w, simm5_32, immSExt5, LASX256WOpnd>; ++ ++def XVSLTI_D : LASX_I5<0b01110110100001111>, ++ LASX_I5_SETCC_DESC_BASE_Intrinsic<"xvslti.d", int_loongarch_lasx_xvslti_d, simm5_32, immSExt5, LASX256DOpnd>; ++ ++ ++def XVSLTI_BU : LASX_I5_U<0b01110110100010000>, ++ LASX_I5_U_SETCC_DESC_BASE_Intrinsic<"xvslti.bu", int_loongarch_lasx_xvslti_bu, uimm5, immZExt5, LASX256BOpnd>; ++ ++def XVSLTI_HU : LASX_I5_U<0b01110110100010001>, ++ LASX_I5_U_SETCC_DESC_BASE_Intrinsic<"xvslti.hu", int_loongarch_lasx_xvslti_hu, uimm5, immZExt5, LASX256HOpnd>; ++ ++def XVSLTI_WU : LASX_I5_U<0b01110110100010010>, ++ LASX_I5_U_SETCC_DESC_BASE_Intrinsic<"xvslti.wu", int_loongarch_lasx_xvslti_wu, uimm5, immZExt5, LASX256WOpnd>; ++ ++def XVSLTI_DU : LASX_I5_U<0b01110110100010011>, ++ LASX_I5_U_SETCC_DESC_BASE_Intrinsic<"xvslti.du", int_loongarch_lasx_xvslti_du, uimm5, immZExt5, LASX256DOpnd>; ++ ++ ++def XVADDI_BU : LASX_I5_U<0b01110110100010100>, ++ LASX_I5_U_DESC_BASE_Intrinsic<"xvaddi.bu", int_loongarch_lasx_xvaddi_bu, uimm5, immZExt5, LASX256BOpnd>; ++ ++def XVADDI_HU : LASX_I5_U<0b01110110100010101>, ++ LASX_I5_U_DESC_BASE_Intrinsic<"xvaddi.hu", int_loongarch_lasx_xvaddi_hu, uimm5, immZExt5, LASX256HOpnd>; ++ ++def XVADDI_WU : LASX_I5_U<0b01110110100010110>, ++ LASX_I5_U_DESC_BASE_Intrinsic<"xvaddi.wu", int_loongarch_lasx_xvaddi_wu, uimm5, immZExt5, LASX256WOpnd>; ++ ++def XVADDI_DU : LASX_I5_U<0b01110110100010111>, ++ LASX_I5_U_DESC_BASE_Intrinsic<"xvaddi.du", int_loongarch_lasx_xvaddi_du, uimm5, immZExt5, LASX256DOpnd>; ++ ++ ++def XVSUBI_BU : LASX_I5_U<0b01110110100011000>, ++ LASX_I5_U_DESC_BASE_Intrinsic<"xvsubi.bu", int_loongarch_lasx_xvsubi_bu, uimm5, immZExt5, LASX256BOpnd>; ++ ++def XVSUBI_HU : LASX_I5_U<0b01110110100011001>, ++ LASX_I5_U_DESC_BASE_Intrinsic<"xvsubi.hu", int_loongarch_lasx_xvsubi_hu, uimm5, immZExt5, LASX256HOpnd>; ++ ++def XVSUBI_WU : LASX_I5_U<0b01110110100011010>, ++ LASX_I5_U_DESC_BASE_Intrinsic<"xvsubi.wu", int_loongarch_lasx_xvsubi_wu, uimm5, immZExt5, LASX256WOpnd>; ++ ++def XVSUBI_DU : LASX_I5_U<0b01110110100011011>, ++ LASX_I5_U_DESC_BASE_Intrinsic<"xvsubi.du", int_loongarch_lasx_xvsubi_du, uimm5, immZExt5, LASX256DOpnd>; ++ ++ ++def XVBSLL_V : LASX_I5_U<0b01110110100011100>, ++ LASX_U5_DESC_BASE<"xvbsll.v", int_loongarch_lasx_xvbsll_v, LASX256BOpnd>; ++ ++def XVBSRL_V : LASX_I5_U<0b01110110100011101>, ++ LASX_U5_DESC_BASE<"xvbsrl.v", int_loongarch_lasx_xvbsrl_v, LASX256BOpnd>; ++ ++ ++def XVMAXI_B : LASX_I5<0b01110110100100000>, ++ LASX_I5_DESC_BASE_Intrinsic<"xvmaxi.b", int_loongarch_lasx_xvmaxi_b, simm5_32, immSExt5, LASX256BOpnd>; ++ ++def XVMAXI_H : 
LASX_I5<0b01110110100100001>, ++ LASX_I5_DESC_BASE_Intrinsic<"xvmaxi.h", int_loongarch_lasx_xvmaxi_h, simm5_32, immSExt5, LASX256HOpnd>; ++ ++def XVMAXI_W : LASX_I5<0b01110110100100010>, ++ LASX_I5_DESC_BASE_Intrinsic<"xvmaxi.w", int_loongarch_lasx_xvmaxi_w, simm5_32, immSExt5, LASX256WOpnd>; ++ ++def XVMAXI_D : LASX_I5<0b01110110100100011>, ++ LASX_I5_DESC_BASE_Intrinsic<"xvmaxi.d", int_loongarch_lasx_xvmaxi_d, simm5_32, immSExt5, LASX256DOpnd>; ++ ++ ++def XVMINI_B : LASX_I5<0b01110110100100100>, ++ LASX_I5_DESC_BASE_Intrinsic<"xvmini.b", int_loongarch_lasx_xvmini_b, simm5_32, immSExt5, LASX256BOpnd>; ++ ++def XVMINI_H : LASX_I5<0b01110110100100101>, ++ LASX_I5_DESC_BASE_Intrinsic<"xvmini.h", int_loongarch_lasx_xvmini_h, simm5_32, immSExt5, LASX256HOpnd>; ++ ++def XVMINI_W : LASX_I5<0b01110110100100110>, ++ LASX_I5_DESC_BASE_Intrinsic<"xvmini.w", int_loongarch_lasx_xvmini_w, simm5_32, immSExt5, LASX256WOpnd>; ++ ++def XVMINI_D : LASX_I5<0b01110110100100111>, ++ LASX_I5_DESC_BASE_Intrinsic<"xvmini.d", int_loongarch_lasx_xvmini_d, simm5_32, immSExt5, LASX256DOpnd>; ++ ++ ++def XVMAXI_BU : LASX_I5_U<0b01110110100101000>, ++ LASX_I5_U_DESC_BASE_Intrinsic<"xvmaxi.bu", int_loongarch_lasx_xvmaxi_bu, uimm5, immZExt5, LASX256BOpnd>; ++ ++def XVMAXI_HU : LASX_I5_U<0b01110110100101001>, ++ LASX_I5_U_DESC_BASE_Intrinsic<"xvmaxi.hu", int_loongarch_lasx_xvmaxi_hu, uimm5, immZExt5, LASX256HOpnd>; ++ ++def XVMAXI_WU : LASX_I5_U<0b01110110100101010>, ++ LASX_I5_U_DESC_BASE_Intrinsic<"xvmaxi.wu", int_loongarch_lasx_xvmaxi_wu, uimm5, immZExt5, LASX256WOpnd>; ++ ++def XVMAXI_DU : LASX_I5_U<0b01110110100101011>, ++ LASX_I5_U_DESC_BASE_Intrinsic<"xvmaxi.du", int_loongarch_lasx_xvmaxi_du, uimm5, immZExt5, LASX256DOpnd>; ++ ++ ++def XVMINI_BU : LASX_I5_U<0b01110110100101100>, ++ LASX_I5_U_DESC_BASE_Intrinsic<"xvmini.bu", int_loongarch_lasx_xvmini_bu, uimm5, immZExt5, LASX256BOpnd>; ++ ++def XVMINI_HU : LASX_I5_U<0b01110110100101101>, ++ LASX_I5_U_DESC_BASE_Intrinsic<"xvmini.hu", int_loongarch_lasx_xvmini_hu, uimm5, immZExt5, LASX256HOpnd>; ++ ++def XVMINI_WU : LASX_I5_U<0b01110110100101110>, ++ LASX_I5_U_DESC_BASE_Intrinsic<"xvmini.wu", int_loongarch_lasx_xvmini_wu, uimm5, immZExt5, LASX256WOpnd>; ++ ++def XVMINI_DU : LASX_I5_U<0b01110110100101111>, ++ LASX_I5_U_DESC_BASE_Intrinsic<"xvmini.du", int_loongarch_lasx_xvmini_du, uimm5, immZExt5, LASX256DOpnd>; ++ ++ ++def XVFRSTPI_B : LASX_I5_U<0b01110110100110100>, ++ LASX_U5_4R_DESC_BASE<"xvfrstpi.b", int_loongarch_lasx_xvfrstpi_b, LASX256BOpnd>; ++ ++def XVFRSTPI_H : LASX_I5_U<0b01110110100110101>, ++ LASX_U5_4R_DESC_BASE<"xvfrstpi.h", int_loongarch_lasx_xvfrstpi_h, LASX256HOpnd>; ++ ++ ++def XVCLO_B : LASX_2R<0b0111011010011100000000>, ++ LASX_2R_DESC_BASE<"xvclo.b", int_loongarch_lasx_xvclo_b, LASX256BOpnd>; ++ ++def XVCLO_H : LASX_2R<0b0111011010011100000001>, ++ LASX_2R_DESC_BASE<"xvclo.h", int_loongarch_lasx_xvclo_h, LASX256HOpnd>; ++ ++def XVCLO_W : LASX_2R<0b0111011010011100000010>, ++ LASX_2R_DESC_BASE<"xvclo.w", int_loongarch_lasx_xvclo_w, LASX256WOpnd>; ++ ++def XVCLO_D : LASX_2R<0b0111011010011100000011>, ++ LASX_2R_DESC_BASE<"xvclo.d", int_loongarch_lasx_xvclo_d, LASX256DOpnd>; ++ ++ ++def XVCLZ_B : LASX_2R<0b0111011010011100000100>, ++ LASX_2R_DESC_BASE<"xvclz.b", ctlz, LASX256BOpnd>; ++ ++def XVCLZ_H : LASX_2R<0b0111011010011100000101>, ++ LASX_2R_DESC_BASE<"xvclz.h", ctlz, LASX256HOpnd>; ++ ++def XVCLZ_W : LASX_2R<0b0111011010011100000110>, ++ LASX_2R_DESC_BASE<"xvclz.w", ctlz, LASX256WOpnd>; ++ ++def XVCLZ_D : LASX_2R<0b0111011010011100000111>, ++ 
LASX_2R_DESC_BASE<"xvclz.d", ctlz, LASX256DOpnd>; ++ ++ ++def XVPCNT_B : LASX_2R<0b0111011010011100001000>, ++ LASX_2R_DESC_BASE<"xvpcnt.b", ctpop, LASX256BOpnd>; ++ ++def XVPCNT_H : LASX_2R<0b0111011010011100001001>, ++ LASX_2R_DESC_BASE<"xvpcnt.h", ctpop, LASX256HOpnd>; ++ ++def XVPCNT_W : LASX_2R<0b0111011010011100001010>, ++ LASX_2R_DESC_BASE<"xvpcnt.w", ctpop, LASX256WOpnd>; ++ ++def XVPCNT_D : LASX_2R<0b0111011010011100001011>, ++ LASX_2R_DESC_BASE<"xvpcnt.d", ctpop, LASX256DOpnd>; ++ ++ ++def XVNEG_B : LASX_2R<0b0111011010011100001100>, ++ LASX_2R_DESC_BASE<"xvneg.b", int_loongarch_lasx_xvneg_b, LASX256BOpnd>; ++ ++def XVNEG_H : LASX_2R<0b0111011010011100001101>, ++ LASX_2R_DESC_BASE<"xvneg.h", int_loongarch_lasx_xvneg_h, LASX256HOpnd>; ++ ++def XVNEG_W : LASX_2R<0b0111011010011100001110>, ++ LASX_2R_DESC_BASE<"xvneg.w", int_loongarch_lasx_xvneg_w, LASX256WOpnd>; ++ ++def XVNEG_D : LASX_2R<0b0111011010011100001111>, ++ LASX_2R_DESC_BASE<"xvneg.d", int_loongarch_lasx_xvneg_d, LASX256DOpnd>; ++ ++ ++def XVMSKLTZ_B : LASX_2R<0b0111011010011100010000>, ++ LASX_2R_DESC_BASE<"xvmskltz.b", int_loongarch_lasx_xvmskltz_b, LASX256BOpnd>; ++ ++def XVMSKLTZ_H : LASX_2R<0b0111011010011100010001>, ++ LASX_2R_DESC_BASE<"xvmskltz.h", int_loongarch_lasx_xvmskltz_h, LASX256HOpnd>; ++ ++def XVMSKLTZ_W : LASX_2R<0b0111011010011100010010>, ++ LASX_2R_DESC_BASE<"xvmskltz.w", int_loongarch_lasx_xvmskltz_w, LASX256WOpnd>; ++ ++def XVMSKLTZ_D : LASX_2R<0b0111011010011100010011>, ++ LASX_2R_DESC_BASE<"xvmskltz.d", int_loongarch_lasx_xvmskltz_d, LASX256DOpnd>; ++ ++ ++def XVMSKGEZ_B : LASX_2R<0b0111011010011100010100>, ++ LASX_2R_DESC_BASE<"xvmskgez.b", int_loongarch_lasx_xvmskgez_b, LASX256BOpnd>; ++ ++def XVMSKNZ_B : LASX_2R<0b0111011010011100011000>, ++ LASX_2R_DESC_BASE<"xvmsknz.b", int_loongarch_lasx_xvmsknz_b, LASX256BOpnd>; ++ ++ ++def XVSETEQZ_V : LASX_SET<0b0111011010011100100110>, ++ LASX_SET_DESC_BASE<"xvseteqz.v", LASX256BOpnd>; ++ ++def XVSETNEZ_V : LASX_SET<0b0111011010011100100111>, ++ LASX_SET_DESC_BASE<"xvsetnez.v", LASX256BOpnd>; ++ ++ ++def XVSETANYEQZ_B : LASX_SET<0b0111011010011100101000>, ++ LASX_SET_DESC_BASE<"xvsetanyeqz.b", LASX256BOpnd>; ++ ++def XVSETANYEQZ_H : LASX_SET<0b0111011010011100101001>, ++ LASX_SET_DESC_BASE<"xvsetanyeqz.h", LASX256HOpnd>; ++ ++def XVSETANYEQZ_W : LASX_SET<0b0111011010011100101010>, ++ LASX_SET_DESC_BASE<"xvsetanyeqz.w", LASX256WOpnd>; ++ ++def XVSETANYEQZ_D : LASX_SET<0b0111011010011100101011>, ++ LASX_SET_DESC_BASE<"xvsetanyeqz.d", LASX256DOpnd>; ++ ++ ++def XVSETALLNEZ_B : LASX_SET<0b0111011010011100101100>, ++ LASX_SET_DESC_BASE<"xvsetallnez.b", LASX256BOpnd>; ++ ++def XVSETALLNEZ_H : LASX_SET<0b0111011010011100101101>, ++ LASX_SET_DESC_BASE<"xvsetallnez.h", LASX256HOpnd>; ++ ++def XVSETALLNEZ_W : LASX_SET<0b0111011010011100101110>, ++ LASX_SET_DESC_BASE<"xvsetallnez.w", LASX256WOpnd>; ++ ++def XVSETALLNEZ_D : LASX_SET<0b0111011010011100101111>, ++ LASX_SET_DESC_BASE<"xvsetallnez.d", LASX256DOpnd>; ++ ++class LASX_CBRANCH_PSEUDO_DESC_BASE : ++ LoongArchPseudo<(outs GPR32:$dst), ++ (ins RCWS:$xj), ++ [(set GPR32:$dst, (OpNode (TyNode RCWS:$xj)))]> { ++ bit usesCustomInserter = 1; ++} ++ ++def XSNZ_B_PSEUDO : LASX_CBRANCH_PSEUDO_DESC_BASE; ++def XSNZ_H_PSEUDO : LASX_CBRANCH_PSEUDO_DESC_BASE; ++def XSNZ_W_PSEUDO : LASX_CBRANCH_PSEUDO_DESC_BASE; ++def XSNZ_D_PSEUDO : LASX_CBRANCH_PSEUDO_DESC_BASE; ++def XSNZ_V_PSEUDO : LASX_CBRANCH_PSEUDO_DESC_BASE; ++ ++def XSZ_B_PSEUDO : LASX_CBRANCH_PSEUDO_DESC_BASE; ++def XSZ_H_PSEUDO : LASX_CBRANCH_PSEUDO_DESC_BASE; 
++def XSZ_W_PSEUDO : LASX_CBRANCH_PSEUDO_DESC_BASE; ++def XSZ_D_PSEUDO : LASX_CBRANCH_PSEUDO_DESC_BASE; ++def XSZ_V_PSEUDO : LASX_CBRANCH_PSEUDO_DESC_BASE; ++ ++ ++def XVFLOGB_S : LASX_2R<0b0111011010011100110001>, ++ LASX_2RF_DESC_BASE<"xvflogb.s", int_loongarch_lasx_xvflogb_s, LASX256WOpnd>; ++ ++def XVFLOGB_D : LASX_2R<0b0111011010011100110010>, ++ LASX_2RF_DESC_BASE<"xvflogb.d", int_loongarch_lasx_xvflogb_d, LASX256DOpnd>; ++ ++ ++def XVFCLASS_S : LASX_2R<0b0111011010011100110101>, ++ LASX_2RF_DESC_BASE<"xvfclass.s", int_loongarch_lasx_xvfclass_s, LASX256WOpnd>; ++ ++def XVFCLASS_D : LASX_2R<0b0111011010011100110110>, ++ LASX_2RF_DESC_BASE<"xvfclass.d", int_loongarch_lasx_xvfclass_d, LASX256DOpnd>; ++ ++ ++def XVFSQRT_S : LASX_2R<0b0111011010011100111001>, ++ LASX_2RF_DESC_BASE<"xvfsqrt.s", fsqrt, LASX256WOpnd>; ++ ++def XVFSQRT_D : LASX_2R<0b0111011010011100111010>, ++ LASX_2RF_DESC_BASE<"xvfsqrt.d", fsqrt, LASX256DOpnd>; ++ ++ ++def XVFRECIP_S : LASX_2R<0b0111011010011100111101>, ++ LASX_2RF_DESC_BASE<"xvfrecip.s", int_loongarch_lasx_xvfrecip_s, LASX256WOpnd>; ++ ++def XVFRECIP_D : LASX_2R<0b0111011010011100111110>, ++ LASX_2RF_DESC_BASE<"xvfrecip.d", int_loongarch_lasx_xvfrecip_d, LASX256DOpnd>; ++ ++ ++def XVFRSQRT_S : LASX_2R<0b0111011010011101000001>, ++ LASX_2RF_DESC_BASE<"xvfrsqrt.s", int_loongarch_lasx_xvfrsqrt_s, LASX256WOpnd>; ++ ++def XVFRSQRT_D : LASX_2R<0b0111011010011101000010>, ++ LASX_2RF_DESC_BASE<"xvfrsqrt.d", int_loongarch_lasx_xvfrsqrt_d, LASX256DOpnd>; ++ ++ ++def XVFRINT_S : LASX_2R<0b0111011010011101001101>, ++ LASX_2RF_DESC_BASE<"xvfrint.s", frint, LASX256WOpnd>; ++ ++def XVFRINT_D : LASX_2R<0b0111011010011101001110>, ++ LASX_2RF_DESC_BASE<"xvfrint.d", frint, LASX256DOpnd>; ++ ++ ++def XVFRINTRM_S : LASX_2R<0b0111011010011101010001>, ++ LASX_2RF_DESC_BASE<"xvfrintrm.s", int_loongarch_lasx_xvfrintrm_s, LASX256WOpnd>; ++ ++def XVFRINTRM_D : LASX_2R<0b0111011010011101010010>, ++ LASX_2RF_DESC_BASE<"xvfrintrm.d", int_loongarch_lasx_xvfrintrm_d, LASX256DOpnd>; ++ ++ ++def XVFRINTRP_S : LASX_2R<0b0111011010011101010101>, ++ LASX_2RF_DESC_BASE<"xvfrintrp.s", int_loongarch_lasx_xvfrintrp_s, LASX256WOpnd>; ++ ++def XVFRINTRP_D : LASX_2R<0b0111011010011101010110>, ++ LASX_2RF_DESC_BASE<"xvfrintrp.d", int_loongarch_lasx_xvfrintrp_d, LASX256DOpnd>; ++ ++ ++def XVFRINTRZ_S : LASX_2R<0b0111011010011101011001>, ++ LASX_2RF_DESC_BASE<"xvfrintrz.s", int_loongarch_lasx_xvfrintrz_s, LASX256WOpnd>; ++ ++def XVFRINTRZ_D : LASX_2R<0b0111011010011101011010>, ++ LASX_2RF_DESC_BASE<"xvfrintrz.d", int_loongarch_lasx_xvfrintrz_d, LASX256DOpnd>; ++ ++ ++def XVFRINTRNE_S : LASX_2R<0b0111011010011101011101>, ++ LASX_2RF_DESC_BASE<"xvfrintrne.s", int_loongarch_lasx_xvfrintrne_s, LASX256WOpnd>; ++ ++def XVFRINTRNE_D : LASX_2R<0b0111011010011101011110>, ++ LASX_2RF_DESC_BASE<"xvfrintrne.d", int_loongarch_lasx_xvfrintrne_d, LASX256DOpnd>; ++ ++ ++def XVFCVTL_S_H : LASX_2R<0b0111011010011101111010>, ++ LASX_2RF_DESC_BASE<"xvfcvtl.s.h", int_loongarch_lasx_xvfcvtl_s_h, LASX256WOpnd, LASX256HOpnd>; ++ ++def XVFCVTH_S_H : LASX_2R<0b0111011010011101111011>, ++ LASX_2RF_DESC_BASE<"xvfcvth.s.h", int_loongarch_lasx_xvfcvth_s_h, LASX256WOpnd, LASX256HOpnd>; ++ ++ ++def XVFCVTL_D_S : LASX_2R<0b0111011010011101111100>, ++ LASX_2RF_DESC_BASE<"xvfcvtl.d.s", int_loongarch_lasx_xvfcvtl_d_s, LASX256DOpnd, LASX256WOpnd>; ++ ++def XVFCVTH_D_S : LASX_2R<0b0111011010011101111101>, ++ LASX_2RF_DESC_BASE<"xvfcvth.d.s", int_loongarch_lasx_xvfcvth_d_s, LASX256DOpnd, LASX256WOpnd>; ++ ++ ++def XVFFINT_S_W : 
LASX_2R<0b0111011010011110000000>, ++ LASX_2RF_DESC_BASE<"xvffint.s.w", sint_to_fp, LASX256WOpnd>; ++ ++def XVFFINT_S_WU : LASX_2R<0b0111011010011110000001>, ++ LASX_2RF_DESC_BASE<"xvffint.s.wu", uint_to_fp, LASX256WOpnd>; ++ ++ ++def XVFFINT_D_L : LASX_2R<0b0111011010011110000010>, ++ LASX_2RF_DESC_BASE<"xvffint.d.l", sint_to_fp, LASX256DOpnd>; ++ ++def XVFFINT_D_LU : LASX_2R<0b0111011010011110000011>, ++ LASX_2RF_DESC_BASE<"xvffint.d.lu", uint_to_fp, LASX256DOpnd>; ++ ++ ++def XVFFINTL_D_W : LASX_2R<0b0111011010011110000100>, ++ LASX_2RF_DESC_BASE<"xvffintl.d.w", int_loongarch_lasx_xvffintl_d_w, LASX256DOpnd, LASX256WOpnd>; ++ ++def XVFFINTH_D_W : LASX_2R<0b0111011010011110000101>, ++ LASX_2RF_DESC_BASE<"xvffinth.d.w", int_loongarch_lasx_xvffinth_d_w, LASX256DOpnd, LASX256WOpnd>; ++ ++ ++def XVFTINT_W_S : LASX_2R<0b0111011010011110001100>, ++ LASX_2RF_DESC_BASE<"xvftint.w.s", int_loongarch_lasx_xvftint_w_s, LASX256WOpnd>; ++ ++def XVFTINT_L_D : LASX_2R<0b0111011010011110001101>, ++ LASX_2RF_DESC_BASE<"xvftint.l.d", int_loongarch_lasx_xvftint_l_d, LASX256DOpnd>; ++ ++ ++def XVFTINTRM_W_S : LASX_2R<0b0111011010011110001110>, ++ LASX_2RF_DESC_BASE<"xvftintrm.w.s", int_loongarch_lasx_xvftintrm_w_s, LASX256WOpnd>; ++ ++def XVFTINTRM_L_D : LASX_2R<0b0111011010011110001111>, ++ LASX_2RF_DESC_BASE<"xvftintrm.l.d", int_loongarch_lasx_xvftintrm_l_d, LASX256DOpnd>; ++ ++ ++def XVFTINTRP_W_S : LASX_2R<0b0111011010011110010000>, ++ LASX_2RF_DESC_BASE<"xvftintrp.w.s", int_loongarch_lasx_xvftintrp_w_s, LASX256WOpnd>; ++ ++def XVFTINTRP_L_D : LASX_2R<0b0111011010011110010001>, ++ LASX_2RF_DESC_BASE<"xvftintrp.l.d", int_loongarch_lasx_xvftintrp_l_d, LASX256DOpnd>; ++ ++ ++def XVFTINTRZ_W_S : LASX_2R<0b0111011010011110010010>, ++ LASX_2RF_DESC_BASE<"xvftintrz.w.s", fp_to_sint, LASX256WOpnd>; ++ ++def XVFTINTRZ_L_D : LASX_2R<0b0111011010011110010011>, ++ LASX_2RF_DESC_BASE<"xvftintrz.l.d", fp_to_sint, LASX256DOpnd>; ++ ++ ++def XVFTINTRNE_W_S : LASX_2R<0b0111011010011110010100>, ++ LASX_2RF_DESC_BASE<"xvftintrne.w.s", int_loongarch_lasx_xvftintrne_w_s, LASX256WOpnd>; ++ ++def XVFTINTRNE_L_D : LASX_2R<0b0111011010011110010101>, ++ LASX_2RF_DESC_BASE<"xvftintrne.l.d", int_loongarch_lasx_xvftintrne_l_d, LASX256DOpnd>; ++ ++ ++def XVFTINT_WU_S : LASX_2R<0b0111011010011110010110>, ++ LASX_2RF_DESC_BASE<"xvftint.wu.s", int_loongarch_lasx_xvftint_wu_s, LASX256WOpnd>; ++ ++def XVFTINT_LU_D : LASX_2R<0b0111011010011110010111>, ++ LASX_2RF_DESC_BASE<"xvftint.lu.d", int_loongarch_lasx_xvftint_lu_d, LASX256DOpnd>; ++ ++ ++def XVFTINTRZ_WU_S : LASX_2R<0b0111011010011110011100>, ++ LASX_2RF_DESC_BASE<"xvftintrz.wu.s", fp_to_uint, LASX256WOpnd>; ++ ++def XVFTINTRZ_LU_D : LASX_2R<0b0111011010011110011101>, ++ LASX_2RF_DESC_BASE<"xvftintrz.lu.d", fp_to_uint, LASX256DOpnd>; ++ ++ ++def XVFTINTL_L_S : LASX_2R<0b0111011010011110100000>, ++ LASX_2RF_DESC_BASE<"xvftintl.l.s", int_loongarch_lasx_xvftintl_l_s, LASX256DOpnd, LASX256WOpnd>; ++ ++def XVFTINTH_L_S : LASX_2R<0b0111011010011110100001>, ++ LASX_2RF_DESC_BASE<"xvftinth.l.s", int_loongarch_lasx_xvftinth_l_s, LASX256DOpnd, LASX256WOpnd>; ++ ++ ++def XVFTINTRML_L_S : LASX_2R<0b0111011010011110100010>, ++ LASX_2RF_DESC_BASE<"xvftintrml.l.s", int_loongarch_lasx_xvftintrml_l_s, LASX256DOpnd, LASX256WOpnd>; ++ ++def XVFTINTRMH_L_S : LASX_2R<0b0111011010011110100011>, ++ LASX_2RF_DESC_BASE<"xvftintrmh.l.s", int_loongarch_lasx_xvftintrmh_l_s, LASX256DOpnd, LASX256WOpnd>; ++ ++ ++def XVFTINTRPL_L_S : LASX_2R<0b0111011010011110100100>, ++ LASX_2RF_DESC_BASE<"xvftintrpl.l.s", 
int_loongarch_lasx_xvftintrpl_l_s, LASX256DOpnd, LASX256WOpnd>; ++ ++def XVFTINTRPH_L_S : LASX_2R<0b0111011010011110100101>, ++ LASX_2RF_DESC_BASE<"xvftintrph.l.s", int_loongarch_lasx_xvftintrph_l_s, LASX256DOpnd, LASX256WOpnd>; ++ ++ ++def XVFTINTRZL_L_S : LASX_2R<0b0111011010011110100110>, ++ LASX_2RF_DESC_BASE<"xvftintrzl.l.s", int_loongarch_lasx_xvftintrzl_l_s, LASX256DOpnd, LASX256WOpnd>; ++ ++def XVFTINTRZH_L_S : LASX_2R<0b0111011010011110100111>, ++ LASX_2RF_DESC_BASE<"xvftintrzh.l.s", int_loongarch_lasx_xvftintrzh_l_s, LASX256DOpnd, LASX256WOpnd>; ++ ++ ++def XVFTINTRNEL_L_S : LASX_2R<0b0111011010011110101000>, ++ LASX_2RF_DESC_BASE<"xvftintrnel.l.s", int_loongarch_lasx_xvftintrnel_l_s, LASX256DOpnd, LASX256WOpnd>; ++ ++def XVFTINTRNEH_L_S : LASX_2R<0b0111011010011110101001>, ++ LASX_2RF_DESC_BASE<"xvftintrneh.l.s", int_loongarch_lasx_xvftintrneh_l_s, LASX256DOpnd, LASX256WOpnd>; ++ ++ ++def XVEXTH_H_B : LASX_2R<0b0111011010011110111000>, ++ LASX_2R_DESC_BASE<"xvexth.h.b", int_loongarch_lasx_xvexth_h_b, LASX256HOpnd, LASX256BOpnd>; ++ ++def XVEXTH_W_H : LASX_2R<0b0111011010011110111001>, ++ LASX_2R_DESC_BASE<"xvexth.w.h", int_loongarch_lasx_xvexth_w_h, LASX256WOpnd, LASX256HOpnd>; ++ ++def XVEXTH_D_W : LASX_2R<0b0111011010011110111010>, ++ LASX_2R_DESC_BASE<"xvexth.d.w", int_loongarch_lasx_xvexth_d_w, LASX256DOpnd, LASX256WOpnd> ; ++ ++def XVEXTH_Q_D : LASX_2R<0b0111011010011110111011>, ++ LASX_2R_DESC_BASE<"xvexth.q.d", int_loongarch_lasx_xvexth_q_d, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVEXTH_HU_BU : LASX_2R<0b0111011010011110111100>, ++ LASX_2R_DESC_BASE<"xvexth.hu.bu", int_loongarch_lasx_xvexth_hu_bu, LASX256HOpnd, LASX256BOpnd>; ++ ++def XVEXTH_WU_HU : LASX_2R<0b0111011010011110111101>, ++ LASX_2R_DESC_BASE<"xvexth.wu.hu", int_loongarch_lasx_xvexth_wu_hu, LASX256WOpnd, LASX256HOpnd>; ++ ++def XVEXTH_DU_WU : LASX_2R<0b0111011010011110111110>, ++ LASX_2R_DESC_BASE<"xvexth.du.wu", int_loongarch_lasx_xvexth_du_wu, LASX256DOpnd, LASX256WOpnd> ; ++ ++def XVEXTH_QU_DU : LASX_2R<0b0111011010011110111111>, ++ LASX_2R_DESC_BASE<"xvexth.qu.du", int_loongarch_lasx_xvexth_qu_du, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVREPLGR2VR_B : LASX_2R_1GP<0b0111011010011111000000>, ++ LASX_2R_REPL_DESC_BASE<"xvreplgr2vr.b", v32i8, xvsplati8, LASX256BOpnd, GPR32Opnd>; ++ ++def XVREPLGR2VR_H : LASX_2R_1GP<0b0111011010011111000001>, ++ LASX_2R_REPL_DESC_BASE<"xvreplgr2vr.h", v16i16, xvsplati16, LASX256HOpnd, GPR32Opnd>; ++ ++def XVREPLGR2VR_W : LASX_2R_1GP<0b0111011010011111000010>, ++ LASX_2R_REPL_DESC_BASE<"xvreplgr2vr.w", v8i32, xvsplati32, LASX256WOpnd, GPR32Opnd>; ++ ++def XVREPLGR2VR_D : LASX_2R_1GP<0b0111011010011111000011>, ++ LASX_2R_REPL_DESC_BASE<"xvreplgr2vr.d", v4i64, xvsplati64, LASX256DOpnd, GPR64Opnd>; ++ ++ ++def VEXT2XV_H_B : LASX_2R<0b0111011010011111000100>, ++ LASX_XVEXTEND_DESC_BASE<"vext2xv.h.b", int_loongarch_lasx_vext2xv_h_b, v32i8, v16i16, LASX256BOpnd, LASX256HOpnd>; ++ ++def VEXT2XV_W_B : LASX_2R<0b0111011010011111000101>, ++ LASX_XVEXTEND_DESC_BASE<"vext2xv.w.b", int_loongarch_lasx_vext2xv_w_b, v32i8, v8i32, LASX256BOpnd, LASX256WOpnd>; ++ ++def VEXT2XV_D_B : LASX_2R<0b0111011010011111000110>, ++ LASX_XVEXTEND_DESC_BASE<"vext2xv.d.b", int_loongarch_lasx_vext2xv_d_b, v32i8, v4i64, LASX256BOpnd, LASX256DOpnd> ; ++ ++def VEXT2XV_W_H : LASX_2R<0b0111011010011111000111>, ++ LASX_XVEXTEND_DESC_BASE<"vext2xv.w.h", int_loongarch_lasx_vext2xv_w_h, v16i16, v8i32, LASX256HOpnd, LASX256WOpnd>; ++ ++def VEXT2XV_D_H : LASX_2R<0b0111011010011111001000>, ++ 
LASX_XVEXTEND_DESC_BASE<"vext2xv.d.h", int_loongarch_lasx_vext2xv_d_h, v16i16, v4i64, LASX256HOpnd, LASX256DOpnd> ; ++ ++def VEXT2XV_D_W : LASX_2R<0b0111011010011111001001>, ++ LASX_XVEXTEND_DESC_BASE<"vext2xv.d.w", int_loongarch_lasx_vext2xv_d_w, v8i32, v4i64, LASX256WOpnd, LASX256DOpnd>; ++ ++ ++def VEXT2XV_HU_BU : LASX_2R<0b0111011010011111001010>, ++ LASX_XVEXTEND_DESC_BASE<"vext2xv.hu.bu", int_loongarch_lasx_vext2xv_hu_bu, v32i8, v16i16, LASX256BOpnd, LASX256HOpnd>; ++ ++def VEXT2XV_WU_BU : LASX_2R<0b0111011010011111001011>, ++ LASX_XVEXTEND_DESC_BASE<"vext2xv.wu.bu", int_loongarch_lasx_vext2xv_wu_bu, v32i8, v8i32, LASX256BOpnd, LASX256WOpnd>; ++ ++def VEXT2XV_DU_BU : LASX_2R<0b0111011010011111001100>, ++ LASX_XVEXTEND_DESC_BASE<"vext2xv.du.bu", int_loongarch_lasx_vext2xv_du_bu, v32i8, v4i64, LASX256BOpnd, LASX256DOpnd> ; ++ ++def VEXT2XV_WU_HU : LASX_2R<0b0111011010011111001101>, ++ LASX_XVEXTEND_DESC_BASE<"vext2xv.wu.hu", int_loongarch_lasx_vext2xv_wu_hu, v16i16, v8i32, LASX256HOpnd, LASX256WOpnd>; ++ ++def VEXT2XV_DU_HU : LASX_2R<0b0111011010011111001110>, ++ LASX_XVEXTEND_DESC_BASE<"vext2xv.du.hu", int_loongarch_lasx_vext2xv_du_hu, v16i16, v4i64, LASX256HOpnd, LASX256DOpnd> ; ++ ++def VEXT2XV_DU_WU : LASX_2R<0b0111011010011111001111>, ++ LASX_XVEXTEND_DESC_BASE<"vext2xv.du.wu", int_loongarch_lasx_vext2xv_du_wu, v8i32, v4i64, LASX256WOpnd, LASX256DOpnd>; ++ ++ ++def XVHSELI_D : LASX_I5_U<0b01110110100111111>, ++ LASX_U5N_DESC_BASE<"xvhseli.d", LASX256DOpnd>; ++ ++ ++def XVROTRI_B : LASX_I3_U<0b0111011010100000001>, ++ LASX_RORI_U3_DESC_BASE_Intrinsic<"xvrotri.b", int_loongarch_lasx_xvrotri_b, uimm3, immZExt3, LASX256BOpnd>; ++ ++def XVROTRI_H : LASX_I4_U<0b011101101010000001>, ++ LASX_RORI_U4_DESC_BASE_Intrinsic<"xvrotri.h", int_loongarch_lasx_xvrotri_h, uimm4, immZExt4, LASX256HOpnd>; ++ ++def XVROTRI_W : LASX_I5_U<0b01110110101000001>, ++ LASX_RORI_U5_DESC_BASE_Intrinsic<"xvrotri.w", int_loongarch_lasx_xvrotri_w, uimm5, immZExt5, LASX256WOpnd>; ++ ++def XVROTRI_D : LASX_I6_U<0b0111011010100001>, ++ LASX_RORI_U6_DESC_BASE_Intrinsic<"xvrotri.d", int_loongarch_lasx_xvrotri_d, uimm6, immZExt6, LASX256DOpnd>; ++ ++ ++def XVSRLRI_B : LASX_I3_U<0b0111011010100100001>, ++ LASX_BIT_3_DESC_BASE<"xvsrlri.b", int_loongarch_lasx_xvsrlri_b, uimm3, immZExt3, LASX256BOpnd>; ++ ++def XVSRLRI_H : LASX_I4_U<0b011101101010010001>, ++ LASX_BIT_4_DESC_BASE<"xvsrlri.h", int_loongarch_lasx_xvsrlri_h, uimm4, immZExt4, LASX256HOpnd>; ++ ++def XVSRLRI_W : LASX_I5_U<0b01110110101001001>, ++ LASX_BIT_5_DESC_BASE<"xvsrlri.w", int_loongarch_lasx_xvsrlri_w, uimm5, immZExt5, LASX256WOpnd>; ++ ++def XVSRLRI_D : LASX_I6_U<0b0111011010100101>, ++ LASX_BIT_6_DESC_BASE<"xvsrlri.d", int_loongarch_lasx_xvsrlri_d, uimm6, immZExt6, LASX256DOpnd>; ++ ++ ++def XVSRARI_B : LASX_I3_U<0b0111011010101000001>, ++ LASX_BIT_3_DESC_BASE<"xvsrari.b", int_loongarch_lasx_xvsrari_b, uimm3, immZExt3, LASX256BOpnd>; ++ ++def XVSRARI_H : LASX_I4_U<0b011101101010100001>, ++ LASX_BIT_4_DESC_BASE<"xvsrari.h", int_loongarch_lasx_xvsrari_h, uimm4, immZExt4, LASX256HOpnd>; ++ ++def XVSRARI_W : LASX_I5_U<0b01110110101010001>, ++ LASX_BIT_5_DESC_BASE<"xvsrari.w", int_loongarch_lasx_xvsrari_w, uimm5, immZExt5, LASX256WOpnd>; ++ ++def XVSRARI_D : LASX_I6_U<0b0111011010101001>, ++ LASX_BIT_6_DESC_BASE<"xvsrari.d", int_loongarch_lasx_xvsrari_d, uimm6, immZExt6, LASX256DOpnd>; ++ ++ ++def XVINSGR2VR_W : LASX_I3_R_U<0b0111011011101011110>, ++ LASX_INSERT_U3_DESC_BASE<"xvinsgr2vr.w", v8i32, uimm3_ptr, immZExt3Ptr, LASX256WOpnd, GPR32Opnd>; ++ ++def 
XVINSGR2VR_D : LASX_I2_R_U<0b01110110111010111110>, ++ LASX_INSERT_U2_DESC_BASE<"xvinsgr2vr.d", v4i64, uimm2_ptr, immZExt2Ptr, LASX256DOpnd, GPR64Opnd>; ++ ++ ++def XVPICKVE2GR_W : LASX_ELM_COPY_U3<0b0111011011101111110>, ++ LASX_COPY_U3_DESC_BASE<"xvpickve2gr.w", vextract_sext_i32, v8i32, uimm3_ptr, immZExt3Ptr, GPR32Opnd, LASX256WOpnd>; ++ ++def XVPICKVE2GR_D : LASX_ELM_COPY_U2<0b01110110111011111110>, ++ LASX_COPY_U2_DESC_BASE<"xvpickve2gr.d", vextract_sext_i64, v4i64, uimm2_ptr, immZExt2Ptr, GPR64Opnd, LASX256DOpnd>; ++ ++ ++def XVPICKVE2GR_WU : LASX_ELM_COPY_U3<0b0111011011110011110>, ++ LASX_COPY_U3_DESC_BASE<"xvpickve2gr.wu", vextract_zext_i32, v8i32, uimm3_ptr, immZExt3Ptr, GPR32Opnd, LASX256WOpnd>; ++ ++def XVPICKVE2GR_DU : LASX_ELM_COPY_U2<0b01110110111100111110>, ++ LASX_COPY_U2_DESC_BASE<"xvpickve2gr.du", vextract_zext_i64, v4i64, uimm2_ptr, immZExt2Ptr, GPR64Opnd, LASX256DOpnd>; ++ ++ ++def XVREPL128VEI_B : LASX_I4_U<0b011101101111011110>, ++ LASX_ELM_U4_VREPLVE_DESC_BASE_Intrinsic<"xvrepl128vei.b", int_loongarch_lasx_xvrepl128vei_b, LASX256BOpnd>; ++ ++def XVREPL128VEI_H : LASX_I3_U<0b0111011011110111110>, ++ LASX_ELM_U3_VREPLVE_DESC_BASE_Intrinsic<"xvrepl128vei.h", int_loongarch_lasx_xvrepl128vei_h, LASX256HOpnd>; ++ ++def XVREPL128VEI_W : LASX_I2_U<0b01110110111101111110>, ++ LASX_ELM_U2_VREPLVE_DESC_BASE_Intrinsic<"xvrepl128vei.w", int_loongarch_lasx_xvrepl128vei_w, LASX256WOpnd>; ++ ++def XVREPL128VEI_D : LASX_I1_U<0b011101101111011111110>, ++ LASX_ELM_U1_VREPLVE_DESC_BASE_Intrinsic<"xvrepl128vei.d", int_loongarch_lasx_xvrepl128vei_d, LASX256DOpnd>; ++ ++ ++def XVINSVE0_W : LASX_I3_U<0b0111011011111111110>, ++ LASX_BIT_3_4O_DESC_BASE<"xvinsve0.w", int_loongarch_lasx_xvinsve0_w, uimm3, immZExt3, LASX256WOpnd>; ++ ++def XVINSVE0_D : LASX_I2_U<0b01110110111111111110>, ++ LASX_BIT_2_4O_DESC_BASE<"xvinsve0.d", int_loongarch_lasx_xvinsve0_d, uimm2, immZExt2, LASX256DOpnd>; ++ ++ ++def XVPICKVE_W : LASX_I3_U<0b0111011100000011110>, ++ LASX_BIT_3_4ON<"xvpickve.w", uimm3, immZExt3, LASX256WOpnd>; ++ ++def XVPICKVE_D : LASX_I2_U<0b01110111000000111110>, ++ LASX_BIT_2_4ON<"xvpickve.d", uimm2, immZExt2, LASX256DOpnd>; ++ ++ ++def XVREPLVE0_B : LASX_2R<0b0111011100000111000000>, ++ LASX_XVBROADCAST_DESC_BASE<"xvreplve0.b", int_loongarch_lasx_xvreplve0_b, v32i8, LASX256BOpnd>; ++ ++def XVREPLVE0_H : LASX_2R<0b0111011100000111100000>, ++ LASX_XVBROADCAST_DESC_BASE<"xvreplve0.h", int_loongarch_lasx_xvreplve0_h, v16i16, LASX256HOpnd>; ++ ++def XVREPLVE0_W : LASX_2R<0b0111011100000111110000>, ++ LASX_XVBROADCAST_DESC_BASE<"xvreplve0.w", int_loongarch_lasx_xvreplve0_w, v8i32, LASX256WOpnd> ; ++ ++def XVREPLVE0_D : LASX_2R<0b0111011100000111111000>, ++ LASX_XVBROADCAST_DESC_BASE<"xvreplve0.d", xvbroadcast_v4i64, v4i64, LASX256DOpnd>; ++ ++def XVREPLVE0_Q : LASX_2R<0b0111011100000111111100>, ++ LASX_XVBROADCAST_DESC_BASE<"xvreplve0.q", int_loongarch_lasx_xvreplve0_q, v32i8, LASX256BOpnd>; ++ ++ ++def XVSLLWIL_H_B : LASX_I3_U<0b0111011100001000001>, ++ LASX_2R_U3_DESC_BASE<"xvsllwil.h.b", int_loongarch_lasx_xvsllwil_h_b, LASX256HOpnd, LASX256BOpnd>; ++ ++def XVSLLWIL_W_H : LASX_I4_U<0b011101110000100001>, ++ LASX_2R_U4_DESC_BASE<"xvsllwil.w.h", int_loongarch_lasx_xvsllwil_w_h, LASX256WOpnd, LASX256HOpnd>; ++ ++def XVSLLWIL_D_W : LASX_I5_U<0b01110111000010001>, ++ LASX_2R_U5_DESC_BASE<"xvsllwil.d.w", int_loongarch_lasx_xvsllwil_d_w, LASX256DOpnd, LASX256WOpnd> ; ++ ++ ++def XVEXTL_Q_D : LASX_2R<0b0111011100001001000000>, ++ LASX_2R_DESC_BASE<"xvextl.q.d", int_loongarch_lasx_xvextl_q_d, 
LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVSLLWIL_HU_BU : LASX_I3_U<0b0111011100001100001>, ++ LASX_2R_U3_DESC_BASE<"xvsllwil.hu.bu", int_loongarch_lasx_xvsllwil_hu_bu, LASX256HOpnd, LASX256BOpnd>; ++ ++def XVSLLWIL_WU_HU : LASX_I4_U<0b011101110000110001>, ++ LASX_2R_U4_DESC_BASE<"xvsllwil.wu.hu", int_loongarch_lasx_xvsllwil_wu_hu, LASX256WOpnd, LASX256HOpnd>; ++ ++def XVSLLWIL_DU_WU : LASX_I5_U<0b01110111000011001>, ++ LASX_2R_U5_DESC_BASE<"xvsllwil.du.wu", int_loongarch_lasx_xvsllwil_du_wu, LASX256DOpnd, LASX256WOpnd> ; ++ ++ ++def XVEXTL_QU_DU : LASX_2R<0b0111011100001101000000>, ++ LASX_2R_DESC_BASE<"xvextl.qu.du", int_loongarch_lasx_xvextl_qu_du, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVBITCLRI_B : LASX_I3_U<0b0111011100010000001>, ++ LASX_2R_U3_DESC_BASE<"xvbitclri.b", int_loongarch_lasx_xvbitclri_b, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVBITCLRI_H : LASX_I4_U<0b011101110001000001>, ++ LASX_2R_U4_DESC_BASE<"xvbitclri.h", int_loongarch_lasx_xvbitclri_h, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVBITCLRI_W : LASX_I5_U<0b01110111000100001>, ++ LASX_2R_U5_DESC_BASE<"xvbitclri.w", int_loongarch_lasx_xvbitclri_w, LASX256WOpnd, LASX256WOpnd>; ++ ++def XVBITCLRI_D : LASX_I6_U<0b0111011100010001>, ++ LASX_2R_U6_DESC_BASE<"xvbitclri.d", int_loongarch_lasx_xvbitclri_d, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVBITSETI_B : LASX_I3_U<0b0111011100010100001>, ++ LASX_2R_U3_DESC_BASE<"xvbitseti.b", int_loongarch_lasx_xvbitseti_b, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVBITSETI_H : LASX_I4_U<0b011101110001010001>, ++ LASX_2R_U4_DESC_BASE<"xvbitseti.h", int_loongarch_lasx_xvbitseti_h, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVBITSETI_W : LASX_I5_U<0b01110111000101001>, ++ LASX_2R_U5_DESC_BASE<"xvbitseti.w", int_loongarch_lasx_xvbitseti_w, LASX256WOpnd, LASX256WOpnd>; ++ ++def XVBITSETI_D : LASX_I6_U<0b0111011100010101>, ++ LASX_2R_U6_DESC_BASE<"xvbitseti.d", int_loongarch_lasx_xvbitseti_d, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVBITREVI_B : LASX_I3_U<0b0111011100011000001>, ++ LASX_2R_U3_DESC_BASE<"xvbitrevi.b", int_loongarch_lasx_xvbitrevi_b, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVBITREVI_H : LASX_I4_U<0b011101110001100001>, ++ LASX_2R_U4_DESC_BASE<"xvbitrevi.h", int_loongarch_lasx_xvbitrevi_h, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVBITREVI_W : LASX_I5_U<0b01110111000110001>, ++ LASX_2R_U5_DESC_BASE<"xvbitrevi.w", int_loongarch_lasx_xvbitrevi_w, LASX256WOpnd, LASX256WOpnd>; ++ ++def XVBITREVI_D : LASX_I6_U<0b0111011100011001>, ++ LASX_2R_U6_DESC_BASE<"xvbitrevi.d", int_loongarch_lasx_xvbitrevi_d, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVSAT_B : LASX_I3_U<0b0111011100100100001>, ++ LASX_BIT_3_DESC_BASE<"xvsat.b", int_loongarch_lasx_xvsat_b, uimm3, immZExt3, LASX256BOpnd>; ++ ++def XVSAT_H : LASX_I4_U<0b011101110010010001>, ++ LASX_BIT_4_DESC_BASE<"xvsat.h", int_loongarch_lasx_xvsat_h, uimm4, immZExt4, LASX256HOpnd>; ++ ++def XVSAT_W : LASX_I5_U<0b01110111001001001>, ++ LASX_BIT_5_DESC_BASE<"xvsat.w", int_loongarch_lasx_xvsat_w, uimm5, immZExt5, LASX256WOpnd>; ++ ++def XVSAT_D : LASX_I6_U<0b0111011100100101>, ++ LASX_BIT_6_DESC_BASE<"xvsat.d", int_loongarch_lasx_xvsat_d, uimm6, immZExt6, LASX256DOpnd>; ++ ++ ++def XVSAT_BU : LASX_I3_U<0b0111011100101000001>, ++ LASX_BIT_3_DESC_BASE<"xvsat.bu", int_loongarch_lasx_xvsat_bu, uimm3, immZExt3, LASX256BOpnd>; ++ ++def XVSAT_HU : LASX_I4_U<0b011101110010100001>, ++ LASX_BIT_4_DESC_BASE<"xvsat.hu", int_loongarch_lasx_xvsat_hu, uimm4, immZExt4, LASX256HOpnd>; ++ ++def XVSAT_WU : LASX_I5_U<0b01110111001010001>, ++ LASX_BIT_5_DESC_BASE<"xvsat.wu", 
int_loongarch_lasx_xvsat_wu, uimm5, immZExt5, LASX256WOpnd>; ++ ++def XVSAT_DU : LASX_I6_U<0b0111011100101001>, ++ LASX_BIT_6_DESC_BASE<"xvsat.du", int_loongarch_lasx_xvsat_du, uimm6, immZExt6, LASX256DOpnd>; ++ ++ ++def XVSLLI_B : LASX_I3_U<0b0111011100101100001>, ++ LASX_BIT_U3_VREPLVE_DESC_BASE_Intrinsic<"xvslli.b", int_loongarch_lasx_xvslli_b, uimm3, immZExt3, LASX256BOpnd>; ++ ++def XVSLLI_H : LASX_I4_U<0b011101110010110001>, ++ LASX_BIT_U4_VREPLVE_DESC_BASE_Intrinsic<"xvslli.h", int_loongarch_lasx_xvslli_h, uimm4, immZExt4, LASX256HOpnd>; ++ ++def XVSLLI_W : LASX_I5_U<0b01110111001011001>, ++ LASX_BIT_U5_VREPLVE_DESC_BASE_Intrinsic<"xvslli.w", int_loongarch_lasx_xvslli_w, uimm5, immZExt5, LASX256WOpnd>; ++ ++def XVSLLI_D : LASX_I6_U<0b0111011100101101>, ++ LASX_BIT_U6_VREPLVE_DESC_BASE_Intrinsic<"xvslli.d", int_loongarch_lasx_xvslli_d, uimm6, immZExt6, LASX256DOpnd>; ++ ++ ++def XVSRLI_B : LASX_I3_U<0b0111011100110000001>, ++ LASX_BIT_U3_VREPLVE_DESC_BASE_Intrinsic<"xvsrli.b", int_loongarch_lasx_xvsrli_b, uimm3, immZExt3, LASX256BOpnd>; ++ ++def XVSRLI_H : LASX_I4_U<0b011101110011000001>, ++ LASX_BIT_U4_VREPLVE_DESC_BASE_Intrinsic<"xvsrli.h", int_loongarch_lasx_xvsrli_h, uimm4, immZExt4, LASX256HOpnd>; ++ ++def XVSRLI_W : LASX_I5_U<0b01110111001100001>, ++ LASX_BIT_U5_VREPLVE_DESC_BASE_Intrinsic<"xvsrli.w", int_loongarch_lasx_xvsrli_w, uimm5, immZExt5, LASX256WOpnd>; ++ ++def XVSRLI_D : LASX_I6_U<0b0111011100110001>, ++ LASX_BIT_U6_VREPLVE_DESC_BASE_Intrinsic<"xvsrli.d", int_loongarch_lasx_xvsrli_d, uimm6, immZExt6, LASX256DOpnd>; ++ ++ ++def XVSRAI_B : LASX_I3_U<0b0111011100110100001>, ++ LASX_BIT_U3_VREPLVE_DESC_BASE_Intrinsic<"xvsrai.b", int_loongarch_lasx_xvsrai_b, uimm3, immZExt3, LASX256BOpnd>; ++ ++def XVSRAI_H : LASX_I4_U<0b011101110011010001>, ++ LASX_BIT_U4_VREPLVE_DESC_BASE_Intrinsic<"xvsrai.h", int_loongarch_lasx_xvsrai_h, uimm4, immZExt4, LASX256HOpnd>; ++ ++def XVSRAI_W : LASX_I5_U<0b01110111001101001>, ++ LASX_BIT_U5_VREPLVE_DESC_BASE_Intrinsic<"xvsrai.w", int_loongarch_lasx_xvsrai_w, uimm5, immZExt5, LASX256WOpnd>; ++ ++def XVSRAI_D : LASX_I6_U<0b0111011100110101>, ++ LASX_BIT_U6_VREPLVE_DESC_BASE_Intrinsic<"xvsrai.d", int_loongarch_lasx_xvsrai_d, uimm6, immZExt6, LASX256DOpnd>; ++ ++ ++def XVSRLNI_B_H : LASX_I4_U<0b011101110100000001>, ++ LASX_U4_DESC_BASE<"xvsrlni.b.h", int_loongarch_lasx_xvsrlni_b_h, uimm4, immZExt4, LASX256BOpnd>; ++ ++def XVSRLNI_H_W : LASX_I5_U<0b01110111010000001>, ++ LASX_N4_U5_DESC_BASE<"xvsrlni.h.w", int_loongarch_lasx_xvsrlni_h_w, uimm5, immZExt5, LASX256HOpnd>; ++ ++def XVSRLNI_W_D : LASX_I6_U<0b0111011101000001>, ++ LASX_U6_DESC_BASE<"xvsrlni.w.d", int_loongarch_lasx_xvsrlni_w_d, uimm6, immZExt6, LASX256WOpnd>; ++ ++def XVSRLNI_D_Q : LASX_I7_U<0b011101110100001>, ++ LASX_D_DESC_BASE<"xvsrlni.d.q", int_loongarch_lasx_xvsrlni_d_q, LASX256DOpnd>; ++ ++ ++def XVSRLRNI_B_H : LASX_I4_U<0b011101110100010001>, ++ LASX_U4_DESC_BASE<"xvsrlrni.b.h", int_loongarch_lasx_xvsrlrni_b_h, uimm4, immZExt4, LASX256BOpnd>; ++ ++def XVSRLRNI_H_W : LASX_I5_U<0b01110111010001001>, ++ LASX_N4_U5_DESC_BASE<"xvsrlrni.h.w", int_loongarch_lasx_xvsrlrni_h_w, uimm5, immZExt5, LASX256HOpnd>; ++ ++def XVSRLRNI_W_D : LASX_I6_U<0b0111011101000101>, ++ LASX_U6_DESC_BASE<"xvsrlrni.w.d", int_loongarch_lasx_xvsrlrni_w_d, uimm6, immZExt6, LASX256WOpnd>; ++ ++def XVSRLRNI_D_Q : LASX_I7_U<0b011101110100011>, ++ LASX_D_DESC_BASE<"xvsrlrni.d.q", int_loongarch_lasx_xvsrlrni_d_q, LASX256DOpnd>; ++ ++ ++def XVSSRLNI_B_H : LASX_I4_U<0b011101110100100001>, ++ 
LASX_U4_DESC_BASE<"xvssrlni.b.h", int_loongarch_lasx_xvssrlni_b_h, uimm4, immZExt4, LASX256BOpnd>; ++ ++def XVSSRLNI_H_W : LASX_I5_U<0b01110111010010001>, ++ LASX_N4_U5_DESC_BASE<"xvssrlni.h.w", int_loongarch_lasx_xvssrlni_h_w, uimm5, immZExt5, LASX256HOpnd>; ++ ++def XVSSRLNI_W_D : LASX_I6_U<0b0111011101001001>, ++ LASX_U6_DESC_BASE<"xvssrlni.w.d", int_loongarch_lasx_xvssrlni_w_d, uimm6, immZExt6, LASX256WOpnd>; ++ ++def XVSSRLNI_D_Q : LASX_I7_U<0b011101110100101>, ++ LASX_D_DESC_BASE<"xvssrlni.d.q", int_loongarch_lasx_xvssrlni_d_q, LASX256DOpnd>; ++ ++ ++def XVSSRLNI_BU_H : LASX_I4_U<0b011101110100110001>, ++ LASX_U4_DESC_BASE<"xvssrlni.bu.h", int_loongarch_lasx_xvssrlni_bu_h, uimm4, immZExt4, LASX256BOpnd>; ++ ++def XVSSRLNI_HU_W : LASX_I5_U<0b01110111010011001>, ++ LASX_N4_U5_DESC_BASE<"xvssrlni.hu.w", int_loongarch_lasx_xvssrlni_hu_w, uimm5, immZExt5, LASX256HOpnd>; ++ ++def XVSSRLNI_WU_D : LASX_I6_U<0b0111011101001101>, ++ LASX_U6_DESC_BASE<"xvssrlni.wu.d", int_loongarch_lasx_xvssrlni_wu_d, uimm6, immZExt6, LASX256WOpnd>; ++ ++def XVSSRLNI_DU_Q : LASX_I7_U<0b011101110100111>, ++ LASX_D_DESC_BASE<"xvssrlni.du.q", int_loongarch_lasx_xvssrlni_du_q, LASX256DOpnd>; ++ ++ ++def XVSSRLRNI_B_H : LASX_I4_U<0b011101110101000001>, ++ LASX_2R_3R_U4_DESC_BASE<"xvssrlrni.b.h", int_loongarch_lasx_xvssrlrni_b_h, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVSSRLRNI_H_W : LASX_I5_U<0b01110111010100001>, ++ LASX_2R_3R_U5_DESC_BASE<"xvssrlrni.h.w", int_loongarch_lasx_xvssrlrni_h_w, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVSSRLRNI_W_D : LASX_I6_U<0b0111011101010001>, ++ LASX_2R_3R_U6_DESC_BASE<"xvssrlrni.w.d", int_loongarch_lasx_xvssrlrni_w_d, LASX256WOpnd, LASX256WOpnd>; ++ ++def XVSSRLRNI_D_Q : LASX_I7_U<0b011101110101001>, ++ LASX_2R_3R_U7_DESC_BASE<"xvssrlrni.d.q", int_loongarch_lasx_xvssrlrni_d_q, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVSSRLRNI_BU_H : LASX_I4_U<0b011101110101010001>, ++ LASX_U4_DESC_BASE<"xvssrlrni.bu.h", int_loongarch_lasx_xvssrlrni_bu_h, uimm4, immZExt4, LASX256BOpnd>; ++ ++def XVSSRLRNI_HU_W : LASX_I5_U<0b01110111010101001>, ++ LASX_N4_U5_DESC_BASE<"xvssrlrni.hu.w", int_loongarch_lasx_xvssrlrni_hu_w, uimm5, immZExt5, LASX256HOpnd>; ++ ++def XVSSRLRNI_WU_D : LASX_I6_U<0b0111011101010101>, ++ LASX_U6_DESC_BASE<"xvssrlrni.wu.d", int_loongarch_lasx_xvssrlrni_wu_d, uimm6, immZExt6, LASX256WOpnd>; ++ ++def XVSSRLRNI_DU_Q : LASX_I7_U<0b011101110101011>, ++ LASX_D_DESC_BASE<"xvssrlrni.du.q", int_loongarch_lasx_xvssrlrni_du_q, LASX256DOpnd>; ++ ++ ++def XVSRANI_B_H : LASX_I4_U<0b011101110101100001>, ++ LASX_2R_3R_U4_DESC_BASE<"xvsrani.b.h", int_loongarch_lasx_xvsrani_b_h, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVSRANI_H_W : LASX_I5_U<0b01110111010110001>, ++ LASX_2R_3R_U5_DESC_BASE<"xvsrani.h.w", int_loongarch_lasx_xvsrani_h_w, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVSRANI_W_D : LASX_I6_U<0b0111011101011001>, ++ LASX_2R_3R_U6_DESC_BASE<"xvsrani.w.d", int_loongarch_lasx_xvsrani_w_d, LASX256WOpnd, LASX256WOpnd>; ++ ++def XVSRANI_D_Q : LASX_I7_U<0b011101110101101>, ++ LASX_2R_3R_U7_DESC_BASE<"xvsrani.d.q", int_loongarch_lasx_xvsrani_d_q, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVSRARNI_B_H : LASX_I4_U<0b011101110101110001>, ++ LASX_U4_DESC_BASE<"xvsrarni.b.h", int_loongarch_lasx_xvsrarni_b_h, uimm4, immZExt4, LASX256BOpnd>; ++ ++def XVSRARNI_H_W : LASX_I5_U<0b01110111010111001>, ++ LASX_N4_U5_DESC_BASE<"xvsrarni.h.w", int_loongarch_lasx_xvsrarni_h_w, uimm5, immZExt5, LASX256HOpnd>; ++ ++def XVSRARNI_W_D : LASX_I6_U<0b0111011101011101>, ++ LASX_U6_DESC_BASE<"xvsrarni.w.d", 
int_loongarch_lasx_xvsrarni_w_d, uimm6, immZExt6, LASX256WOpnd>; ++ ++def XVSRARNI_D_Q : LASX_I7_U<0b011101110101111>, ++ LASX_D_DESC_BASE<"xvsrarni.d.q", int_loongarch_lasx_xvsrarni_d_q, LASX256DOpnd>; ++ ++ ++def XVSSRANI_B_H : LASX_I4_U<0b011101110110000001>, ++ LASX_U4_DESC_BASE<"xvssrani.b.h", int_loongarch_lasx_xvssrani_b_h, uimm4, immZExt4, LASX256BOpnd>; ++ ++def XVSSRANI_H_W : LASX_I5_U<0b01110111011000001>, ++ LASX_N4_U5_DESC_BASE<"xvssrani.h.w", int_loongarch_lasx_xvssrani_h_w, uimm5, immZExt5, LASX256HOpnd>; ++ ++def XVSSRANI_W_D : LASX_I6_U<0b0111011101100001>, ++ LASX_U6_DESC_BASE<"xvssrani.w.d", int_loongarch_lasx_xvssrani_w_d, uimm6, immZExt6, LASX256WOpnd>; ++ ++def XVSSRANI_D_Q : LASX_I7_U<0b011101110110001>, ++ LASX_D_DESC_BASE<"xvssrani.d.q", int_loongarch_lasx_xvssrani_d_q, LASX256DOpnd>; ++ ++ ++def XVSSRANI_BU_H : LASX_I4_U<0b011101110110010001>, ++ LASX_U4_DESC_BASE<"xvssrani.bu.h", int_loongarch_lasx_xvssrani_bu_h, uimm4, immZExt4, LASX256BOpnd>; ++ ++def XVSSRANI_HU_W : LASX_I5_U<0b01110111011001001>, ++ LASX_N4_U5_DESC_BASE<"xvssrani.hu.w", int_loongarch_lasx_xvssrani_hu_w, uimm5, immZExt5, LASX256HOpnd>; ++ ++def XVSSRANI_WU_D : LASX_I6_U<0b0111011101100101>, ++ LASX_U6_DESC_BASE<"xvssrani.wu.d", int_loongarch_lasx_xvssrani_wu_d, uimm6, immZExt6, LASX256WOpnd>; ++ ++def XVSSRANI_DU_Q : LASX_I7_U<0b011101110110011>, ++ LASX_D_DESC_BASE<"xvssrani.du.q", int_loongarch_lasx_xvssrani_du_q, LASX256DOpnd>; ++ ++ ++def XVSSRARNI_B_H : LASX_I4_U<0b011101110110100001>, ++ LASX_U4_DESC_BASE<"xvssrarni.b.h", int_loongarch_lasx_xvssrarni_b_h, uimm4, immZExt4, LASX256BOpnd>; ++ ++def XVSSRARNI_H_W : LASX_I5_U<0b01110111011010001>, ++ LASX_N4_U5_DESC_BASE<"xvssrarni.h.w", int_loongarch_lasx_xvssrarni_h_w, uimm5, immZExt5, LASX256HOpnd>; ++ ++def XVSSRARNI_W_D : LASX_I6_U<0b0111011101101001>, ++ LASX_U6_DESC_BASE<"xvssrarni.w.d", int_loongarch_lasx_xvssrarni_w_d, uimm6, immZExt6, LASX256WOpnd>; ++ ++def XVSSRARNI_D_Q : LASX_I7_U<0b011101110110101>, ++ LASX_D_DESC_BASE<"xvssrarni.d.q", int_loongarch_lasx_xvssrarni_d_q, LASX256DOpnd>; ++ ++ ++def XVSSRARNI_BU_H : LASX_I4_U<0b011101110110110001>, ++ LASX_U4_DESC_BASE<"xvssrarni.bu.h", int_loongarch_lasx_xvssrarni_bu_h, uimm4, immZExt4, LASX256BOpnd>; ++ ++def XVSSRARNI_HU_W : LASX_I5_U<0b01110111011011001>, ++ LASX_N4_U5_DESC_BASE<"xvssrarni.hu.w", int_loongarch_lasx_xvssrarni_hu_w, uimm5, immZExt5, LASX256HOpnd>; ++ ++def XVSSRARNI_WU_D : LASX_I6_U<0b0111011101101101>, ++ LASX_U6_DESC_BASE<"xvssrarni.wu.d", int_loongarch_lasx_xvssrarni_wu_d, uimm6, immZExt6, LASX256WOpnd>; ++ ++def XVSSRARNI_DU_Q : LASX_I7_U<0b011101110110111>, ++ LASX_D_DESC_BASE<"xvssrarni.du.q", int_loongarch_lasx_xvssrarni_du_q, LASX256DOpnd>; ++ ++ ++def XVEXTRINS_B : LASX_I8_U<0b01110111100011>, ++ LASX_2R_3R_U8_DESC_BASE<"xvextrins.b", int_loongarch_lasx_xvextrins_b, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVEXTRINS_H : LASX_I8_U<0b01110111100010>, ++ LASX_2R_3R_U8_DESC_BASE<"xvextrins.h", int_loongarch_lasx_xvextrins_h, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVEXTRINS_W : LASX_I8_U<0b01110111100001>, ++ LASX_2R_3R_U8_DESC_BASE<"xvextrins.w", int_loongarch_lasx_xvextrins_w, LASX256WOpnd, LASX256WOpnd>; ++ ++def XVEXTRINS_D : LASX_I8_U<0b01110111100000>, ++ LASX_2R_3R_U8_DESC_BASE<"xvextrins.d", int_loongarch_lasx_xvextrins_d, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVSHUF4I_B : LASX_I8_U<0b01110111100100>, ++ LASX_I8_SHF_DESC_BASE<"xvshuf4i.b", int_loongarch_lasx_xvshuf4i_b, LASX256BOpnd>; ++ ++def XVSHUF4I_H : LASX_I8_U<0b01110111100101>, ++ 
LASX_I8_SHF_DESC_BASE<"xvshuf4i.h", int_loongarch_lasx_xvshuf4i_h, LASX256HOpnd>;
++
++def XVSHUF4I_W : LASX_I8_U<0b01110111100110>,
++ LASX_I8_SHF_DESC_BASE<"xvshuf4i.w", int_loongarch_lasx_xvshuf4i_w, LASX256WOpnd>;
++
++def XVSHUF4I_D : LASX_I8_U<0b01110111100111>,
++ LASX_I8_O4_SHF_DESC_BASE<"xvshuf4i.d", int_loongarch_lasx_xvshuf4i_d, LASX256DOpnd>;
++
++
++def XVBITSELI_B : LASX_I8_U<0b01110111110001>,
++ LASX_2R_3R_U8_DESC_BASE<"xvbitseli.b", int_loongarch_lasx_xvbitseli_b, LASX256BOpnd, LASX256BOpnd>;
++
++
++def XVANDI_B : LASX_I8_U<0b01110111110100>,
++ LASX_2R_U8_DESC_BASE<"xvandi.b", int_loongarch_lasx_xvandi_b, LASX256BOpnd, LASX256BOpnd>;
++
++
++def XVORI_B : LASX_I8_U<0b01110111110101>,
++ LASX_2R_U8_DESC_BASE<"xvori.b", int_loongarch_lasx_xvori_b, LASX256BOpnd, LASX256BOpnd>;
++
++
++def XVXORI_B : LASX_I8_U<0b01110111110110>,
++ LASX_2R_U8_DESC_BASE<"xvxori.b", int_loongarch_lasx_xvxori_b, LASX256BOpnd, LASX256BOpnd>;
++
++
++def XVNORI_B : LASX_I8_U<0b01110111110111>,
++ LASX_2R_U8_DESC_BASE<"xvnori.b", int_loongarch_lasx_xvnori_b, LASX256BOpnd, LASX256BOpnd>;
++
++
++def XVLDI : LASX_1R_I13<0b01110111111000>,
++ LASX_I13_DESC_BASE<"xvldi", int_loongarch_lasx_xvldi, i32, simm13Op, LASX256DOpnd>;
++
++
++def XVLDI_B : LASX_1R_I13_I10<0b01110111111000000>,
++ LASX_I13_DESC_BASE_10<"xvldi", int_loongarch_lasx_xvrepli_b, simm10, immZExt10, LASX256BOpnd>;
++
++def XVLDI_H : LASX_1R_I13_I10<0b01110111111000001>,
++ LASX_I13_DESC_BASE_10<"xvldi", int_loongarch_lasx_xvrepli_h, simm10, immZExt10, LASX256HOpnd>;
++
++def XVLDI_W : LASX_1R_I13_I10<0b01110111111000010>,
++ LASX_I13_DESC_BASE_10<"xvldi", int_loongarch_lasx_xvrepli_w, simm10, immZExt10, LASX256WOpnd>;
++
++def XVLDI_D : LASX_1R_I13_I10<0b01110111111000011>,
++ LASX_I13_DESC_BASE_10<"xvldi", int_loongarch_lasx_xvrepli_d, simm10, immZExt10, LASX256DOpnd>;
++
++
++def XVPERMI_W : LASX_I8_U<0b01110111111001>,
++ LASX_2R_3R_U8_DESC_BASE<"xvpermi.w", int_loongarch_lasx_xvpermi_w, LASX256WOpnd, LASX256WOpnd>;
++
++def XVPERMI_D : LASX_I8_U<0b01110111111010>,
++ LASX_2R_U8_DESC_BASE<"xvpermi.d", int_loongarch_lasx_xvpermi_d, LASX256DOpnd, LASX256DOpnd>;
++
++def XVPERMI_Q : LASX_I8_U<0b01110111111011>,
++ LASX_2R_3R_U8_DESC_BASE<"xvpermi.q", int_loongarch_lasx_xvpermi_q, LASX256BOpnd, LASX256BOpnd>;
++
++
++//Pat
++
++class LASXBitconvertPat preds = [HasLASX]> :
++ LASXPat<(DstVT (bitconvert SrcVT:$src)),
++ (COPY_TO_REGCLASS SrcVT:$src, DstRC), preds>;
++
++// These are endian-independent because the element size doesn't change
++def : LASXBitconvertPat;
++def : LASXBitconvertPat;
++def : LASXBitconvertPat;
++def : LASXBitconvertPat;
++
++def : LASXBitconvertPat;
++def : LASXBitconvertPat;
++def : LASXBitconvertPat;
++def : LASXBitconvertPat;
++def : LASXBitconvertPat;
++
++def : LASXBitconvertPat;
++def : LASXBitconvertPat;
++def : LASXBitconvertPat;
++def : LASXBitconvertPat;
++def : LASXBitconvertPat;
++
++def : LASXBitconvertPat;
++def : LASXBitconvertPat;
++def : LASXBitconvertPat;
++def : LASXBitconvertPat;
++
++def : LASXBitconvertPat;
++def : LASXBitconvertPat;
++def : LASXBitconvertPat;
++def : LASXBitconvertPat;
++
++def : LASXBitconvertPat;
++def : LASXBitconvertPat;
++def : LASXBitconvertPat;
++def : LASXBitconvertPat;
++
++def : LASXBitconvertPat;
++def : LASXBitconvertPat;
++def : LASXBitconvertPat;
++def : LASXBitconvertPat;
++
++class LASX_XINSERT_PSEUDO_BASE :
++ LASXPseudo<(outs ROXD:$xd), (ins ROXD:$xd_in, ImmOp:$n, ROFS:$fs),
++ [(set ROXD:$xd, (OpNode (Ty ROXD:$xd_in), ROFS:$fs, Imm:$n))]> {
++ bit
usesCustomInserter = 1; ++ string Constraints = "$xd = $xd_in"; ++} ++ ++class XINSERT_H_PSEUDO_DESC : LASX_XINSERT_PSEUDO_BASE; ++ ++class XINSERT_H64_PSEUDO_DESC : LASX_XINSERT_PSEUDO_BASE; ++ ++def XINSERT_H_PSEUDO : XINSERT_H_PSEUDO_DESC; ++def XINSERT_H64_PSEUDO : XINSERT_H64_PSEUDO_DESC; ++ ++class XINSERT_B_PSEUDO_DESC : LASX_XINSERT_PSEUDO_BASE; ++def XINSERT_B_PSEUDO : XINSERT_B_PSEUDO_DESC; ++ ++ ++class LASX_COPY_PSEUDO_BASE : ++ LASXPseudo<(outs RCD:$xd), (ins RCWS:$xj, ImmOp:$n), ++ [(set RCD:$xd, (OpNode (VecTy RCWS:$xj), Imm:$n))]> { ++ bit usesCustomInserter = 1; ++} ++ ++class XCOPY_FW_PSEUDO_DESC : LASX_COPY_PSEUDO_BASE; ++class XCOPY_FD_PSEUDO_DESC : LASX_COPY_PSEUDO_BASE; ++def XCOPY_FW_PSEUDO : XCOPY_FW_PSEUDO_DESC; ++def XCOPY_FD_PSEUDO : XCOPY_FD_PSEUDO_DESC; ++ ++ ++ ++class LASX_XINSERT_VIDX_PSEUDO_BASE : ++ LASXPseudo<(outs ROXD:$xd), (ins ROXD:$xd_in, ROIdx:$n, ROFS:$fs), ++ [(set ROXD:$xd, (OpNode (Ty ROXD:$xd_in), ROFS:$fs, ROIdx:$n))]> { ++ bit usesCustomInserter = 1; ++ string Constraints = "$xd = $xd_in"; ++} ++ ++ ++class XINSERT_FW_PSEUDO_DESC : LASX_XINSERT_PSEUDO_BASE; ++def XINSERT_FW_PSEUDO : XINSERT_FW_PSEUDO_DESC; ++ ++class XINSERT_FW_VIDX_PSEUDO_DESC : ++ LASX_XINSERT_VIDX_PSEUDO_BASE; ++class XINSERT_FW_VIDX64_PSEUDO_DESC : ++ LASX_XINSERT_VIDX_PSEUDO_BASE; ++ ++def XINSERT_FW_VIDX_PSEUDO : XINSERT_FW_VIDX_PSEUDO_DESC; ++def XINSERT_FW_VIDX64_PSEUDO : XINSERT_FW_VIDX64_PSEUDO_DESC; ++ ++class XINSERT_B_VIDX64_PSEUDO_DESC : ++ LASX_XINSERT_VIDX_PSEUDO_BASE; ++ ++def XINSERT_B_VIDX64_PSEUDO : XINSERT_B_VIDX64_PSEUDO_DESC; ++ ++ ++class XINSERT_B_VIDX_PSEUDO_DESC : ++ LASX_XINSERT_VIDX_PSEUDO_BASE; ++ ++def XINSERT_B_VIDX_PSEUDO : XINSERT_B_VIDX_PSEUDO_DESC; ++ ++ ++class XINSERTPostRA : ++ LoongArchPseudo<(outs RC:$xd), (ins RC:$xd_in, RD:$n, RE:$fs), []> { ++ let mayLoad = 1; ++ let mayStore = 1; ++} ++ ++def XINSERT_B_VIDX_PSEUDO_POSTRA : XINSERTPostRA; ++def XINSERT_B_VIDX64_PSEUDO_POSTRA : XINSERTPostRA; ++def XINSERT_FW_VIDX_PSEUDO_POSTRA : XINSERTPostRA; ++def XINSERT_FW_VIDX64_PSEUDO_POSTRA : XINSERTPostRA; ++ ++class XINSERT_FD_PSEUDO_DESC : LASX_XINSERT_PSEUDO_BASE; ++ ++def XINSERT_FD_PSEUDO : XINSERT_FD_PSEUDO_DESC; ++ ++class LASX_2R_FILL_PSEUDO_BASE : ++ LASXPseudo<(outs RCWD:$xd), (ins RCWS:$fs), ++ [(set RCWD:$xd, (OpNode RCWS:$fs))]> { ++ let usesCustomInserter = 1; ++} ++ ++class XFILL_FW_PSEUDO_DESC : LASX_2R_FILL_PSEUDO_BASE; ++class XFILL_FD_PSEUDO_DESC : LASX_2R_FILL_PSEUDO_BASE; ++def XFILL_FW_PSEUDO : XFILL_FW_PSEUDO_DESC; ++def XFILL_FD_PSEUDO : XFILL_FD_PSEUDO_DESC; ++ ++class LASX_CONCAT_VECTORS_PSEUDO_BASE : ++ LASXPseudo<(outs ROXD:$xd), (ins ROXJ:$xs, ROXK:$xt), ++ [(set ROXD:$xd, (Ty (concat_vectors (SubTy ROXJ:$xs), (SubTy ROXK:$xt))))]> { ++ bit usesCustomInserter = 1; ++} ++ ++class CONCAT_VECTORS_B_PSEUDO_DESC : ++ LASX_CONCAT_VECTORS_PSEUDO_BASE; ++class CONCAT_VECTORS_H_PSEUDO_DESC : ++ LASX_CONCAT_VECTORS_PSEUDO_BASE; ++class CONCAT_VECTORS_W_PSEUDO_DESC : ++ LASX_CONCAT_VECTORS_PSEUDO_BASE; ++class CONCAT_VECTORS_D_PSEUDO_DESC : ++ LASX_CONCAT_VECTORS_PSEUDO_BASE; ++ ++class CONCAT_VECTORS_FW_PSEUDO_DESC : ++ LASX_CONCAT_VECTORS_PSEUDO_BASE; ++class CONCAT_VECTORS_FD_PSEUDO_DESC : ++ LASX_CONCAT_VECTORS_PSEUDO_BASE; ++ ++def CONCAT_VECTORS_B_PSEUDO : CONCAT_VECTORS_B_PSEUDO_DESC; ++def CONCAT_VECTORS_H_PSEUDO : CONCAT_VECTORS_H_PSEUDO_DESC; ++def CONCAT_VECTORS_W_PSEUDO : CONCAT_VECTORS_W_PSEUDO_DESC; ++def CONCAT_VECTORS_D_PSEUDO : CONCAT_VECTORS_D_PSEUDO_DESC; ++def CONCAT_VECTORS_FW_PSEUDO : 
CONCAT_VECTORS_FW_PSEUDO_DESC; ++def CONCAT_VECTORS_FD_PSEUDO : CONCAT_VECTORS_FD_PSEUDO_DESC; ++ ++ ++class LASX_COPY_GPR_PSEUDO_BASE : ++ LASXPseudo<(outs ROXD:$xd), (ins ROFS:$xj, ROIdx:$n), ++ [(set ROXD:$xd, (OpNode (VecTy ROFS:$xj), ROIdx:$n))]> { ++ bit usesCustomInserter = 1; ++} ++ ++class XCOPY_FW_GPR_PSEUDO_DESC : LASX_COPY_GPR_PSEUDO_BASE; ++def XCOPY_FW_GPR_PSEUDO : XCOPY_FW_GPR_PSEUDO_DESC; ++ ++ ++let isCodeGenOnly = 1 in { ++ ++def XVLD_H : LASX_I12_S<0b0010110010>, ++ LASX_LD<"xvld", load, v16i16, LASX256HOpnd, mem>; ++ ++def XVLD_W : LASX_I12_S<0b0010110010>, ++ LASX_LD<"xvld", load, v8i32, LASX256WOpnd, mem>; ++ ++def XVLD_D : LASX_I12_S<0b0010110010>, ++ LASX_LD<"xvld", load, v4i64, LASX256DOpnd, mem>; ++ ++ ++def XVST_H : LASX_I12_S<0b0010110011>, ++ LASX_ST<"xvst", store, v16i16, LASX256HOpnd, mem_simm12>; ++ ++def XVST_W : LASX_I12_S<0b0010110011>, ++ LASX_ST<"xvst", store, v8i32, LASX256WOpnd, mem_simm12>; ++ ++def XVST_D : LASX_I12_S<0b0010110011>, ++ LASX_ST<"xvst", store, v4i64, LASX256DOpnd, mem_simm12>; ++ ++ ++def XVREPLVE_W_N : LASX_3R_1GP<0b01110101001000110>, ++ LASX_3R_VREPLVE_DESC_BASE_N<"xvreplve.w", LASX256WOpnd>; ++ ++ ++def XVANDI_B_N : LASX_I8_U<0b01110111110100>, ++ LASX_BIT_U8_DESC_BASE<"xvandi.b", and, xvsplati8_uimm8, LASX256BOpnd>; ++ ++ ++def XVXORI_B_N : LASX_I8_U<0b01110111110110>, ++ LASX_BIT_U8_DESC_BASE<"xvxori.b", xor, xvsplati8_uimm8, LASX256BOpnd>; ++ ++ ++def XVSRAI_B_N : LASX_I3_U<0b0111011100110100001>, ++ LASX_BIT_U3_VREPLVE_DESC_BASE<"xvsrai.b", sra, xvsplati8_uimm3, LASX256BOpnd>; ++ ++def XVSRAI_H_N : LASX_I4_U<0b011101110011010001>, ++ LASX_BIT_U4_VREPLVE_DESC_BASE<"xvsrai.h", sra, xvsplati16_uimm4, LASX256HOpnd>; ++ ++def XVSRAI_W_N : LASX_I5_U<0b01110111001101001>, ++ LASX_BIT_U5_VREPLVE_DESC_BASE<"xvsrai.w", sra, xvsplati32_uimm5, LASX256WOpnd>; ++ ++def XVSRAI_D_N : LASX_I6_U<0b0111011100110101>, ++ LASX_BIT_U6_VREPLVE_DESC_BASE<"xvsrai.d", sra, xvsplati64_uimm6, LASX256DOpnd>; ++ ++ ++def XVSLLI_B_N : LASX_I3_U<0b0111011100101100001>, ++ LASX_BIT_U3_VREPLVE_DESC_BASE<"xvslli.b", shl, xvsplati8_uimm3, LASX256BOpnd>; ++ ++def XVSLLI_H_N : LASX_I4_U<0b011101110010110001>, ++ LASX_BIT_U4_VREPLVE_DESC_BASE<"xvslli.h", shl, xvsplati16_uimm4, LASX256HOpnd>; ++ ++def XVSLLI_W_N : LASX_I5_U<0b01110111001011001>, ++ LASX_BIT_U5_VREPLVE_DESC_BASE<"xvslli.w", shl, xvsplati32_uimm5, LASX256WOpnd>; ++ ++def XVSLLI_D_N : LASX_I6_U<0b0111011100101101>, ++ LASX_BIT_U6_VREPLVE_DESC_BASE<"xvslli.d", shl, xvsplati64_uimm6, LASX256DOpnd>; ++ ++ ++def XVSRLI_B_N : LASX_I3_U<0b0111011100110000001>, ++ LASX_BIT_U3_VREPLVE_DESC_BASE<"xvsrli.b", srl, xvsplati8_uimm3, LASX256BOpnd>; ++ ++def XVSRLI_H_N : LASX_I4_U<0b011101110011000001>, ++ LASX_BIT_U4_VREPLVE_DESC_BASE<"xvsrli.h", srl, xvsplati16_uimm4, LASX256HOpnd>; ++ ++def XVSRLI_W_N : LASX_I5_U<0b01110111001100001>, ++ LASX_BIT_U5_VREPLVE_DESC_BASE<"xvsrli.w", srl, xvsplati32_uimm5, LASX256WOpnd>; ++ ++def XVSRLI_D_N : LASX_I6_U<0b0111011100110001>, ++ LASX_BIT_U6_VREPLVE_DESC_BASE<"xvsrli.d", srl, xvsplati64_uimm6, LASX256DOpnd>; ++ ++ ++def XVMAXI_B_N : LASX_I5<0b01110110100100000>, ++ LASX_I5_DESC_BASE<"xvmaxi.b", smax, xvsplati8_simm5, LASX256BOpnd>; ++ ++def XVMAXI_H_N : LASX_I5<0b01110110100100001>, ++ LASX_I5_DESC_BASE<"xvmaxi.h", smax, xvsplati16_simm5, LASX256HOpnd>; ++ ++def XVMAXI_W_N : LASX_I5<0b01110110100100010>, ++ LASX_I5_DESC_BASE<"xvmaxi.w", smax, xvsplati32_simm5, LASX256WOpnd>; ++ ++def XVMAXI_D_N : LASX_I5<0b01110110100100011>, ++ LASX_I5_DESC_BASE<"xvmaxi.d", smax, 
xvsplati64_simm5, LASX256DOpnd>; ++ ++ ++def XVMINI_B_N : LASX_I5<0b01110110100100100>, ++ LASX_I5_DESC_BASE<"xvmini.b", smin, xvsplati8_simm5, LASX256BOpnd>; ++ ++def XVMINI_H_N : LASX_I5<0b01110110100100101>, ++ LASX_I5_DESC_BASE<"xvmini.h", smin, xvsplati16_simm5, LASX256HOpnd>; ++ ++def XVMINI_W_N : LASX_I5<0b01110110100100110>, ++ LASX_I5_DESC_BASE<"xvmini.w", smin, xvsplati32_simm5, LASX256WOpnd>; ++ ++def XVMINI_D_N : LASX_I5<0b01110110100100111>, ++ LASX_I5_DESC_BASE<"xvmini.d", smin, xvsplati64_simm5, LASX256DOpnd>; ++ ++ ++def XVMAXI_BU_N : LASX_I5_U<0b01110110100101000>, ++ LASX_I5_U_DESC_BASE<"xvmaxi.bu", umax, xvsplati8_uimm5, LASX256BOpnd>; ++ ++def XVMAXI_HU_N : LASX_I5_U<0b01110110100101001>, ++ LASX_I5_U_DESC_BASE<"xvmaxi.hu", umax, xvsplati16_uimm5, LASX256HOpnd>; ++ ++def XVMAXI_WU_N : LASX_I5_U<0b01110110100101010>, ++ LASX_I5_U_DESC_BASE<"xvmaxi.wu", umax, xvsplati32_uimm5, LASX256WOpnd>; ++ ++def XVMAXI_DU_N : LASX_I5_U<0b01110110100101011>, ++ LASX_I5_U_DESC_BASE<"xvmaxi.du", umax, xvsplati64_uimm5, LASX256DOpnd>; ++ ++ ++def XVMINI_BU_N : LASX_I5_U<0b01110110100101100>, ++ LASX_I5_U_DESC_BASE<"xvmini.bu", umin, xvsplati8_uimm5, LASX256BOpnd>; ++ ++def XVMINI_HU_N : LASX_I5_U<0b01110110100101101>, ++ LASX_I5_U_DESC_BASE<"xvmini.hu", umin, xvsplati16_uimm5, LASX256HOpnd>; ++ ++def XVMINI_WU_N : LASX_I5_U<0b01110110100101110>, ++ LASX_I5_U_DESC_BASE<"xvmini.wu", umin, xvsplati32_uimm5, LASX256WOpnd>; ++ ++def XVMINI_DU_N : LASX_I5_U<0b01110110100101111>, ++ LASX_I5_U_DESC_BASE<"xvmini.du", umin, xvsplati64_uimm5, LASX256DOpnd>; ++ ++ ++def XVSEQI_B_N : LASX_I5<0b01110110100000000>, ++ LASX_I5_SETCC_DESC_BASE<"xvseqi.b", SETEQ, v32i8, xvsplati8_simm5, LASX256BOpnd>; ++ ++def XVSEQI_H_N : LASX_I5<0b01110110100000001>, ++ LASX_I5_SETCC_DESC_BASE<"xvseqi.h", SETEQ, v16i16, xvsplati16_simm5, LASX256HOpnd>; ++ ++def XVSEQI_W_N : LASX_I5<0b01110110100000010>, ++ LASX_I5_SETCC_DESC_BASE<"xvseqi.w", SETEQ, v8i32, xvsplati32_simm5, LASX256WOpnd>; ++ ++def XVSEQI_D_N : LASX_I5<0b01110110100000011>, ++ LASX_I5_SETCC_DESC_BASE<"xvseqi.d", SETEQ, v4i64, xvsplati64_simm5, LASX256DOpnd>; ++ ++ ++def XVSLEI_B_N : LASX_I5<0b01110110100000100>, ++ LASX_I5_SETCC_DESC_BASE<"xvslei.b", SETLE, v32i8, xvsplati8_simm5, LASX256BOpnd>; ++ ++def XVSLEI_H_N : LASX_I5<0b01110110100000101>, ++ LASX_I5_SETCC_DESC_BASE<"xvslei.h", SETLE, v16i16, xvsplati16_simm5, LASX256HOpnd>; ++ ++def XVSLEI_W_N : LASX_I5<0b01110110100000110>, ++ LASX_I5_SETCC_DESC_BASE<"xvslei.w", SETLE, v8i32, xvsplati32_simm5, LASX256WOpnd>; ++ ++def XVSLEI_D_N : LASX_I5<0b01110110100000111>, ++ LASX_I5_SETCC_DESC_BASE<"xvslei.d", SETLE, v4i64, xvsplati64_simm5, LASX256DOpnd>; ++ ++ ++def XVSLEI_BU_N : LASX_I5_U<0b01110110100001000>, ++ LASX_I5_U_SETCC_DESC_BASE<"xvslei.bu", SETULE, v32i8, xvsplati8_uimm5, LASX256BOpnd>; ++ ++def XVSLEI_HU_N : LASX_I5_U<0b01110110100001001>, ++ LASX_I5_U_SETCC_DESC_BASE<"xvslei.hu", SETULE, v16i16, xvsplati16_uimm5, LASX256HOpnd>; ++ ++def XVSLEI_WU_N : LASX_I5_U<0b01110110100001010>, ++ LASX_I5_U_SETCC_DESC_BASE<"xvslei.wu", SETULE, v8i32, xvsplati32_uimm5, LASX256WOpnd>; ++ ++def XVSLEI_DU_N : LASX_I5_U<0b01110110100001011>, ++ LASX_I5_U_SETCC_DESC_BASE<"xvslei.du", SETULE, v4i64, xvsplati64_uimm5, LASX256DOpnd>; ++ ++ ++def XVSLTI_B_N : LASX_I5<0b01110110100001100>, ++ LASX_I5_SETCC_DESC_BASE<"xvslti.b", SETLT, v32i8, xvsplati8_simm5, LASX256BOpnd>; ++ ++def XVSLTI_H_N : LASX_I5<0b01110110100001101>, ++ LASX_I5_SETCC_DESC_BASE<"xvslti.h", SETLT, v16i16, xvsplati16_simm5, LASX256HOpnd>; ++ 
++def XVSLTI_W_N : LASX_I5<0b01110110100001110>, ++ LASX_I5_SETCC_DESC_BASE<"xvslti.w", SETLT, v8i32, xvsplati32_simm5, LASX256WOpnd>; ++ ++def XVSLTI_D_N : LASX_I5<0b01110110100001111>, ++ LASX_I5_SETCC_DESC_BASE<"xvslti.d", SETLT, v4i64, xvsplati64_simm5, LASX256DOpnd>; ++ ++ ++def XVSLTI_BU_N : LASX_I5_U<0b01110110100010000>, ++ LASX_I5_U_SETCC_DESC_BASE<"xvslti.bu", SETULT, v32i8, xvsplati8_uimm5, LASX256BOpnd>; ++ ++def XVSLTI_HU_N : LASX_I5_U<0b01110110100010001>, ++ LASX_I5_U_SETCC_DESC_BASE<"xvslti.hu", SETULT, v16i16, xvsplati16_uimm5, LASX256HOpnd>; ++ ++def XVSLTI_WU_N : LASX_I5_U<0b01110110100010010>, ++ LASX_I5_U_SETCC_DESC_BASE<"xvslti.wu", SETULT, v8i32, xvsplati32_uimm5, LASX256WOpnd>; ++ ++def XVSLTI_DU_N : LASX_I5_U<0b01110110100010011>, ++ LASX_I5_U_SETCC_DESC_BASE<"xvslti.du", SETULT, v4i64, xvsplati64_uimm5, LASX256DOpnd>; ++ ++ ++def XVADDI_BU_N : LASX_I5_U<0b01110110100010100>, ++ LASX_I5_U_DESC_BASE<"xvaddi.bu", add, xvsplati8_uimm5, LASX256BOpnd>; ++ ++def XVADDI_HU_N : LASX_I5_U<0b01110110100010101>, ++ LASX_I5_U_DESC_BASE<"xvaddi.hu", add, xvsplati16_uimm5, LASX256HOpnd>; ++ ++def XVADDI_WU_N : LASX_I5_U<0b01110110100010110>, ++ LASX_I5_U_DESC_BASE<"xvaddi.wu", add, xvsplati32_uimm5, LASX256WOpnd>; ++ ++def XVADDI_DU_N : LASX_I5_U<0b01110110100010111>, ++ LASX_I5_U_DESC_BASE<"xvaddi.du", add, xvsplati64_uimm5, LASX256DOpnd>; ++ ++ ++def XVSUBI_BU_N : LASX_I5_U<0b01110110100011000>, ++ LASX_I5_U_DESC_BASE<"xvsubi.bu", sub, xvsplati8_uimm5, LASX256BOpnd>; ++ ++def XVSUBI_HU_N : LASX_I5_U<0b01110110100011001>, ++ LASX_I5_U_DESC_BASE<"xvsubi.hu", sub, xvsplati16_uimm5, LASX256HOpnd>; ++ ++def XVSUBI_WU_N : LASX_I5_U<0b01110110100011010>, ++ LASX_I5_U_DESC_BASE<"xvsubi.wu", sub, xvsplati32_uimm5, LASX256WOpnd>; ++ ++def XVSUBI_DU_N : LASX_I5_U<0b01110110100011011>, ++ LASX_I5_U_DESC_BASE<"xvsubi.du", sub, xvsplati64_uimm5, LASX256DOpnd>; ++ ++ ++def XVPERMI_QH : LASX_I8_U<0b01110111111011>, ++ LASX_2RN_3R_U8_DESC_BASE<"xvpermi.q", LASX256HOpnd, LASX256HOpnd>; ++ ++def XVPERMI_QW : LASX_I8_U<0b01110111111011>, ++ LASX_2RN_3R_U8_DESC_BASE<"xvpermi.q", LASX256WOpnd, LASX256WOpnd>; ++ ++def XVPERMI_QD : LASX_I8_U<0b01110111111011>, ++ LASX_2RN_3R_U8_DESC_BASE<"xvpermi.q", LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVBITSELI_B_N : LASX_I8_U<0b01110111110001>, ++ LASX_2R_3R_U8_SELECT<"xvbitseli.b", vselect, LASX256BOpnd, LASX256BOpnd>; ++ ++} ++ ++ ++def : LASXPat<(v8f32 (load addrimm12:$addr)), (XVLD_W addrimm12:$addr)>; ++def : LASXPat<(v4f64 (load addrimm12:$addr)), (XVLD_D addrimm12:$addr)>; ++ ++def XVST_FW : LASXPat<(store (v8f32 LASX256W:$xj), addrimm12:$addr), ++ (XVST_W LASX256W:$xj, addrimm12:$addr)>; ++def XVST_FD : LASXPat<(store (v4f64 LASX256D:$xj), addrimm12:$addr), ++ (XVST_D LASX256D:$xj, addrimm12:$addr)>; ++ ++def XVNEG_FW : LASXPat<(fneg (v8f32 LASX256W:$xj)), ++ (XVBITREVI_W LASX256W:$xj, 31)>; ++def XVNEG_FD : LASXPat<(fneg (v4f64 LASX256D:$xj)), ++ (XVBITREVI_D LASX256D:$xj, 63)>; ++ ++ ++def : LASXPat<(v4i64 (LoongArchVABSD v4i64:$xj, v4i64:$xk, (i32 0))), ++ (v4i64 (XVABSD_D $xj, $xk))>; ++ ++def : LASXPat<(v8i32 (LoongArchVABSD v8i32:$xj, v8i32:$xk, (i32 0))), ++ (v8i32 (XVABSD_W $xj, $xk))>; ++ ++def : LASXPat<(v16i16 (LoongArchVABSD v16i16:$xj, v16i16:$xk, (i32 0))), ++ (v16i16 (XVABSD_H $xj, $xk))>; ++ ++def : LASXPat<(v32i8 (LoongArchVABSD v32i8:$xj, v32i8:$xk, (i32 0))), ++ (v32i8 (XVABSD_B $xj, $xk))>; ++ ++def : LASXPat<(v4i64 (LoongArchUVABSD v4i64:$xj, v4i64:$xk, (i32 0))), ++ (v4i64 (XVABSD_DU $xj, $xk))>; ++ ++def : LASXPat<(v8i32 
(LoongArchUVABSD v8i32:$xj, v8i32:$xk, (i32 0))), ++ (v8i32 (XVABSD_WU $xj, $xk))>; ++ ++def : LASXPat<(v16i16 (LoongArchUVABSD v16i16:$xj, v16i16:$xk, (i32 0))), ++ (v16i16 (XVABSD_HU $xj, $xk))>; ++ ++def : LASXPat<(v32i8 (LoongArchUVABSD v32i8:$xj, v32i8:$xk, (i32 0))), ++ (v32i8 (XVABSD_BU $xj, $xk))>; ++ ++ ++def : LASXPat<(or v32i8:$vj, (shl vsplat_imm_eq_1, v32i8:$vk)), ++ (XVBITSET_B v32i8:$vj, v32i8:$vk)>; ++def : LASXPat<(or v16i16:$vj, (shl vsplat_imm_eq_1, v16i16:$vk)), ++ (XVBITSET_H v16i16:$vj, v16i16:$vk)>; ++def : LASXPat<(or v8i32:$vj, (shl vsplat_imm_eq_1, v8i32:$vk)), ++ (XVBITSET_W v8i32:$vj, v8i32:$vk)>; ++def : LASXPat<(or v4i64:$vj, (shl vsplat_imm_eq_1, v4i64:$vk)), ++ (XVBITSET_D v4i64:$vj, v4i64:$vk)>; ++ ++def : LASXPat<(xor v32i8:$vj, (shl xvsplat_imm_eq_1, v32i8:$vk)), ++ (XVBITREV_B v32i8:$vj, v32i8:$vk)>; ++def : LASXPat<(xor v16i16:$vj, (shl xvsplat_imm_eq_1, v16i16:$vk)), ++ (XVBITREV_H v16i16:$vj, v16i16:$vk)>; ++def : LASXPat<(xor v8i32:$vj, (shl xvsplat_imm_eq_1, v8i32:$vk)), ++ (XVBITREV_W v8i32:$vj, v8i32:$vk)>; ++def : LASXPat<(xor v4i64:$vj, (shl (v4i64 xvsplati64_imm_eq_1), v4i64:$vk)), ++ (XVBITREV_D v4i64:$vj, v4i64:$vk)>; ++ ++def : LASXPat<(and v32i8:$vj, (xor (shl vsplat_imm_eq_1, v32i8:$vk), immAllOnesV)), ++ (XVBITCLR_B v32i8:$vj, v32i8:$vk)>; ++def : LASXPat<(and v16i16:$vj, (xor (shl vsplat_imm_eq_1, v16i16:$vk), immAllOnesV)), ++ (XVBITCLR_H v16i16:$vj, v16i16:$vk)>; ++def : LASXPat<(and v8i32:$vj, (xor (shl vsplat_imm_eq_1, v8i32:$vk), immAllOnesV)), ++ (XVBITCLR_W v8i32:$vj, v8i32:$vk)>; ++def : LASXPat<(and v4i64:$vj, (xor (shl (v4i64 vsplati64_imm_eq_1), v4i64:$vk), (bitconvert (v8i32 immAllOnesV)))), ++ (XVBITCLR_D v4i64:$vj, v4i64:$vk)>; ++ ++def xvsplati64_imm_eq_63 : PatLeaf<(bitconvert (v8i32 (build_vector))), [{ ++ APInt Imm; ++ SDNode *BV = N->getOperand(0).getNode(); ++ EVT EltTy = N->getValueType(0).getVectorElementType(); ++ ++ return selectVSplat(BV, Imm, EltTy.getSizeInBits()) && ++ Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 63; ++}]>; ++ ++def xvsplati8imm7 : PatFrag<(ops node:$wt), ++ (and node:$wt, (xvsplati8 immi32Cst7))>; ++def xvsplati16imm15 : PatFrag<(ops node:$wt), ++ (and node:$wt, (xvsplati16 immi32Cst15))>; ++def xvsplati32imm31 : PatFrag<(ops node:$wt), ++ (and node:$wt, (xvsplati32 immi32Cst31))>; ++def xvsplati64imm63 : PatFrag<(ops node:$wt), ++ (and node:$wt, xvsplati64_imm_eq_63)>; ++ ++ ++class LASXShiftPat : ++ LASXPat<(VT (Node VT:$vs, (VT (and VT:$vt, Vec)))), ++ (VT (Insn VT:$vs, VT:$vt))>; ++ ++class LASXBitPat : ++ LASXPat<(VT (Node VT:$vs, (shl vsplat_imm_eq_1, (Frag VT:$vt)))), ++ (VT (Insn VT:$vs, VT:$vt))>; ++ ++multiclass LASXShiftPats { ++ def : LASXShiftPat(Insn#_B), ++ (xvsplati8 immi32Cst7)>; ++ def : LASXShiftPat(Insn#_H), ++ (xvsplati16 immi32Cst15)>; ++ def : LASXShiftPat(Insn#_W), ++ (xvsplati32 immi32Cst31)>; ++ def : LASXPat<(v4i64 (Node v4i64:$vs, (v4i64 (and v4i64:$vt, ++ xvsplati64_imm_eq_63)))), ++ (v4i64 (!cast(Insn#_D) v4i64:$vs, v4i64:$vt))>; ++} ++ ++multiclass LASXBitPats { ++ def : LASXBitPat(Insn#_B), xvsplati8imm7>; ++ def : LASXBitPat(Insn#_H), xvsplati16imm15>; ++ def : LASXBitPat(Insn#_W), xvsplati32imm31>; ++ def : LASXPat<(Node v4i64:$vs, (shl (v4i64 xvsplati64_imm_eq_1), ++ (xvsplati64imm63 v4i64:$vt))), ++ (v4i64 (!cast(Insn#_D) v4i64:$vs, v4i64:$vt))>; ++} ++ ++defm : LASXShiftPats; ++defm : LASXShiftPats; ++defm : LASXShiftPats; ++defm : LASXBitPats; ++defm : LASXBitPats; ++ ++def : LASXPat<(and v32i8:$vs, (xor (shl xvsplat_imm_eq_1, ++ (xvsplati8imm7 
v32i8:$vt)), ++ immAllOnesV)), ++ (v32i8 (XVBITCLR_B v32i8:$vs, v32i8:$vt))>; ++def : LASXPat<(and v16i16:$vs, (xor (shl xvsplat_imm_eq_1, ++ (xvsplati16imm15 v16i16:$vt)), ++ immAllOnesV)), ++ (v16i16 (XVBITCLR_H v16i16:$vs, v16i16:$vt))>; ++def : LASXPat<(and v8i32:$vs, (xor (shl xvsplat_imm_eq_1, ++ (xvsplati32imm31 v8i32:$vt)), ++ immAllOnesV)), ++ (v8i32 (XVBITCLR_W v8i32:$vs, v8i32:$vt))>; ++def : LASXPat<(and v4i64:$vs, (xor (shl (v4i64 xvsplati64_imm_eq_1), ++ (xvsplati64imm63 v4i64:$vt)), ++ (bitconvert (v8i32 immAllOnesV)))), ++ (v4i64 (XVBITCLR_D v4i64:$vs, v4i64:$vt))>; ++ ++ ++def : LASXPat<(fdiv (v8f32 (build_vector (f32 fpimm1), (f32 fpimm1), (f32 fpimm1), ++ (f32 fpimm1),(f32 fpimm1), (f32 fpimm1), (f32 fpimm1), (f32 fpimm1))), v8f32:$v), ++ (XVFRECIP_S v8f32:$v)>; ++ ++def : LASXPat<(fdiv (v4f64 (build_vector (f64 fpimm1), (f64 fpimm1), (f64 fpimm1), (f64 fpimm1))), v4f64:$v), ++ (XVFRECIP_D v4f64:$v)>; ++ ++def : LASXPat<(fdiv (v8f32 fpimm1), v8f32:$v), ++ (XVFRECIP_S v8f32:$v)>; ++ ++def : LASXPat<(fdiv (v4f64 fpimm1), v4f64:$v), ++ (XVFRECIP_D v4f64:$v)>; ++ ++ ++def : LASXPat<(fdiv (v8f32 (build_vector (f32 fpimm1), (f32 fpimm1), (f32 fpimm1), (f32 fpimm1), ++ (f32 fpimm1), (f32 fpimm1), (f32 fpimm1), (f32 fpimm1))), (fsqrt v8f32:$v)), ++ (XVFRSQRT_S v8f32:$v)>; ++ ++def : LASXPat<(fdiv (v4f64 (build_vector (f64 fpimm1), (f64 fpimm1), (f64 fpimm1), (f64 fpimm1))), (fsqrt v4f64:$v)), ++ (XVFRSQRT_D v4f64:$v)>; ++ ++def : LASXPat<(fdiv (v8f32 fpimm1), (fsqrt v8f32:$v)), ++ (XVFRSQRT_S v8f32:$v)>; ++ ++def : LASXPat<(fdiv (v4f64 fpimm1), (fsqrt v4f64:$v)), ++ (XVFRSQRT_D v4f64:$v)>; ++ ++ ++def : LASXPat <(extract_subvector v4f64:$vec, (i32 0)), ++ (v2f64 (EXTRACT_SUBREG v4f64:$vec, sub_128))>; ++ ++def : LASXPat <(extract_subvector v8f32:$vec, (i32 0)), ++ (v4f32 (EXTRACT_SUBREG v8f32:$vec, sub_128))>; ++ ++def : LASXPat <(extract_subvector v4i64:$vec, (i32 0)), ++ (v2i64 (EXTRACT_SUBREG v4i64:$vec, sub_128))>; ++ ++def : LASXPat <(extract_subvector v8i32:$vec, (i32 0)), ++ (v4i32 (EXTRACT_SUBREG v8i32:$vec, sub_128))>; ++ ++def : LASXPat <(extract_subvector v16i16:$vec, (i32 0)), ++ (v8i16 (EXTRACT_SUBREG v16i16:$vec, sub_128))>; ++ ++def : LASXPat <(extract_subvector v32i8:$vec, (i32 0)), ++ (v16i8 (EXTRACT_SUBREG v32i8:$vec, sub_128))>; ++ ++ ++ ++def : LASXPat <(extract_subvector v4f64:$vec, (i64 0)), ++ (v2f64 (EXTRACT_SUBREG v4f64:$vec, sub_128))>; ++ ++def : LASXPat <(extract_subvector v8f32:$vec, (i64 0)), ++ (v4f32 (EXTRACT_SUBREG v8f32:$vec, sub_128))>; ++ ++def : LASXPat <(extract_subvector v4i64:$vec, (i64 0)), ++ (v2i64 (EXTRACT_SUBREG v4i64:$vec, sub_128))>; ++ ++def : LASXPat <(extract_subvector v8i32:$vec, (i64 0)), ++ (v4i32 (EXTRACT_SUBREG v8i32:$vec, sub_128))>; ++ ++def : LASXPat <(extract_subvector v16i16:$vec, (i64 0)), ++ (v8i16 (EXTRACT_SUBREG v16i16:$vec, sub_128))>; ++ ++def : LASXPat <(extract_subvector v32i8:$vec, (i64 0)), ++ (v16i8 (EXTRACT_SUBREG v32i8:$vec, sub_128))>; ++ ++ ++def : LASXPat <(extract_subvector v4i64:$vec, (i32 2)), ++ (v2i64 (EXTRACT_SUBREG (v4i64 (XVPERMI_QD v4i64:$vec, v4i64:$vec, (i32 1))), sub_128))>; ++ ++def : LASXPat <(extract_subvector v8i32:$vec, (i32 4)), ++ (v4i32 (EXTRACT_SUBREG (v8i32 (XVPERMI_QW v8i32:$vec, v8i32:$vec, (i32 1))), sub_128))>; ++ ++def : LASXPat <(extract_subvector v16i16:$vec, (i32 8)), ++ (v8i16 (EXTRACT_SUBREG (v16i16 (XVPERMI_QH v16i16:$vec, v16i16:$vec, (i32 1))), sub_128))>; ++ ++def : LASXPat <(extract_subvector v32i8:$vec, (i32 16)), ++ (v16i8 (EXTRACT_SUBREG (v32i8 (XVPERMI_Q 
v32i8:$vec, v32i8:$vec, (i32 1))), sub_128))>; ++ ++ ++def : LASXPat <(extract_subvector v4i64:$vec, (i64 2)), ++ (v2i64 (EXTRACT_SUBREG (v4i64 (XVPERMI_QD v4i64:$vec, v4i64:$vec, (i32 1))), sub_128))>; ++ ++def : LASXPat <(extract_subvector v8i32:$vec, (i64 4)), ++ (v4i32 (EXTRACT_SUBREG (v8i32 (XVPERMI_QW v8i32:$vec, v8i32:$vec, (i32 1))), sub_128))>; ++ ++def : LASXPat <(extract_subvector v16i16:$vec, (i64 8)), ++ (v8i16 (EXTRACT_SUBREG (v16i16 (XVPERMI_QH v16i16:$vec, v16i16:$vec, (i32 1))), sub_128))>; ++ ++def : LASXPat <(extract_subvector v32i8:$vec, (i64 16)), ++ (v16i8 (EXTRACT_SUBREG (v32i8 (XVPERMI_Q v32i8:$vec, v32i8:$vec, (i32 1))), sub_128))>; ++ ++ ++def : LASXPat<(abs v4i64:$v), ++ (XVMAX_D v4i64:$v, (XVNEG_D v4i64:$v))>; ++ ++def : LASXPat<(abs v8i32:$v), ++ (XVMAX_W v8i32:$v, (XVNEG_W v8i32:$v))>; ++ ++def : LASXPat<(abs v16i16:$v), ++ (XVMAX_H v16i16:$v, (XVNEG_H v16i16:$v))>; ++ ++def : LASXPat<(abs v32i8:$v), ++ (XVMAX_B v32i8:$v, (XVNEG_B v32i8:$v))>; ++ ++ ++def : LASXPat<(sub (v32i8 immAllZerosV), v32i8:$v), ++ (XVNEG_B v32i8:$v)>; ++ ++def : LASXPat<(sub (v16i16 immAllZerosV), v16i16:$v), ++ (XVNEG_H v16i16:$v)>; ++ ++def : LASXPat<(sub (v8i32 immAllZerosV), v8i32:$v), ++ (XVNEG_W v8i32:$v)>; ++ ++def : LASXPat<(sub (v4i64 immAllZerosV), v4i64:$v), ++ (XVNEG_D v4i64:$v)>; ++ ++ ++ ++def : LASXPat<(insert_subvector undef, (v2i64 LSX128D:$src), (i32 0)), ++ (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), LSX128D:$src, sub_128)>; ++ ++def : LASXPat<(insert_subvector undef, (v4i32 LSX128W:$src), (i32 0)), ++ (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), LSX128W:$src, sub_128)>; ++ ++def : LASXPat<(insert_subvector undef, (v8i16 LSX128H:$src), (i32 0)), ++ (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), LSX128H:$src, sub_128)>; ++ ++def : LASXPat<(insert_subvector undef, (v16i8 LSX128B:$src), (i32 0)), ++ (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), LSX128B:$src, sub_128)>; ++ ++ ++def : LASXPat<(insert_subvector undef, (v2i64 LSX128D:$src), (i64 0)), ++ (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), LSX128D:$src, sub_128)>; ++ ++def : LASXPat<(insert_subvector undef, (v4i32 LSX128W:$src), (i64 0)), ++ (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), LSX128W:$src, sub_128)>; ++ ++def : LASXPat<(insert_subvector undef, (v8i16 LSX128H:$src), (i64 0)), ++ (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), LSX128H:$src, sub_128)>; ++ ++def : LASXPat<(insert_subvector undef, (v16i8 LSX128B:$src), (i64 0)), ++ (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), LSX128B:$src, sub_128)>; ++ ++ ++def : LASXPat<(insert_subvector ++ (v4i64 immAllZerosV), (v2i64 LSX128D:$src), (i32 0)), ++ (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), LSX128D:$src, sub_128)>; ++ ++def : LASXPat<(insert_subvector ++ (v8i32 immAllZerosV), (v4i32 LSX128W:$src), (i32 0)), ++ (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), LSX128W:$src, sub_128)>; ++ ++def : LASXPat<(insert_subvector ++ (v16i16 immAllZerosV), (v8i16 LSX128H:$src), (i32 0)), ++ (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), LSX128H:$src, sub_128)>; ++ ++def : LASXPat<(insert_subvector ++ (v32i8 immAllZerosV), (v16i8 LSX128B:$src), (i32 0)), ++ (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), LSX128B:$src, sub_128)>; ++ ++def : LASXPat<(insert_subvector ++ (v4i64 immAllZerosV), (v2i64 LSX128D:$src), (i64 0)), ++ (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), LSX128D:$src, sub_128)>; ++ ++def : LASXPat<(insert_subvector ++ (v8i32 immAllZerosV), (v4i32 LSX128W:$src), (i64 0)), ++ (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), LSX128W:$src, sub_128)>; ++ ++def : LASXPat<(insert_subvector ++ (v16i16 immAllZerosV), (v8i16 LSX128H:$src), (i64 0)), ++ 
(INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), LSX128H:$src, sub_128)>; ++ ++def : LASXPat<(insert_subvector ++ (v32i8 immAllZerosV), (v16i8 LSX128B:$src), (i64 0)), ++ (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), LSX128B:$src, sub_128)>; ++ ++ ++def : LASXPat<(insert_subvector ++ (v4i64 immAllZerosV), (v2i64 LSX128D:$src), (i32 2)), ++ (XVPERMI_QD (v4i64 (XVREPLGR2VR_D ZERO_64)), ++ (v4i64 (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), ++ LSX128D:$src, sub_128)), (i32 32))>; ++ ++def : LASXPat<(insert_subvector (v8i32 immAllZerosV), ++ (v4i32 LSX128W:$src), (i32 4)), ++ (XVPERMI_QW (v8i32 (XVREPLGR2VR_W ZERO)), ++ (v8i32 (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), ++ LSX128W:$src, sub_128)), (i32 32))>; ++ ++def : LASXPat<(insert_subvector ++ (v16i16 immAllZerosV), (v8i16 LSX128H:$src), (i32 8)), ++ (XVPERMI_QH (v16i16 (XVREPLGR2VR_H ZERO)), ++ (v16i16 (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), ++ LSX128H:$src, sub_128)), (i32 32))>; ++ ++def : LASXPat<(insert_subvector ++ (v32i8 immAllZerosV), (v16i8 LSX128B:$src), (i32 16)), ++ (XVPERMI_Q (v32i8 (XVREPLGR2VR_B ZERO)), ++ (v32i8 (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), ++ LSX128B:$src, sub_128)), (i32 32))>; ++ ++ ++def : LASXPat<(insert_subvector ++ (v4i64 immAllZerosV), (v2i64 LSX128D:$src), (i64 2)), ++ (XVPERMI_QD (v4i64 (XVREPLGR2VR_D ZERO_64)), ++ (v4i64 (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), ++ LSX128D:$src, sub_128)), (i32 32))>; ++ ++def : LASXPat<(insert_subvector ++ (v8i32 immAllZerosV), (v4i32 LSX128W:$src), (i64 4)), ++ (XVPERMI_QW (v8i32 (XVREPLGR2VR_W ZERO)), ++ (v8i32 (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), ++ LSX128W:$src, sub_128)), (i32 32))>; ++ ++def : LASXPat<(insert_subvector ++ (v16i16 immAllZerosV), (v8i16 LSX128H:$src), (i64 8)), ++ (XVPERMI_QH (v16i16 (XVREPLGR2VR_H ZERO)), ++ (v16i16 (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), ++ LSX128H:$src, sub_128)), (i32 32))>; ++ ++def : LASXPat<(insert_subvector ++ (v32i8 immAllZerosV), (v16i8 LSX128B:$src), (i64 16)), ++ (XVPERMI_Q (v32i8 (XVREPLGR2VR_B ZERO)), ++ (v32i8 (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), ++ LSX128B:$src, sub_128)), (i32 32))>; ++ ++ ++def : LASXPat<(insert_subvector undef, (v2i64 LSX128D:$src), (i32 2)), ++ (XVPERMI_QD (v4i64 (IMPLICIT_DEF)), ++ (v4i64 (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), ++ LSX128D:$src, sub_128)), (i32 32))>; ++ ++def : LASXPat<(insert_subvector undef, (v4i32 LSX128W:$src), (i32 4)), ++ (XVPERMI_QW (v8i32 (IMPLICIT_DEF)), ++ (v8i32 (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), ++ LSX128W:$src, sub_128)), (i32 32))>; ++ ++def : LASXPat<(insert_subvector undef, (v8i16 LSX128H:$src), (i32 8)), ++ (XVPERMI_QH (v16i16 (IMPLICIT_DEF)), ++ (v16i16 (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), ++ LSX128H:$src, sub_128)), (i32 32))>; ++ ++def : LASXPat<(insert_subvector undef, (v16i8 LSX128B:$src), (i32 16)), ++ (XVPERMI_Q (v32i8 (IMPLICIT_DEF)), ++ (v32i8 (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), ++ LSX128B:$src, sub_128)), (i32 32))>; ++ ++ ++def : LASXPat<(insert_subvector undef, (v2i64 LSX128D:$src), (i64 2)), ++ (XVPERMI_QD (v4i64 (IMPLICIT_DEF)), ++ (v4i64 (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), ++ LSX128D:$src, sub_128)), (i32 32))>; ++ ++def : LASXPat<(insert_subvector undef, (v4i32 LSX128W:$src), (i64 4)), ++ (XVPERMI_QW (v8i32 (IMPLICIT_DEF)), ++ (v8i32 (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), ++ LSX128W:$src, sub_128)), (i32 32))>; ++ ++def : LASXPat<(insert_subvector undef, (v8i16 LSX128H:$src), (i64 8)), ++ (XVPERMI_QH (v16i16 (IMPLICIT_DEF)), ++ (v16i16 (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), ++ LSX128H:$src, sub_128)), (i32 32))>; ++ ++def : LASXPat<(insert_subvector undef, 
(v16i8 LSX128B:$src), (i64 16)), ++ (XVPERMI_Q (v32i8 (IMPLICIT_DEF)), ++ (v32i8 (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), ++ LSX128B:$src, sub_128)), (i32 32))>; ++ ++ ++def : LASXPat<(sra ++ (v32i8 (add ++ (v32i8 (add LASX256B:$a, LASX256B:$b)), ++ (v32i8 (srl ++ (v32i8 (add LASX256B:$a, LASX256B:$b)), ++ (v32i8 (build_vector (i32 7),(i32 7),(i32 7),(i32 7), ++ (i32 7),(i32 7),(i32 7),(i32 7), ++ (i32 7),(i32 7),(i32 7),(i32 7), ++ (i32 7),(i32 7),(i32 7),(i32 7), ++ (i32 7),(i32 7),(i32 7),(i32 7), ++ (i32 7),(i32 7),(i32 7),(i32 7), ++ (i32 7),(i32 7),(i32 7),(i32 7), ++ (i32 7),(i32 7),(i32 7),(i32 7)) ++ ) ++ ) ++ ) ++ ) ++ ), ++ (v32i8 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1) ++ ))), ++ (XVAVG_B (v32i8 LASX256B:$a), (v32i8 LASX256B:$b))>; ++ ++def : LASXPat<(sra ++ (v16i16 (add ++ (v16i16 (add LASX256H:$a, LASX256H:$b)), ++ (v16i16 (srl ++ (v16i16 (add LASX256H:$a, LASX256H:$b)), ++ (v16i16 (build_vector (i32 15),(i32 15),(i32 15),(i32 15), ++ (i32 15),(i32 15),(i32 15),(i32 15), ++ (i32 15),(i32 15),(i32 15),(i32 15), ++ (i32 15),(i32 15),(i32 15),(i32 15)) ++ ) ++ ) ++ ) ++ ) ++ ), ++ (v16i16 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1) ++ ))), ++ (XVAVG_H (v16i16 LASX256H:$a), (v16i16 LASX256H:$b))>; ++ ++def : LASXPat<(sra ++ (v8i32 (add ++ (v8i32 (add LASX256W:$a, LASX256W:$b)), ++ (v8i32 (srl ++ (v8i32 (add LASX256W:$a, LASX256W:$b)), ++ (v8i32 (build_vector (i32 31),(i32 31),(i32 31),(i32 31), ++ (i32 31),(i32 31),(i32 31),(i32 31)) ++ ) ++ ) ++ ) ++ ) ++ ), ++ (v8i32 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1) ++ ))), ++ (XVAVG_W (v8i32 LASX256W:$a), (v8i32 LASX256W:$b))>; ++ ++def : LASXPat<(sra ++ (v4i64 (add ++ (v4i64 (add LASX256D:$a, LASX256D:$b)), ++ (v4i64 (srl ++ (v4i64 (add LASX256D:$a, LASX256D:$b)), ++ (v4i64 (build_vector (i64 63),(i64 63),(i64 63),(i64 63))) ++ ) ++ ) ++ ) ++ ), ++ (v4i64 (build_vector (i64 1),(i64 1),(i64 1),(i64 1)))), ++ (XVAVG_D (v4i64 LASX256D:$a), (v4i64 LASX256D:$b))>; ++ ++ ++ ++def : LASXPat<(srl ++ (v32i8 (add LASX256B:$a, LASX256B:$b)), ++ (v32i8 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1)) ++ ) ++ ), ++ (XVAVG_BU (v32i8 LASX256B:$a), (v32i8 LASX256B:$b))>; ++ ++def : LASXPat<(srl ++ (v16i16 (add LASX256H:$a, LASX256H:$b)), ++ (v16i16 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1)) ++ ) ++ ), ++ (XVAVG_HU (v16i16 LASX256H:$a), (v16i16 LASX256H:$b))>; ++ ++def : LASXPat<(srl ++ (v8i32 (add LASX256W:$a, LASX256W:$b)), ++ (v8i32 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1)) ++ ) ++ ), ++ (XVAVG_WU (v8i32 LASX256W:$a), (v8i32 LASX256W:$b))>; ++ ++def : LASXPat<(srl ++ (v4i64 (add LASX256D:$a, LASX256D:$b)), ++ (v4i64 (build_vector (i64 1),(i64 1),(i64 1),(i64 1)) ++ ) ++ ), ++ (XVAVG_DU (v4i64 LASX256D:$a), (v4i64 LASX256D:$b))>; ++ ++ ++ ++def : 
LASXPat<(sra ++ (v32i8 (add ++ (v32i8 (add (v32i8 ( ++ build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1) ++ )), ++ (v32i8 (add LASX256B:$a, LASX256B:$b)) ++ )), ++ (v32i8 (srl ++ (v32i8 ( add (v32i8( ++ build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1) ++ )), ++ (v32i8 (add LASX256B:$a, LASX256B:$b)) ++ )), ++ (v32i8 (build_vector (i32 7),(i32 7),(i32 7),(i32 7), ++ (i32 7),(i32 7),(i32 7),(i32 7), ++ (i32 7),(i32 7),(i32 7),(i32 7), ++ (i32 7),(i32 7),(i32 7),(i32 7), ++ (i32 7),(i32 7),(i32 7),(i32 7), ++ (i32 7),(i32 7),(i32 7),(i32 7), ++ (i32 7),(i32 7),(i32 7),(i32 7), ++ (i32 7),(i32 7),(i32 7),(i32 7)) ++ ) ++ ) ++ ) ++ ) ++ ), ++ (v32i8 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1) ++ ))), ++ (XVAVGR_B (v32i8 LASX256B:$a), (v32i8 LASX256B:$b))>; ++ ++ ++def : LASXPat<(sra ++ (v16i16 (add ++ (v16i16 (add (v16i16 ( ++ build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1) ++ )), ++ (v16i16 (add LASX256H:$a, LASX256H:$b)) ++ )), ++ (v16i16 (srl ++ (v16i16 (add (v16i16 ( ++ build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1) ++ )), ++ (v16i16 (add LASX256H:$a, LASX256H:$b)) ++ )), ++ (v16i16 (build_vector ++ (i32 15),(i32 15),(i32 15),(i32 15), ++ (i32 15),(i32 15),(i32 15),(i32 15), ++ (i32 15),(i32 15),(i32 15),(i32 15), ++ (i32 15),(i32 15),(i32 15),(i32 15)) ++ ) ++ ) ++ ) ++ ) ++ ), ++ (v16i16 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1) ++ ))), ++ (XVAVGR_H (v16i16 LASX256H:$a), (v16i16 LASX256H:$b))>; ++ ++ ++def : LASXPat<(sra ++ (v8i32 (add ++ (v8i32 (add (v8i32 ( ++ build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1) ++ )), ++ (v8i32 (add LASX256W:$a, LASX256W:$b)) ++ )), ++ (v8i32 (srl ++ (v8i32 (add (v8i32 ( ++ build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1) ++ )), ++ (v8i32 (add LASX256W:$a, LASX256W:$b)) ++ )), ++ (v8i32 (build_vector ++ (i32 31),(i32 31),(i32 31),(i32 31), ++ (i32 31),(i32 31),(i32 31),(i32 31) ++ ) ++ ) ++ ) ++ ) ++ ) ++ ), ++ (v8i32 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1)))), ++ (XVAVGR_W (v8i32 LASX256W:$a), (v8i32 LASX256W:$b))>; ++ ++def : LASXPat<(sra ++ (v4i64 (add ++ (v4i64 (add (v4i64 ( ++ build_vector (i64 1),(i64 1),(i64 1),(i64 1) ++ )), ++ (v4i64 (add LASX256D:$a, LASX256D:$b)) ++ )), ++ (v4i64 (srl ++ (v4i64 (add (v4i64 ( ++ build_vector (i64 1),(i64 1),(i64 1),(i64 1) ++ )), ++ (v4i64 (add LASX256D:$a, LASX256D:$b)) ++ )), ++ (v4i64 (build_vector ++ (i64 63),(i64 63),(i64 63),(i64 63))) ++ ) ++ ) ++ ) 
++ ), ++ (v4i64 (build_vector (i64 1),(i64 1),(i64 1),(i64 1)))), ++ (XVAVGR_D (v4i64 LASX256D:$a), (v4i64 LASX256D:$b))>; ++ ++ ++ ++def : LASXPat<(srl ++ (v32i8 (add (v32i8 ( ++ build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1) ++ )), ++ (v32i8 (add LASX256B:$a, LASX256B:$b)) ++ )), ++ (v32i8 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1)) ++ ) ++ ), ++ (XVAVGR_BU (v32i8 LASX256B:$a), (v32i8 LASX256B:$b))>; ++ ++def : LASXPat<(srl ++ (v16i16 (add (v16i16 ( ++ build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1) ++ )), ++ (v16i16 (add LASX256H:$a, LASX256H:$b)) ++ )), ++ (v16i16 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1)) ++ ) ++ ), ++ (XVAVGR_HU (v16i16 LASX256H:$a), (v16i16 LASX256H:$b))>; ++ ++def : LASXPat<(srl ++ (v8i32 (add (v8i32 ( ++ build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1) ++ )), ++ (v8i32 (add LASX256W:$a, LASX256W:$b)) ++ )), ++ (v8i32 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1)) ++ ) ++ ), ++ (XVAVGR_WU (v8i32 LASX256W:$a), (v8i32 LASX256W:$b))>; ++ ++def : LASXPat<(srl ++ (v4i64 (add (v4i64 ( ++ build_vector (i64 1),(i64 1),(i64 1),(i64 1) ++ )), ++ (v4i64 (add LASX256D:$a, LASX256D:$b)) ++ )), ++ (v4i64 (build_vector (i64 1),(i64 1),(i64 1),(i64 1)) ++ ) ++ ), ++ (XVAVGR_DU (v4i64 LASX256D:$a), (v4i64 LASX256D:$b))>; ++ ++ ++def : LASXPat<(mulhs LASX256D:$a, LASX256D:$b), ++ (XVMUH_D LASX256D:$a, LASX256D:$b)>; ++ ++def : LASXPat<(mulhs LASX256W:$a, LASX256W:$b), ++ (XVMUH_W LASX256W:$a, LASX256W:$b)>; ++ ++def : LASXPat<(mulhs LASX256H:$a, LASX256H:$b), ++ (XVMUH_H LASX256H:$a, LASX256H:$b)>; ++ ++def : LASXPat<(mulhs LASX256B:$a, LASX256B:$b), ++ (XVMUH_B LASX256B:$a, LASX256B:$b)>; ++ ++ ++def : LASXPat<(mulhu LASX256D:$a, LASX256D:$b), ++ (XVMUH_DU LASX256D:$a, LASX256D:$b)>; ++ ++def : LASXPat<(mulhu LASX256W:$a, LASX256W:$b), ++ (XVMUH_WU LASX256W:$a, LASX256W:$b)>; ++ ++def : LASXPat<(mulhu LASX256H:$a, LASX256H:$b), ++ (XVMUH_HU LASX256H:$a, LASX256H:$b)>; ++ ++def : LASXPat<(mulhu LASX256B:$a, LASX256B:$b), ++ (XVMUH_BU LASX256B:$a, LASX256B:$b)>; ++ ++ ++def : LASXPat<(LoongArchINSVE (v8i32 LASX256W:$a), (v8i32 LASX256W:$b), uimm3:$ui3), ++ (XVINSVE0_W LASX256W:$a, LASX256W:$b, uimm3:$ui3)>; ++ ++def : LASXPat<(LoongArchINSVE (v4i64 LASX256D:$a), (v4i64 LASX256D:$b), uimm2:$ui2), ++ (XVINSVE0_D LASX256D:$a, LASX256D:$b, uimm2:$ui2)>; ++ ++ ++def : LASXPat<(LoongArchXVPICKVE (v8i32 (bitconvert (v32i8 (build_vector ++ (i32 0),(i32 0),(i32 0),(i32 0), ++ (i32 0),(i32 0),(i32 0),(i32 0), ++ (i32 0),(i32 0),(i32 0),(i32 0), ++ (i32 0),(i32 0),(i32 0),(i32 0), ++ (i32 0),(i32 0),(i32 0),(i32 0), ++ (i32 0),(i32 0),(i32 0),(i32 0), ++ (i32 0),(i32 0),(i32 0),(i32 0), ++ (i32 0),(i32 0),(i32 0),(i32 0) ++ )))), (v8i32 LASX256W:$b), uimm3:$ui3), ++ (XVPICKVE_W (v8i32 (IMPLICIT_DEF)), LASX256W:$b, 
uimm3:$ui3)>; ++ ++def : LASXPat<(LoongArchXVPICKVE (v4i64 (bitconvert (v32i8 (build_vector ++ (i32 0),(i32 0),(i32 0),(i32 0), ++ (i32 0),(i32 0),(i32 0),(i32 0), ++ (i32 0),(i32 0),(i32 0),(i32 0), ++ (i32 0),(i32 0),(i32 0),(i32 0), ++ (i32 0),(i32 0),(i32 0),(i32 0), ++ (i32 0),(i32 0),(i32 0),(i32 0), ++ (i32 0),(i32 0),(i32 0),(i32 0), ++ (i32 0),(i32 0),(i32 0),(i32 0) ++ )))), (v4i64 LASX256D:$b), uimm2:$ui2), ++ (XVPICKVE_D (v4i64 (IMPLICIT_DEF)), LASX256D:$b, uimm2:$ui2)>; ++ ++ ++def : LASXPat<(LoongArchXVPICKVE (v8i32 (build_vector ++ (i32 0),(i32 0),(i32 0),(i32 0), ++ (i32 0),(i32 0),(i32 0),(i32 0) ++ )), (v8i32 LASX256W:$b), uimm3:$ui3), ++ (XVPICKVE_W (v8i32 (IMPLICIT_DEF)), LASX256W:$b, uimm3:$ui3)>; ++ ++def : LASXPat<(LoongArchXVPICKVE (v4i64 (build_vector ++ (i64 0),(i64 0),(i64 0),(i64 0) ++ )), (v4i64 LASX256D:$b), uimm2:$ui2), ++ (XVPICKVE_D (v4i64 (IMPLICIT_DEF)), LASX256D:$b, uimm2:$ui2)>; ++ ++ ++def : LASXPat<(LoongArchXVPICKVE (v8i32 LASX256W:$a), (v8i32 LASX256W:$b), uimm3:$ui3), ++ (XVPICKVE_W LASX256W:$a, LASX256W:$b, uimm3:$ui3)>; ++ ++def : LASXPat<(LoongArchXVPICKVE (v4i64 LASX256D:$a), (v4i64 LASX256D:$b), uimm2:$ui2), ++ (XVPICKVE_D LASX256D:$a, LASX256D:$b, uimm2:$ui2)>; ++ ++ ++def : LASXPat<(LoongArchXVSHUF4I (v4i64 LASX256D:$a), (v4i64 LASX256D:$b), uimm8_32:$ui8), ++ (XVSHUF4I_D LASX256D:$a, LASX256D:$b, uimm8_32:$ui8)>; ++ ++def : LASXPat<(LoongArchXVPERMI (v4i64 LASX256D:$a), uimm8_32:$ui8), ++ (XVPERMI_D LASX256D:$a, uimm8_32:$ui8)>; ++ ++ ++ ++ ++//===----------------------------------------------------------------------===// ++// Intrinsics ++//===----------------------------------------------------------------------===// ++ ++def : LASXPat<(int_loongarch_lasx_xvfcmp_cor_s (v8f32 LASX256W:$xj), (v8f32 LASX256W:$xk)), ++ (XVFCMP_COR_S LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvfcmp_cor_d (v4f64 LASX256D:$xj), (v4f64 LASX256D:$xk)), ++ (XVFCMP_COR_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvfcmp_cun_s (v8f32 LASX256W:$xj), (v8f32 LASX256W:$xk)), ++ (XVFCMP_CUN_S LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvfcmp_cun_d (v4f64 LASX256D:$xj), (v4f64 LASX256D:$xk)), ++ (XVFCMP_CUN_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvfcmp_cune_s (v8f32 LASX256W:$xj), (v8f32 LASX256W:$xk)), ++ (XVFCMP_CUNE_S LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvfcmp_cune_d (v4f64 LASX256D:$xj), (v4f64 LASX256D:$xk)), ++ (XVFCMP_CUNE_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvfcmp_cueq_s (v8f32 LASX256W:$xj), (v8f32 LASX256W:$xk)), ++ (XVFCMP_CUEQ_S LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvfcmp_cueq_d (v4f64 LASX256D:$xj), (v4f64 LASX256D:$xk)), ++ (XVFCMP_CUEQ_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvfcmp_ceq_s (v8f32 LASX256W:$xj), (v8f32 LASX256W:$xk)), ++ (XVFCMP_CEQ_S LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvfcmp_ceq_d (v4f64 LASX256D:$xj), (v4f64 LASX256D:$xk)), ++ (XVFCMP_CEQ_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvfcmp_cne_s (v8f32 LASX256W:$xj), (v8f32 LASX256W:$xk)), ++ (XVFCMP_CNE_S LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvfcmp_cne_d (v4f64 LASX256D:$xj), (v4f64 LASX256D:$xk)), ++ (XVFCMP_CNE_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvfcmp_clt_s (v8f32 LASX256W:$xj), (v8f32 LASX256W:$xk)), ++ (XVFCMP_CLT_S LASX256W:$xj, 
LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvfcmp_clt_d (v4f64 LASX256D:$xj), (v4f64 LASX256D:$xk)), ++ (XVFCMP_CLT_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvfcmp_cult_s (v8f32 LASX256W:$xj), (v8f32 LASX256W:$xk)), ++ (XVFCMP_CULT_S LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvfcmp_cult_d (v4f64 LASX256D:$xj), (v4f64 LASX256D:$xk)), ++ (XVFCMP_CULT_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvfcmp_cle_s (v8f32 LASX256W:$xj), (v8f32 LASX256W:$xk)), ++ (XVFCMP_CLE_S LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvfcmp_cle_d (v4f64 LASX256D:$xj), (v4f64 LASX256D:$xk)), ++ (XVFCMP_CLE_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvfcmp_cule_s (v8f32 LASX256W:$xj), (v8f32 LASX256W:$xk)), ++ (XVFCMP_CULE_S LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvfcmp_cule_d (v4f64 LASX256D:$xj), (v4f64 LASX256D:$xk)), ++ (XVFCMP_CULE_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvseq_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), ++ (XVSEQ_B LASX256B:$xj, LASX256B:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvseq_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), ++ (XVSEQ_H LASX256H:$xj, LASX256H:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvseq_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), ++ (XVSEQ_W LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvseq_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), ++ (XVSEQ_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvsle_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), ++ (XVSLE_B LASX256B:$xj, LASX256B:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvsle_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), ++ (XVSLE_H LASX256H:$xj, LASX256H:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvsle_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), ++ (XVSLE_W LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvsle_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), ++ (XVSLE_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvsle_bu (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), ++ (XVSLE_BU LASX256B:$xj, LASX256B:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvsle_hu (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), ++ (XVSLE_HU LASX256H:$xj, LASX256H:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvsle_wu (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), ++ (XVSLE_WU LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvsle_du (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), ++ (XVSLE_DU LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvslt_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), ++ (XVSLT_B LASX256B:$xj, LASX256B:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvslt_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), ++ (XVSLT_H LASX256H:$xj, LASX256H:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvslt_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), ++ (XVSLT_W LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvslt_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), ++ (XVSLT_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvslt_bu (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), ++ (XVSLT_BU LASX256B:$xj, LASX256B:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvslt_hu (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), ++ (XVSLT_HU LASX256H:$xj, LASX256H:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvslt_wu (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), ++ (XVSLT_WU LASX256W:$xj, LASX256W:$xk)>; ++def 
: LASXPat<(int_loongarch_lasx_xvslt_du (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), ++ (XVSLT_DU LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvadd_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), ++ (XVADD_B LASX256B:$xj, LASX256B:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvadd_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), ++ (XVADD_H LASX256H:$xj, LASX256H:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvadd_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), ++ (XVADD_W LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvadd_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), ++ (XVADD_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvsub_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), ++ (XVSUB_B LASX256B:$xj, LASX256B:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvsub_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), ++ (XVSUB_H LASX256H:$xj, LASX256H:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvsub_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), ++ (XVSUB_W LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvsub_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), ++ (XVSUB_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvmax_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), ++ (XVMAX_B LASX256B:$xj, LASX256B:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvmax_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), ++ (XVMAX_H LASX256H:$xj, LASX256H:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvmax_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), ++ (XVMAX_W LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvmax_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), ++ (XVMAX_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvmin_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), ++ (XVMIN_B LASX256B:$xj, LASX256B:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvmin_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), ++ (XVMIN_H LASX256H:$xj, LASX256H:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvmin_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), ++ (XVMIN_W LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvmin_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), ++ (XVMIN_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvmin_bu (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), ++ (XVMIN_BU LASX256B:$xj, LASX256B:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvmin_hu (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), ++ (XVMIN_HU LASX256H:$xj, LASX256H:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvmin_wu (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), ++ (XVMIN_WU LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvmin_du (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), ++ (XVMIN_DU LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvmul_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), ++ (XVMUL_B LASX256B:$xj, LASX256B:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvmul_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), ++ (XVMUL_H LASX256H:$xj, LASX256H:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvmul_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), ++ (XVMUL_W LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvmul_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), ++ (XVMUL_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvdiv_bu (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), ++ (XVDIV_BU LASX256B:$xj, LASX256B:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvdiv_hu (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), ++ (XVDIV_HU 
LASX256H:$xj, LASX256H:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvdiv_wu (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), ++ (XVDIV_WU LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvdiv_du (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), ++ (XVDIV_DU LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvsll_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), ++ (XVSLL_B LASX256B:$xj, LASX256B:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvsll_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), ++ (XVSLL_H LASX256H:$xj, LASX256H:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvsll_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), ++ (XVSLL_W LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvsll_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), ++ (XVSLL_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvsrl_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), ++ (XVSRL_B LASX256B:$xj, LASX256B:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvsrl_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), ++ (XVSRL_H LASX256H:$xj, LASX256H:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvsrl_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), ++ (XVSRL_W LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvsrl_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), ++ (XVSRL_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvsra_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), ++ (XVSRA_B LASX256B:$xj, LASX256B:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvsra_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), ++ (XVSRA_H LASX256H:$xj, LASX256H:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvsra_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), ++ (XVSRA_W LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvsra_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), ++ (XVSRA_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvfadd_s (v8f32 LASX256W:$xj), (v8f32 LASX256W:$xk)), ++ (XVFADD_S LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvfadd_d (v4f64 LASX256D:$xj), (v4f64 LASX256D:$xk)), ++ (XVFADD_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvfsub_s (v8f32 LASX256W:$xj), (v8f32 LASX256W:$xk)), ++ (XVFSUB_S LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvfsub_d (v4f64 LASX256D:$xj), (v4f64 LASX256D:$xk)), ++ (XVFSUB_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvfmul_s (v8f32 LASX256W:$xj), (v8f32 LASX256W:$xk)), ++ (XVFMUL_S LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvfmul_d (v4f64 LASX256D:$xj), (v4f64 LASX256D:$xk)), ++ (XVFMUL_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvfdiv_s (v8f32 LASX256W:$xj), (v8f32 LASX256W:$xk)), ++ (XVFDIV_S LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvfdiv_d (v4f64 LASX256D:$xj), (v4f64 LASX256D:$xk)), ++ (XVFDIV_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvfsqrt_s (v8f32 LASX256W:$xj)), ++ (XVFSQRT_S LASX256W:$xj)>; ++def : LASXPat<(int_loongarch_lasx_xvfsqrt_d (v4f64 LASX256D:$xj)), ++ (XVFSQRT_D LASX256D:$xj)>; ++ ++def : LASXPat<(v8f32 (int_loongarch_lasx_xvffint_s_w (v8i32 LASX256W:$xj))), ++ (XVFFINT_S_W (v8i32 LASX256W:$xj))>; ++def : LASXPat<(v8f32 (int_loongarch_lasx_xvffint_s_wu (v8i32 LASX256W:$xj))), ++ (XVFFINT_S_WU (v8i32 LASX256W:$xj))>; ++ ++def : LASXPat<(v4f64 (int_loongarch_lasx_xvffint_d_l (v4i64 LASX256D:$xj))), ++ (XVFFINT_D_L (v4i64 LASX256D:$xj))>; ++def : LASXPat<(v4f64 
(int_loongarch_lasx_xvffint_d_lu (v4i64 LASX256D:$xj))), ++ (XVFFINT_D_LU (v4i64 LASX256D:$xj))>; ++ ++def : LASXPat<(int_loongarch_lasx_xvreplgr2vr_b GPR32Opnd:$rj), ++ (XVREPLGR2VR_B GPR32Opnd:$rj)>; ++def : LASXPat<(int_loongarch_lasx_xvreplgr2vr_h GPR32Opnd:$rj), ++ (XVREPLGR2VR_H GPR32Opnd:$rj)>; ++def : LASXPat<(int_loongarch_lasx_xvreplgr2vr_w GPR32Opnd:$rj), ++ (XVREPLGR2VR_W GPR32Opnd:$rj)>; ++def : LASXPat<(int_loongarch_lasx_xvreplgr2vr_d GPR64Opnd:$rj), ++ (XVREPLGR2VR_D GPR64Opnd:$rj)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvpickve2gr_w (v8i32 LASX256W:$xj), (immZExt3:$ui3)), ++ (XVPICKVE2GR_W LASX256W:$xj, uimm3:$ui3)>; ++def : LASXPat<(int_loongarch_lasx_xvpickve2gr_d (v4i64 LASX256D:$xj), (immZExt2:$ui2)), ++ (XVPICKVE2GR_D LASX256D:$xj, uimm2:$ui2)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvpickve2gr_wu (v8i32 LASX256W:$xj), (immZExt3:$ui3)), ++ (XVPICKVE2GR_WU LASX256W:$xj, uimm3:$ui3)>; ++def : LASXPat<(int_loongarch_lasx_xvpickve2gr_du (v4i64 LASX256D:$xj), (immZExt2:$ui2)), ++ (XVPICKVE2GR_DU LASX256D:$xj, uimm2:$ui2)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvreplve0_d (v4i64 LASX256D:$xj)), ++ (XVREPLVE0_D (v4i64 LASX256D:$xj))>; ++ ++def : LASXPat<(int_loongarch_lasx_xvinsgr2vr_w (v8i32 LASX256W:$xj), GPR32Opnd:$rj, (immZExt3:$ui3)), ++ (XVINSGR2VR_W LASX256W:$xj, GPR32Opnd:$rj, uimm3:$ui3)>; ++def : LASXPat<(int_loongarch_lasx_xvinsgr2vr_d (v4i64 LASX256D:$xj), GPR64Opnd:$rj, (immZExt2:$ui2)), ++ (XVINSGR2VR_D LASX256D:$xj, GPR64Opnd:$rj, uimm2:$ui2)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvpickve_w (v8i32 LASX256W:$xj), (immZExt3:$ui3)), ++ (XVPICKVE_W (v8i32 (IMPLICIT_DEF)), LASX256W:$xj, uimm3:$ui3)>; ++def : LASXPat<(int_loongarch_lasx_xvpickve_d (v4i64 LASX256D:$xj), (immZExt2:$ui2)), ++ (XVPICKVE_D (v4i64 (IMPLICIT_DEF)), LASX256D:$xj, uimm2:$ui2)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvpickve_w_f (v8f32 LASX256W:$xj), (immZExt3:$ui3)), ++ (XVPICKVE_W (v8f32 (IMPLICIT_DEF)), LASX256W:$xj, uimm3:$ui3)>; ++def : LASXPat<(int_loongarch_lasx_xvpickve_d_f (v4f64 LASX256D:$xj), (immZExt2:$ui2)), ++ (XVPICKVE_D (v4f64 (IMPLICIT_DEF)), LASX256D:$xj, uimm2:$ui2)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvdiv_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), ++ (XVDIV_B LASX256B:$xj, LASX256B:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvdiv_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), ++ (XVDIV_H LASX256H:$xj, LASX256H:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvdiv_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), ++ (XVDIV_W LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvdiv_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), ++ (XVDIV_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvmod_bu (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), ++ (XVMOD_BU LASX256B:$xj, LASX256B:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvmod_hu (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), ++ (XVMOD_HU LASX256H:$xj, LASX256H:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvmod_wu (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), ++ (XVMOD_WU LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvmod_du (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), ++ (XVMOD_DU LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvmod_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), ++ (XVMOD_B LASX256B:$xj, LASX256B:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvmod_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), ++ (XVMOD_H LASX256H:$xj, LASX256H:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvmod_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), ++ 
(XVMOD_W LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvmod_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), ++ (XVMOD_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvmax_bu (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), ++ (XVMAX_BU LASX256B:$xj, LASX256B:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvmax_hu (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), ++ (XVMAX_HU LASX256H:$xj, LASX256H:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvmax_wu (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), ++ (XVMAX_WU LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvmax_du (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), ++ (XVMAX_DU LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvfrint_s (v8f32 LASX256W:$xj)), ++ (XVFRINT_S LASX256W:$xj)>; ++def : LASXPat<(int_loongarch_lasx_xvfrint_d (v4f64 LASX256D:$xj)), ++ (XVFRINT_D LASX256D:$xj)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvpackod_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), ++ (XVPACKOD_B LASX256B:$xj, LASX256B:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvpackod_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), ++ (XVPACKOD_H LASX256H:$xj, LASX256H:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvpackod_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), ++ (XVPACKOD_W LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvpackod_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), ++ (XVPACKOD_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvpackev_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), ++ (XVPACKEV_B LASX256B:$xj, LASX256B:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvpackev_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), ++ (XVPACKEV_H LASX256H:$xj, LASX256H:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvpackev_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), ++ (XVPACKEV_W LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvpackev_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), ++ (XVPACKEV_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvilvh_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), ++ (XVILVH_B LASX256B:$xj, LASX256B:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvilvh_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), ++ (XVILVH_H LASX256H:$xj, LASX256H:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvilvh_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), ++ (XVILVH_W LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvilvh_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), ++ (XVILVH_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvilvl_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), ++ (XVILVL_B LASX256B:$xj, LASX256B:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvilvl_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), ++ (XVILVL_H LASX256H:$xj, LASX256H:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvilvl_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), ++ (XVILVL_W LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvilvl_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), ++ (XVILVL_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvpickev_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), ++ (XVPICKEV_B LASX256B:$xj, LASX256B:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvpickev_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), ++ (XVPICKEV_H LASX256H:$xj, LASX256H:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvpickev_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), ++ (XVPICKEV_W LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvpickev_d 
(v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), ++ (XVPICKEV_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvpickod_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), ++ (XVPICKOD_B LASX256B:$xj, LASX256B:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvpickod_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), ++ (XVPICKOD_H LASX256H:$xj, LASX256H:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvpickod_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), ++ (XVPICKOD_W LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvpickod_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), ++ (XVPICKOD_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvsadd_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), ++ (XVSADD_B LASX256B:$xj, LASX256B:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvsadd_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), ++ (XVSADD_H LASX256H:$xj, LASX256H:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvsadd_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), ++ (XVSADD_W LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvsadd_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), ++ (XVSADD_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvssub_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), ++ (XVSSUB_B LASX256B:$xj, LASX256B:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvssub_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), ++ (XVSSUB_H LASX256H:$xj, LASX256H:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvssub_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), ++ (XVSSUB_W LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvssub_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), ++ (XVSSUB_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvsadd_bu (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), ++ (XVSADD_BU LASX256B:$xj, LASX256B:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvsadd_hu (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), ++ (XVSADD_HU LASX256H:$xj, LASX256H:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvsadd_wu (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), ++ (XVSADD_WU LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvsadd_du (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), ++ (XVSADD_DU LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvssub_bu (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), ++ (XVSSUB_BU LASX256B:$xj, LASX256B:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvssub_hu (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), ++ (XVSSUB_HU LASX256H:$xj, LASX256H:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvssub_wu (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), ++ (XVSSUB_WU LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvssub_du (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), ++ (XVSSUB_DU LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvmadd_b (v32i8 LASX256B:$xd_in), (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), ++ (XVMADD_B LASX256B:$xd_in, LASX256B:$xj, LASX256B:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvmadd_h (v16i16 LASX256H:$xd_in), (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), ++ (XVMADD_H LASX256H:$xd_in, LASX256H:$xj, LASX256H:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvmadd_w (v8i32 LASX256W:$xd_in), (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), ++ (XVMADD_W LASX256W:$xd_in, LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvmadd_d (v4i64 LASX256D:$xd_in), (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), ++ (XVMADD_D LASX256D:$xd_in, LASX256D:$xj, LASX256D:$xk)>; ++ ++def : 
LASXPat<(int_loongarch_lasx_xvmsub_b (v32i8 LASX256B:$xd_in), (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), ++ (XVMSUB_B LASX256B:$xd_in, LASX256B:$xj, LASX256B:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvmsub_h (v16i16 LASX256H:$xd_in), (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), ++ (XVMSUB_H LASX256H:$xd_in, LASX256H:$xj, LASX256H:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvmsub_w (v8i32 LASX256W:$xd_in), (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), ++ (XVMSUB_W LASX256W:$xd_in, LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvmsub_d (v4i64 LASX256D:$xd_in), (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), ++ (XVMSUB_D LASX256D:$xd_in, LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(v8i32 (int_loongarch_lasx_xvftintrz_wu_s (v8f32 LASX256W:$xj))), ++ (XVFTINTRZ_WU_S (v8f32 LASX256W:$xj))>; ++def : LASXPat<(v4i64 (int_loongarch_lasx_xvftintrz_lu_d (v4f64 LASX256D:$xj))), ++ (XVFTINTRZ_LU_D (v4f64 LASX256D:$xj))>; ++ ++def : LASXPat<(v8i32 (int_loongarch_lasx_xvftintrz_w_s (v8f32 LASX256W:$xj))), ++ (XVFTINTRZ_W_S (v8f32 LASX256W:$xj))>; ++def : LASXPat<(v4i64 (int_loongarch_lasx_xvftintrz_l_d (v4f64 LASX256D:$xj))), ++ (XVFTINTRZ_L_D (v4f64 LASX256D:$xj))>; ++ ++def : LASXPat<(int_loongarch_lasx_xvbitclr_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), ++ (XVBITCLR_B LASX256B:$xj, LASX256B:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvbitclr_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), ++ (XVBITCLR_H LASX256H:$xj, LASX256H:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvbitclr_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), ++ (XVBITCLR_W LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvbitclr_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), ++ (XVBITCLR_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvclz_b (v32i8 LASX256B:$xj)), ++ (XVCLZ_B LASX256B:$xj)>; ++def : LASXPat<(int_loongarch_lasx_xvclz_h (v16i16 LASX256H:$xj)), ++ (XVCLZ_H LASX256H:$xj)>; ++def : LASXPat<(int_loongarch_lasx_xvclz_w (v8i32 LASX256W:$xj)), ++ (XVCLZ_W LASX256W:$xj)>; ++def : LASXPat<(int_loongarch_lasx_xvclz_d (v4i64 LASX256D:$xj)), ++ (XVCLZ_D LASX256D:$xj)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvpcnt_b (v32i8 LASX256B:$xj)), ++ (XVPCNT_B LASX256B:$xj)>; ++def : LASXPat<(int_loongarch_lasx_xvpcnt_h (v16i16 LASX256H:$xj)), ++ (XVPCNT_H LASX256H:$xj)>; ++def : LASXPat<(int_loongarch_lasx_xvpcnt_w (v8i32 LASX256W:$xj)), ++ (XVPCNT_W LASX256W:$xj)>; ++def : LASXPat<(int_loongarch_lasx_xvpcnt_d (v4i64 LASX256D:$xj)), ++ (XVPCNT_D LASX256D:$xj)>; ++ ++ ++def : LASXPat<(v32i8 (load (add iPTR:$xj, iPTR:$xk))), ++ (XVLDX PtrRC:$xj, PtrRC:$xk)>; ++ ++def : LASXPat<(store (v32i8 LASX256B:$xd), (add iPTR:$xj, iPTR:$xk)), ++ (XVSTX LASX256B:$xd, PtrRC:$xj, PtrRC:$xk)>; ++ ++ ++def : LASXPat<(v4i64 (sext_invec (v8i32 LASX256W:$xj))), ++ (VEXT2XV_D_W LASX256W:$xj)>; ++def : LASXPat<(v8i32 (sext_invec (v16i16 LASX256H:$xj))), ++ (VEXT2XV_W_H LASX256H:$xj)>; ++def : LASXPat<(v16i16 (sext_invec (v32i8 LASX256B:$xj))), ++ (VEXT2XV_H_B LASX256B:$xj)>; ++ ++ ++def : LASXPat<(v4i64 (zext_invec (v8i32 LASX256W:$xj))), ++ (VEXT2XV_DU_WU LASX256W:$xj)>; ++def : LASXPat<(v8i32 (zext_invec (v16i16 LASX256H:$xj))), ++ (VEXT2XV_WU_HU LASX256H:$xj)>; ++def : LASXPat<(v16i16 (zext_invec (v32i8 LASX256B:$xj))), ++ (VEXT2XV_HU_BU LASX256B:$xj)>; ++ ++ ++def : LASXPat<(v4i64 (sext_invec (v16i16 LASX256H:$xj))), ++ (VEXT2XV_D_H LASX256H:$xj)>; ++def : LASXPat<(v4i64 (sext_invec (v32i8 LASX256B:$xj))), ++ (VEXT2XV_D_B LASX256B:$xj)>; ++def : LASXPat<(v8i32 (sext_invec (v32i8 
LASX256B:$xj))), ++ (VEXT2XV_W_B LASX256B:$xj)>; ++ ++ ++def : LASXPat<(v4i64 (zext_invec (v16i16 LASX256H:$xj))), ++ (VEXT2XV_DU_HU LASX256H:$xj)>; ++def : LASXPat<(v4i64 (zext_invec (v32i8 LASX256B:$xj))), ++ (VEXT2XV_DU_BU LASX256B:$xj)>; ++def : LASXPat<(v8i32 (zext_invec (v32i8 LASX256B:$xj))), ++ (VEXT2XV_WU_BU LASX256B:$xj)>; ++ ++ ++def : LASXPat<(v4i64 (sext_invec (v16i16 LASX256H:$xj))), ++ (VEXT2XV_D_H LASX256H:$xj)>; ++def : LASXPat<(v4i64 (sext_invec (v32i8 LASX256B:$xj))), ++ (VEXT2XV_D_B LASX256B:$xj)>; ++def : LASXPat<(v8i32 (sext_invec (v32i8 LASX256B:$xj))), ++ (VEXT2XV_W_B LASX256B:$xj)>; ++ ++def : LASXPat<(v4i64 (zext_invec (v16i16 LASX256H:$xj))), ++ (VEXT2XV_DU_HU LASX256H:$xj)>; ++def : LASXPat<(v4i64 (zext_invec (v32i8 LASX256B:$xj))), ++ (VEXT2XV_DU_BU LASX256B:$xj)>; ++def : LASXPat<(v8i32 (zext_invec (v32i8 LASX256B:$xj))), ++ (VEXT2XV_WU_BU LASX256B:$xj)>; ++ ++ ++def : LASXPat<(v16i16 (sext (v16i8 LSX128B:$vj))), ++ (VEXT2XV_H_B ++ (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), LSX128B:$vj, sub_128))>; ++ ++def : LASXPat<(v8i32 (sext (v8i16 LSX128H:$vj))), ++ (VEXT2XV_W_H ++ (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), LSX128H:$vj, sub_128))>; ++ ++def : LASXPat<(v4i64 (sext (v4i32 LSX128W:$vj))), ++ (VEXT2XV_D_W ++ (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), LSX128W:$vj, sub_128))>; ++ ++def : LASXPat<(v16i16 (zext (v16i8 LSX128B:$vj))), ++ (VEXT2XV_HU_BU ++ (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), LSX128B:$vj, sub_128))>; ++ ++def : LASXPat<(v8i32 (zext (v8i16 LSX128H:$vj))), ++ (VEXT2XV_WU_HU ++ (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), LSX128H:$vj, sub_128))>; ++ ++def : LASXPat<(v4i64 (zext (v4i32 LSX128W:$vj))), ++ (VEXT2XV_DU_WU ++ (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), LSX128W:$vj, sub_128))>; ++ ++ ++def : LASXPat<(xor ++ (v16i16 LASX256H:$xj), (xvsplati16 imm_mask) ++ ), ++ (XNOR_V_H_PSEUDO (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xj))>; ++ ++def : LASXPat<(xor ++ (v8i32 LASX256W:$xj), (xvsplati32 imm_mask) ++ ), ++ (XNOR_V_W_PSEUDO (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xj))>; ++ ++def : LASXPat<(xor ++ (v4i64 LASX256D:$xj), (xvsplati64 imm_mask_64) ++ ), ++ (XNOR_V_D_PSEUDO (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xj))>; ++ ++ ++def : LASXPat<(and ++ (v32i8 (xor (v32i8 LASX256B:$xj), (xvsplati8 imm_mask))), ++ (v32i8 LASX256B:$xk) ++ ), ++ (XVANDN_V (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk))>; ++ ++def : LASXPat<(and ++ (v16i16 (xor (v16i16 LASX256H:$xj), (xvsplati16 imm_mask))), ++ (v16i16 LASX256H:$xk) ++ ), ++ (XVANDN_H_PSEUDO (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk))>; ++ ++def : LASXPat<(and ++ (v8i32 (xor (v8i32 LASX256W:$xj), (xvsplati32 imm_mask))), ++ (v8i32 LASX256W:$xk) ++ ), ++ (XVANDN_W_PSEUDO (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk))>; ++ ++def : LASXPat<(and ++ (v4i64 (xor (v4i64 LASX256D:$xj), (xvsplati64 imm_mask_64))), ++ (v4i64 LASX256D:$xk) ++ ), ++ (XVANDN_D_PSEUDO (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk))>; ++ ++ ++def : LASXPat<(or ++ (v32i8 LASX256B:$xj), ++ (v32i8 (xor (v32i8 LASX256B:$xk), (xvsplati8 imm_mask))) ++ ), ++ (XVORN_V (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk))>; ++ ++def : LASXPat<(or ++ (v16i16 LASX256H:$xj), ++ (v16i16 (xor (v16i16 LASX256H:$xk), (xvsplati16 imm_mask))) ++ ), ++ (XVORN_H_PSEUDO (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk))>; ++ ++def : LASXPat<(or ++ (v8i32 LASX256W:$xj), ++ (v8i32 (xor (v8i32 LASX256W:$xk), (xvsplati32 imm_mask))) ++ ), ++ (XVORN_W_PSEUDO (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk))>; ++ ++def : LASXPat<(or ++ (v4i64 LASX256D:$xj), ++ (v4i64 (xor (v4i64 LASX256D:$xk), (xvsplati64 
imm_mask_64))) ++ ), ++ (XVORN_D_PSEUDO (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk))>; ++ ++ ++def : LASXPat<(add (v4i64 (abs LASX256D:$a)), (v4i64 (abs LASX256D:$b))), ++ (XVADDA_D (v4i64 LASX256D:$a),(v4i64 LASX256D:$b))>; ++ ++def : LASXPat<(add (v8i32 (abs LASX256W:$a)), (v8i32 (abs LASX256W:$b))), ++ (XVADDA_W (v8i32 LASX256W:$a),(v8i32 LASX256W:$b))>; ++ ++def : LASXPat<(add (v16i16 (abs LASX256H:$a)), (v16i16 (abs LASX256H:$b))), ++ (XVADDA_H (v16i16 LASX256H:$a),(v16i16 LASX256H:$b))>; ++ ++def : LASXPat<(add (v32i8 (abs LASX256B:$a)), (v32i8 (abs LASX256B:$b))), ++ (XVADDA_B (v32i8 LASX256B:$a),(v32i8 LASX256B:$b))>; ++ ++ ++def : LASXPat<(and v32i8:$xj, (xor (shl xvsplat_imm_eq_1, v32i8:$xk), ++ (xvsplati8 imm_mask))), ++ (XVBITCLR_B v32i8:$xj, v32i8:$xk)>; ++ ++def : LASXPat<(and v16i16:$xj, (xor (shl xvsplat_imm_eq_1, v16i16:$xk), ++ (xvsplati16 imm_mask))), ++ (XVBITCLR_H v16i16:$xj, v16i16:$xk)>; ++ ++def : LASXPat<(and v8i32:$xj, (xor (shl xvsplat_imm_eq_1, v8i32:$xk), ++ (xvsplati32 imm_mask))), ++ (XVBITCLR_W v8i32:$xj, v8i32:$xk)>; ++ ++def : LASXPat<(and v4i64:$xj, (xor (shl xvsplat_imm_eq_1, v4i64:$xk), ++ (xvsplati64 imm_mask_64))), ++ (XVBITCLR_D v4i64:$xj, v4i64:$xk)>; ++ ++ ++def : LASXPat<(insert_subvector (v16i16 LASX256H:$dst), ++ (v8i16 LSX128H:$src), (i64 0)), ++ (XVPERMI_QH (v16i16 LASX256H:$dst), ++ (v16i16 (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), ++ LSX128H:$src, sub_128)), ++ (i32 48))>; ++ ++def : LASXPat<(insert_subvector (v8i32 LASX256W:$dst), ++ (v4i32 LSX128W:$src), (i64 0)), ++ (XVPERMI_QW (v8i32 LASX256W:$dst), ++ (v8i32 (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), ++ LSX128W:$src, sub_128)), ++ (i32 48))>; ++ ++def : LASXPat<(insert_subvector (v4i64 LASX256D:$dst), ++ (v2i64 LSX128D:$src), (i64 0)), ++ (XVPERMI_QD (v4i64 LASX256D:$dst), ++ (v4i64 (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), ++ LSX128D:$src, sub_128)), ++ (i32 48))>; +diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrFormats.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrFormats.td +new file mode 100644 +index 000000000..50df4d724 +--- /dev/null ++++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrFormats.td +@@ -0,0 +1,449 @@ ++//===- LoongArchLSXInstrFormats.td - LoongArch LSX Instruction Formats ---*- tablegen -*-===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. 
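++//
++// The format classes in this file give the fixed 32-bit encodings of the LSX
++// (128-bit SIMD) instructions: each class places the opcode in the upper bits
++// of Inst and maps the remaining bits to 5-bit vector/GPR register indices
++// (vd, vj, vk, rd, rj, rk) and to the immediate, index or address field whose
++// width is named by the class (e.g. LSX_I5_U carries a 5-bit unsigned
++// immediate; LSX_SI12_S packs a base register and 12-bit signed offset into a
++// 17-bit addr operand).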
++// ++//===----------------------------------------------------------------------===// ++ ++class LSXInst : InstLA<(outs), (ins), "", [], FrmOther>, ++ EXT_LSX { ++} ++ ++class LSXCBranch : LSXInst { ++} ++ ++class LSXSpecial : LSXInst { ++} ++ ++class LSXPseudo pattern>: ++ LoongArchPseudo { ++ let Predicates = [HasLSX]; ++} ++ ++class LSX_3R op>: LSXInst { ++ bits<5> vk; ++ bits<5> vj; ++ bits<5> vd; ++ ++ let Inst{31-15} = op; ++ let Inst{14-10} = vk; ++ let Inst{9-5} = vj; ++ let Inst{4-0} = vd; ++} ++ ++class LSX_3R_1GP op>: LSXInst { ++ bits<5> rk; ++ bits<5> vj; ++ bits<5> vd; ++ ++ let Inst{31-15} = op; ++ let Inst{14-10} = rk; ++ let Inst{9-5} = vj; ++ let Inst{4-0} = vd; ++} ++ ++class LSX_I5 op>: LSXInst { ++ bits<5> vd; ++ bits<5> vj; ++ bits<5> si5; ++ ++ let Inst{31-15} = op; ++ let Inst{14-10} = si5; ++ let Inst{9-5} = vj; ++ let Inst{4-0} = vd; ++} ++ ++class LSX_I5_U op>: LSXInst { ++ bits<5> vd; ++ bits<5> vj; ++ bits<5> ui5; ++ ++ let Inst{31-15} = op; ++ let Inst{14-10} = ui5; ++ let Inst{9-5} = vj; ++ let Inst{4-0} = vd; ++} ++ ++class LSX_2R op>: LSXInst { ++ bits<5> vj; ++ bits<5> vd; ++ ++ let Inst{31-10} = op; ++ let Inst{9-5} = vj; ++ let Inst{4-0} = vd; ++} ++ ++class LSX_2R_1GP op>: LSXInst { ++ bits<5> rj; ++ bits<5> vd; ++ ++ let Inst{31-10} = op; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = vd; ++} ++ ++class LSX_I1_U op>: LSXInst { ++ bits<5> vd; ++ bits<5> vj; ++ bits<1> ui1; ++ ++ let Inst{31-11} = op; ++ let Inst{10} = ui1; ++ let Inst{9-5} = vj; ++ let Inst{4-0} = vd; ++} ++ ++class LSX_I2_U op>: LSXInst { ++ bits<5> vd; ++ bits<5> vj; ++ bits<2> ui2; ++ ++ let Inst{31-12} = op; ++ let Inst{11-10} = ui2; ++ let Inst{9-5} = vj; ++ let Inst{4-0} = vd; ++} ++ ++class LSX_I3_U op>: LSXInst { ++ bits<5> vd; ++ bits<5> vj; ++ bits<3> ui3; ++ ++ let Inst{31-13} = op; ++ let Inst{12-10} = ui3; ++ let Inst{9-5} = vj; ++ let Inst{4-0} = vd; ++} ++ ++class LSX_I4_U op>: LSXInst { ++ bits<5> vd; ++ bits<5> vj; ++ bits<4> ui4; ++ ++ let Inst{31-14} = op; ++ let Inst{13-10} = ui4; ++ let Inst{9-5} = vj; ++ let Inst{4-0} = vd; ++} ++ ++class LSX_I6_U op>: LSXInst { ++ bits<5> vd; ++ bits<5> vj; ++ bits<6> ui6; ++ ++ let Inst{31-16} = op; ++ let Inst{15-10} = ui6; ++ let Inst{9-5} = vj; ++ let Inst{4-0} = vd; ++} ++ ++class LSX_I1_R_U op>: LSXInst { ++ bits<5> vd; ++ bits<5> rj; ++ bits<1> ui1; ++ ++ let Inst{31-11} = op; ++ let Inst{10} = ui1; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = vd; ++} ++ ++class LSX_I2_R_U op>: LSXInst { ++ bits<5> vd; ++ bits<5> rj; ++ bits<2> ui2; ++ ++ let Inst{31-12} = op; ++ let Inst{11-10} = ui2; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = vd; ++} ++ ++class LSX_I3_R_U op>: LSXInst { ++ bits<5> vd; ++ bits<5> rj; ++ bits<3> ui3; ++ ++ let Inst{31-13} = op; ++ let Inst{12-10} = ui3; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = vd; ++} ++ ++class LSX_I4_R_U op>: LSXInst { ++ bits<5> vd; ++ bits<5> rj; ++ bits<4> ui4; ++ ++ let Inst{31-14} = op; ++ let Inst{13-10} = ui4; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = vd; ++} ++ ++class LSX_ELM_COPY_B op>: LSXInst { ++ bits<5> rd; ++ bits<5> vj; ++ bits<4> ui4; ++ ++ let Inst{31-14} = op; ++ let Inst{13-10} = ui4; ++ let Inst{9-5} = vj; ++ let Inst{4-0} = rd; ++} ++ ++class LSX_ELM_COPY_H op>: LSXInst { ++ bits<5> rd; ++ bits<5> vj; ++ bits<3> ui3; ++ ++ let Inst{31-13} = op; ++ let Inst{12-10} = ui3; ++ let Inst{9-5} = vj; ++ let Inst{4-0} = rd; ++} ++ ++class LSX_ELM_COPY_W op>: LSXInst { ++ bits<5> rd; ++ bits<5> vj; ++ bits<2> ui2; ++ ++ let Inst{31-12} = op; ++ let Inst{11-10} = ui2; ++ let 
Inst{9-5} = vj; ++ let Inst{4-0} = rd; ++} ++ ++class LSX_ELM_COPY_D op>: LSXInst { ++ bits<5> rd; ++ bits<5> vj; ++ bits<1> ui1; ++ ++ let Inst{31-11} = op; ++ let Inst{10} = ui1; ++ let Inst{9-5} = vj; ++ let Inst{4-0} = rd; ++} ++ ++class LSX_I8_U op>: LSXInst { ++ bits<5> vd; ++ bits<5> vj; ++ bits<8> ui8; ++ ++ let Inst{31-18} = op; ++ let Inst{17-10} = ui8; ++ let Inst{9-5} = vj; ++ let Inst{4-0} = vd; ++} ++ ++class LSX_I7_U op>: LSXInst { ++ bits<5> vd; ++ bits<5> vj; ++ bits<7> ui7; ++ ++ let Inst{31-17} = op; ++ let Inst{16-10} = ui7; ++ let Inst{9-5} = vj; ++ let Inst{4-0} = vd; ++} ++ ++class LSX_I12_S op>: LSXInst { ++ bits<5> vd; ++// bits<5> rj; ++// bits<12> si12; ++ bits<17> addr; ++ ++ let Inst{31-22} = op; ++ let Inst{21-10} = addr{11-0}; ++ let Inst{9-5} = addr{16-12}; ++ let Inst{4-0} = vd; ++} ++ ++class LSX_SI12_S op>: LSXInst { ++ bits<5> vd; ++ bits<17> addr; ++ ++ let Inst{31-22} = op; ++ let Inst{21-10} = addr{11-0}; ++ let Inst{9-5} = addr{16-12}; ++ let Inst{4-0} = vd; ++} ++ ++class LSX_SI11_S op>: LSXInst { ++ bits<5> vd; ++ bits<16> addr; ++ ++ let Inst{31-21} = op; ++ let Inst{20-10} = addr{10-0}; ++ let Inst{9-5} = addr{15-11}; ++ let Inst{4-0} = vd; ++} ++ ++class LSX_SI10_S op>: LSXInst { ++ bits<5> vd; ++ bits<15> addr; ++ ++ let Inst{31-20} = op; ++ let Inst{19-10} = addr{9-0}; ++ let Inst{9-5} = addr{14-10}; ++ let Inst{4-0} = vd; ++} ++ ++class LSX_SI9_S op>: LSXInst { ++ bits<5> vd; ++ bits<14> addr; ++ ++ let Inst{31-19} = op; ++ let Inst{18-10} = addr{8-0}; ++ let Inst{9-5} = addr{13-9}; ++ let Inst{4-0} = vd; ++} ++ ++class LSX_SET op>: LSXInst { ++ bits<5> vj; ++ bits<3> cd; ++ ++ let Inst{31-10} = op; ++ let Inst{9-5} = vj; ++ let Inst{4-3} = 0b00; ++ let Inst{2-0} = cd; ++} ++ ++class LSX_VR4MUL op>: LSXInst { ++ bits<5> vd; ++ bits<5> vj; ++ bits<5> vk; ++ bits<5> va; ++ ++ let Inst{31-20} = op; ++ let Inst{19-15} = va; ++ let Inst{14-10} = vk; ++ let Inst{9-5} = vj; ++ let Inst{4-0} = vd; ++} ++ ++class LSX_VFCMP op>: LSXInst { ++ bits<5> vd; ++ bits<5> vj; ++ bits<5> vk; ++ bits<5> cond; ++ ++ let Inst{31-20} = op; ++ let Inst{19-15} = cond; ++ let Inst{14-10} = vk; ++ let Inst{9-5} = vj; ++ let Inst{4-0} = vd; ++} ++ ++class LSX_Addr_SI8_idx1 op>: LSXInst { ++ bits<5> vd; ++ bits<13> addr; ++ bits<1> idx; ++ ++ let Inst{31-19} = op; ++ let Inst{18-11} = addr{7-0}; ++ let Inst{10} = idx; ++ let Inst{9-5} = addr{12-8}; ++ let Inst{4-0} = vd; ++} ++ ++class LSX_SI8_idx1 op>: LSXInst { ++ bits<5> vd; ++ bits<5> rj; ++ bits<8> si8; ++ bits<1> idx; ++ ++ let Inst{31-19} = op; ++ let Inst{18} = idx; ++ let Inst{17-10} = si8; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = vd; ++} ++ ++class LSX_SI8_idx2 op>: LSXInst { ++ bits<5> vd; ++ bits<5> rj; ++ bits<8> si8; ++ bits<2> idx; ++ ++ let Inst{31-20} = op; ++ let Inst{19-18} = idx; ++ let Inst{17-10} = si8; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = vd; ++} ++ ++class LSX_SI8_idx3 op>: LSXInst { ++ bits<5> vd; ++ bits<5> rj; ++ bits<8> si8; ++ bits<3> idx; ++ ++ let Inst{31-21} = op; ++ let Inst{20-18} = idx; ++ let Inst{17-10} = si8; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = vd; ++} ++ ++class LSX_SI8_idx4 op>: LSXInst { ++ bits<5> vd; ++ bits<5> rj; ++ bits<8> si8; ++ bits<4> idx; ++ ++ let Inst{31-22} = op; ++ let Inst{21-18} = idx; ++ let Inst{17-10} = si8; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = vd; ++} ++ ++class LSX_3R_2GP op>: LSXInst { ++ bits<5> rk; ++ bits<5> rj; ++ bits<5> vd; ++ ++ let Inst{31-15} = op; ++ let Inst{14-10} = rk; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = vd; ++} ++ ++class 
LSX_I5_mode_U op>: LSXInst { ++ bits<5> vd; ++ bits<5> mode; ++ bits<5> ui5; ++ ++ let Inst{31-15} = op; ++ let Inst{14-10} = ui5; ++ let Inst{9-5} = mode; ++ let Inst{4-0} = vd; ++} ++ ++class LSX_1R_I13 op>: LSXInst { ++ bits<13> i13; ++ bits<5> vd; ++ ++ let Inst{31-18} = op; ++ let Inst{17-5} = i13; ++ let Inst{4-0} = vd; ++} ++ ++class LSX_1R_I13_I10 op>: LSXInst { ++ bits<10> i10; ++ bits<5> vd; ++ ++ let Inst{31-15} = op; ++ let Inst{14-5} = i10; ++ let Inst{4-0} = vd; ++} ++ ++ ++ ++ ++ ++ ++ +diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +new file mode 100644 +index 000000000..3d0ea3901 +--- /dev/null ++++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +@@ -0,0 +1,5906 @@ ++//===- LoongArchLSXInstrInfo.td - LSX instructions -*- tablegen ------------*-=// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++// ++// This file describes LoongArch LSX instructions. ++// ++//===----------------------------------------------------------------------===// ++ ++def SDT_LoongArchVecCond : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisVec<1>]>; ++def SDT_VSetCC : SDTypeProfile<1, 3, [SDTCisInt<0>, ++ SDTCisInt<1>, ++ SDTCisSameAs<1, 2>, ++ SDTCisVT<3, OtherVT>]>; ++def SDT_VFSetCC : SDTypeProfile<1, 3, [SDTCisInt<0>, ++ SDTCisFP<1>, ++ SDTCisSameAs<1, 2>, ++ SDTCisVT<3, OtherVT>]>; ++def SDT_VSHF : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisVec<0>, ++ SDTCisInt<1>, SDTCisVec<1>, ++ SDTCisSameAs<0, 2>, SDTCisSameAs<2, 3>]>; ++def SDT_SHF : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisVec<0>, ++ SDTCisVT<1, i32>, SDTCisSameAs<0, 2>]>; ++def SDT_ILV : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisVec<0>, ++ SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>]>; ++def SDTVABSD : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>, ++ SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>; ++ ++def SDT_VBROADCAST : SDTypeProfile<1, 1, [SDTCisVec<0>]>; ++def LoongArchVBROADCAST : SDNode<"LoongArchISD::VBROADCAST", SDT_VBROADCAST>; ++ ++def LoongArchVAllNonZero : SDNode<"LoongArchISD::VALL_NONZERO", SDT_LoongArchVecCond>; ++def LoongArchVAnyNonZero : SDNode<"LoongArchISD::VANY_NONZERO", SDT_LoongArchVecCond>; ++def LoongArchVAllZero : SDNode<"LoongArchISD::VALL_ZERO", SDT_LoongArchVecCond>; ++def LoongArchVAnyZero : SDNode<"LoongArchISD::VANY_ZERO", SDT_LoongArchVecCond>; ++def LoongArchVNOR : SDNode<"LoongArchISD::VNOR", SDTIntBinOp, ++ [SDNPCommutative, SDNPAssociative]>; ++def LoongArchVSHF : SDNode<"LoongArchISD::VSHF", SDT_VSHF>; ++def LoongArchSHF : SDNode<"LoongArchISD::SHF", SDT_SHF>; ++def LoongArchVPACKEV : SDNode<"LoongArchISD::VPACKEV", SDT_ILV>; ++def LoongArchVPACKOD : SDNode<"LoongArchISD::VPACKOD", SDT_ILV>; ++def LoongArchVILVH : SDNode<"LoongArchISD::VILVH", SDT_ILV>; ++def LoongArchVILVL : SDNode<"LoongArchISD::VILVL", SDT_ILV>; ++def LoongArchVPICKEV : SDNode<"LoongArchISD::VPICKEV", SDT_ILV>; ++def LoongArchVPICKOD : SDNode<"LoongArchISD::VPICKOD", SDT_ILV>; ++def LoongArchVABSD : SDNode<"LoongArchISD::VABSD", SDTVABSD>; ++def LoongArchUVABSD : SDNode<"LoongArchISD::UVABSD", SDTVABSD>; ++ ++def vsetcc : SDNode<"ISD::SETCC", SDT_VSetCC>; ++def vfsetcc : SDNode<"ISD::SETCC", SDT_VFSetCC>; ++ ++def LoongArchVExtractSExt : SDNode<"LoongArchISD::VEXTRACT_SEXT_ELT", ++ SDTypeProfile<1, 3, [SDTCisPtrTy<2>]>, []>; ++def LoongArchVExtractZExt : 
SDNode<"LoongArchISD::VEXTRACT_ZEXT_ELT", ++ SDTypeProfile<1, 3, [SDTCisPtrTy<2>]>, []>; ++ ++def immZExt1Ptr : ImmLeaf(Imm);}]>; ++def immZExt2Ptr : ImmLeaf(Imm);}]>; ++def immZExt3Ptr : ImmLeaf(Imm);}]>; ++def immZExt4Ptr : ImmLeaf(Imm);}]>; ++def immZExt5Ptr : ImmLeaf(Imm);}]>; ++def immZExt10 : ImmLeaf(Imm);}]>; ++def immZExt8 : ImmLeaf(Imm);}]>; ++def immZExt7 : PatLeaf<(imm), [{ return isUInt<7>(N->getZExtValue()); }]>; ++def immZExt6 : ImmLeaf; ++def immZExt4 : ImmLeaf(Imm);}]>; ++def immZExt3 : ImmLeaf(Imm);}]>; ++def immZExt2 : ImmLeaf(Imm);}]>; ++def immZExt1 : ImmLeaf(Imm);}]>; ++def immSExt12_l : ImmLeaf(Imm);}]>; ++def immSExt11Ptr : ImmLeaf(Imm);}]>; ++ ++def immSExt11_1 : ImmLeaf(Imm<<1);}]>; ++def immSExt10Ptr : ImmLeaf(Imm);}]>; ++def immSExt10_2 : ImmLeaf(Imm<<2);}]>; ++def immSExt9Ptr : ImmLeaf(Imm);}]>; ++def immSExt9_3 : ImmLeaf(Imm<<3);}]>; ++def immSExt8 : ImmLeaf(Imm);}]>; ++def immSExt5 : ImmLeaf(Imm);}]>; ++def immSExt8_1 : ImmLeaf(Imm<<1);}]>; ++def immSExt8_2 : ImmLeaf(Imm<<2);}]>; ++def immSExt8_3 : ImmLeaf(Imm<<3);}]>; ++ ++def addrimm10 : ComplexPattern; ++def addrimm10lsl2 : ComplexPattern; ++def addrimm9lsl3 : ComplexPattern; ++def addrimm11lsl1 : ComplexPattern; ++ ++ ++class SimmLslAsmOperandClass Supers = [], ++ int Shift = 0> : AsmOperandClass { ++ let Name = "Simm" # Bits # "_Lsl" # Shift; ++ let RenderMethod = "addImmOperands"; ++ let PredicateMethod = "isScaledSImm<" # Bits # ", " # Shift # ">"; ++ let SuperClasses = Supers; ++ let DiagnosticType = "SImm" # Bits # "_Lsl" # Shift; ++} ++ ++def Simm11Lsl1AsmOperand ++ : SimmLslAsmOperandClass<11, [], 1>; ++ ++def immSExt11_1_O : Operand { ++ let EncoderMethod = "getSImm11Lsl1Encoding"; ++ let DecoderMethod = "DecodeSImmWithOffsetAndScale<2>"; ++ let ParserMatchClass = Simm11Lsl1AsmOperand; ++} ++ ++def Simm10Lsl2AsmOperand ++ : SimmLslAsmOperandClass<10, [], 2>; ++ ++def immSExt10_2_O : Operand { ++ let EncoderMethod = "getSImm10Lsl2Encoding"; ++ let DecoderMethod = "DecodeSImmWithOffsetAndScale<4>"; ++ let ParserMatchClass = Simm10Lsl2AsmOperand; ++} ++ ++def Simm9Lsl3AsmOperand ++ : SimmLslAsmOperandClass<9, [], 3>; ++ ++def immSExt9_3_O : Operand { ++ let EncoderMethod = "getSImm9Lsl3Encoding"; ++ let DecoderMethod = "DecodeSImmWithOffsetAndScale<8>"; ++ let ParserMatchClass = Simm9Lsl3AsmOperand; ++} ++ ++def Simm8Lsl3AsmOperand ++ : SimmLslAsmOperandClass<8, [], 3>; ++ ++def immSExt8_3_O : Operand { ++ let EncoderMethod = "getSImm8Lsl3Encoding"; ++ let DecoderMethod = "DecodeSImmWithOffsetAndScale<8>"; ++ let ParserMatchClass = Simm8Lsl3AsmOperand; ++} ++ ++def Simm8Lsl2AsmOperand ++ : SimmLslAsmOperandClass<8, [], 2>; ++ ++def immSExt8_2_O : Operand { ++ let EncoderMethod = "getSImm8Lsl2Encoding"; ++ let DecoderMethod = "DecodeSImmWithOffsetAndScale<4>"; ++ let ParserMatchClass = Simm8Lsl2AsmOperand; ++} ++ ++def Simm8Lsl1AsmOperand ++ : SimmLslAsmOperandClass<8, [], 1>; ++ ++def immSExt8_1_O : Operand { ++ let EncoderMethod = "getSImm8Lsl1Encoding"; ++ let DecoderMethod = "DecodeSImmWithOffsetAndScale<2>"; ++ let ParserMatchClass = Simm8Lsl1AsmOperand; ++} ++ ++ ++class ConstantSImmAsmOperandClass Supers = [], ++ int Offset = 0> : AsmOperandClass { ++ let Name = "ConstantSImm" # Bits # "_" # Offset; ++ let RenderMethod = "addConstantSImmOperands<" # Bits # ", " # Offset # ">"; ++ let PredicateMethod = "isConstantSImm<" # Bits # ", " # Offset # ">"; ++ let SuperClasses = Supers; ++ let DiagnosticType = "SImm" # Bits # "_" # Offset; ++} ++ ++class ConstantUImmRangeAsmOperandClass Supers = []> ++ : 
AsmOperandClass { ++ let Name = "ConstantUImmRange" # Bottom # "_" # Top; ++ let RenderMethod = "addImmOperands"; ++ let PredicateMethod = "isConstantUImmRange<" # Bottom # ", " # Top # ">"; ++ let SuperClasses = Supers; ++ let DiagnosticType = "UImmRange" # Bottom # "_" # Top; ++} ++ ++def SImm16RelaxedAsmOperandClass ++ : SImmAsmOperandClass<16, [UImm16RelaxedAsmOperandClass]> { ++ let Name = "SImm16_Relaxed"; ++ let PredicateMethod = "isAnyImm<16>"; ++ let DiagnosticType = "SImm16_Relaxed"; ++} ++ ++def ConstantSImm11Lsl1AsmOperandClass : AsmOperandClass { ++ let Name = "SImm11Lsl1"; ++ let RenderMethod = "addImmOperands"; ++ let PredicateMethod = "isScaledSImm<11, 1>"; ++ let SuperClasses = [SImm12Operand]; ++ let DiagnosticType = "SImm11_Lsl1"; ++} ++ ++def ConstantSImm9Lsl3AsmOperandClass : AsmOperandClass { ++ let Name = "SImm9Lsl3"; ++ let RenderMethod = "addImmOperands"; ++ let PredicateMethod = "isScaledSImm<9, 3>"; ++ let SuperClasses = [SImm12Operand]; ++ let DiagnosticType = "SImm9_Lsl3"; ++} ++ ++def ConstantSImm10Lsl2AsmOperandClass : AsmOperandClass { ++ let Name = "SImm10Lsl2"; ++ let RenderMethod = "addImmOperands"; ++ let PredicateMethod = "isScaledSImm<10, 2>"; ++ let SuperClasses = [SImm12Operand]; ++ let DiagnosticType = "SImm10_Lsl2"; ++} ++def ConstantSImm11AsmOperandClass ++ : ConstantSImmAsmOperandClass<11, [ConstantSImm10Lsl2AsmOperandClass]>; ++def ConstantSImm10Lsl1AsmOperandClass : AsmOperandClass { ++ let Name = "SImm10Lsl1"; ++ let RenderMethod = "addImmOperands"; ++ let PredicateMethod = "isScaledSImm<10, 1>"; ++ let SuperClasses = [ConstantSImm11AsmOperandClass]; ++ let DiagnosticType = "SImm10_Lsl1"; ++} ++def ConstantUImm10AsmOperandClass ++ : ConstantUImmAsmOperandClass<10, [ConstantSImm10Lsl1AsmOperandClass]>; ++def ConstantSImm10AsmOperandClass ++ : ConstantSImmAsmOperandClass<10, [ConstantUImm10AsmOperandClass]>; ++def ConstantSImm9AsmOperandClass ++ : ConstantSImmAsmOperandClass<9, [ConstantSImm10AsmOperandClass]>; ++def ConstantSImm7Lsl2AsmOperandClass : AsmOperandClass { ++ let Name = "SImm7Lsl2"; ++ let RenderMethod = "addImmOperands"; ++ let PredicateMethod = "isScaledSImm<7, 2>"; ++ let SuperClasses = [ConstantSImm9AsmOperandClass]; ++ let DiagnosticType = "SImm7_Lsl2"; ++} ++def ConstantUImm8AsmOperandClass ++ : ConstantUImmAsmOperandClass<8, [ConstantSImm7Lsl2AsmOperandClass]>; ++def ConstantUImm7Sub1AsmOperandClass ++ : ConstantUImmAsmOperandClass<7, [ConstantUImm8AsmOperandClass], -1> { ++ // Specify the names since the -1 offset causes invalid identifiers otherwise. 
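++  // (The default Name and DiagnosticType splice the offset into the string,
++  // as ConstantSImmAsmOperandClass does above, so an offset of -1 would put a
++  // "-" into the generated identifier; "UImm7_N1" is used instead.)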
++ let Name = "UImm7_N1"; ++ let DiagnosticType = "UImm7_N1"; ++} ++def ConstantUImm7AsmOperandClass ++ : ConstantUImmAsmOperandClass<7, [ConstantUImm7Sub1AsmOperandClass]>; ++def ConstantUImm6Lsl2AsmOperandClass : AsmOperandClass { ++ let Name = "UImm6Lsl2"; ++ let RenderMethod = "addImmOperands"; ++ let PredicateMethod = "isScaledUImm<6, 2>"; ++ let SuperClasses = [ConstantUImm7AsmOperandClass]; ++ let DiagnosticType = "UImm6_Lsl2"; ++} ++def ConstantUImm6AsmOperandClass ++ : ConstantUImmAsmOperandClass<6, [ConstantUImm6Lsl2AsmOperandClass]>; ++def ConstantSImm6AsmOperandClass ++ : ConstantSImmAsmOperandClass<6, [ConstantUImm6AsmOperandClass]>; ++def ConstantUImm5Lsl2AsmOperandClass : AsmOperandClass { ++ let Name = "UImm5Lsl2"; ++ let RenderMethod = "addImmOperands"; ++ let PredicateMethod = "isScaledUImm<5, 2>"; ++ let SuperClasses = [ConstantSImm6AsmOperandClass]; ++ let DiagnosticType = "UImm5_Lsl2"; ++} ++def ConstantUImm5_Range2_64AsmOperandClass ++ : ConstantUImmRangeAsmOperandClass<2, 64, [ConstantUImm5Lsl2AsmOperandClass]>; ++def ConstantUImm5Plus33AsmOperandClass ++ : ConstantUImmAsmOperandClass<5, [ConstantUImm5_Range2_64AsmOperandClass], ++ 33>; ++def ConstantUImm5ReportUImm6AsmOperandClass ++ : ConstantUImmAsmOperandClass<5, [ConstantUImm5Plus33AsmOperandClass]> { ++ let Name = "ConstantUImm5_0_Report_UImm6"; ++ let DiagnosticType = "UImm5_0_Report_UImm6"; ++} ++def ConstantUImm5Plus32AsmOperandClass ++ : ConstantUImmAsmOperandClass< ++ 5, [ConstantUImm5ReportUImm6AsmOperandClass], 32>; ++def ConstantUImm5Plus32NormalizeAsmOperandClass ++ : ConstantUImmAsmOperandClass<5, [ConstantUImm5Plus32AsmOperandClass], 32> { ++ let Name = "ConstantUImm5_32_Norm"; ++ // We must also subtract 32 when we render the operand. ++ let RenderMethod = "addConstantUImmOperands<5, 32, -32>"; ++} ++def ConstantUImm5Plus1ReportUImm6AsmOperandClass ++ : ConstantUImmAsmOperandClass< ++ 5, [ConstantUImm5Plus32NormalizeAsmOperandClass], 1>{ ++ let Name = "ConstantUImm5_Plus1_Report_UImm6"; ++} ++def ConstantUImm5Plus1AsmOperandClass ++ : ConstantUImmAsmOperandClass< ++ 5, [ConstantUImm5Plus1ReportUImm6AsmOperandClass], 1>; ++def ConstantUImm5AsmOperandClass ++ : ConstantUImmAsmOperandClass<5, [ConstantUImm5Plus1AsmOperandClass]>; ++def ConstantSImm5AsmOperandClass ++ : ConstantSImmAsmOperandClass<5, [ConstantUImm5AsmOperandClass]>; ++def ConstantUImm4AsmOperandClass ++ : ConstantUImmAsmOperandClass<4, [ConstantSImm5AsmOperandClass]>; ++def ConstantSImm4AsmOperandClass ++ : ConstantSImmAsmOperandClass<4, [ConstantUImm4AsmOperandClass]>; ++def ConstantUImm3AsmOperandClass ++ : ConstantUImmAsmOperandClass<3, [ConstantSImm4AsmOperandClass]>; ++def ConstantUImm2AsmOperandClass ++ : ConstantUImmAsmOperandClass<2, [ConstantUImm3AsmOperandClass]>; ++def ConstantUImm1AsmOperandClass ++ : ConstantUImmAsmOperandClass<1, [ConstantUImm2AsmOperandClass]>; ++def ConstantImmzAsmOperandClass : AsmOperandClass { ++ let Name = "ConstantImmz"; ++ let RenderMethod = "addConstantUImmOperands<1>"; ++ let PredicateMethod = "isConstantImmz"; ++ let SuperClasses = [ConstantUImm1AsmOperandClass]; ++ let DiagnosticType = "Immz"; ++} ++ ++foreach I = {1, 2, 3, 4, 5, 6, 8} in ++ def vsplat_uimm # I : Operand { ++ let PrintMethod = "printUImm<" # I # ">"; ++ let ParserMatchClass = ++ !cast("ConstantUImm" # I # "AsmOperandClass"); ++ } ++ ++foreach I = {5, 10} in ++ def vsplat_simm # I : Operand { ++ let ParserMatchClass = ++ !cast("ConstantSImm" # I # "AsmOperandClass"); ++ } ++ ++foreach I = {1, 4, 7, 8, 10, 20, 26} in ++ def uimm 
# I : Operand { ++ let PrintMethod = "printUImm<" # I # ">"; ++ let ParserMatchClass = ++ !cast("ConstantUImm" # I # "AsmOperandClass"); ++ } ++ ++foreach I = {1, 2, 3, 4, 5, 6, 7, 8} in ++ def uimm # I # _ptr : Operand { ++ let PrintMethod = "printUImm<" # I # ">"; ++ let ParserMatchClass = ++ !cast("ConstantUImm" # I # "AsmOperandClass"); ++ } ++ ++ ++def addrimm12 : ComplexPattern; ++ ++ ++def LoongArchMemSimm12AsmOperand : AsmOperandClass { ++ let Name = "MemOffsetSimm12"; ++ let SuperClasses = [LoongArchMemAsmOperand]; ++ let RenderMethod = "addMemOperands"; ++ let ParserMethod = "parseMemOperand"; ++ let PredicateMethod = "isMemWithSimmOffset<12>"; ++ let DiagnosticType = "MemSImm12"; ++} ++ ++def mem_simm12 : mem_generic { ++ let MIOperandInfo = (ops ptr_rc, simm12); ++ let EncoderMethod = "getMemEncoding"; ++ let ParserMatchClass = LoongArchMemSimm12AsmOperand; ++} ++ ++foreach I = {4, 6, 9, 10, 11} in ++ def simm # I : Operand { ++ let DecoderMethod = "DecodeSImmWithOffsetAndScale<" # I # ">"; ++ let ParserMatchClass = ++ !cast("ConstantSImm" # I # "AsmOperandClass"); ++ } ++ ++def LoongArchMemSimm9AsmOperand : AsmOperandClass { ++ let Name = "MemOffsetSimm9"; ++ let SuperClasses = [LoongArchMemAsmOperand]; ++ let RenderMethod = "addMemOperands"; ++ let ParserMethod = "parseMemOperand"; ++ let PredicateMethod = "isMemWithSimmOffset<9>"; ++ let DiagnosticType = "MemSImm9"; ++} ++ ++def LoongArchMemSimm10AsmOperand : AsmOperandClass { ++ let Name = "MemOffsetSimm10"; ++ let SuperClasses = [LoongArchMemAsmOperand]; ++ let RenderMethod = "addMemOperands"; ++ let ParserMethod = "parseMemOperand"; ++ let PredicateMethod = "isMemWithSimmOffset<10>"; ++ let DiagnosticType = "MemSImm10"; ++} ++ ++def LoongArchMemSimm11AsmOperand : AsmOperandClass { ++ let Name = "MemOffsetSimm11"; ++ let SuperClasses = [LoongArchMemAsmOperand]; ++ let RenderMethod = "addMemOperands"; ++ let ParserMethod = "parseMemOperand"; ++ let PredicateMethod = "isMemWithSimmOffset<11>"; ++ let DiagnosticType = "MemSImm11"; ++} ++ ++def simm13 : PatLeaf<(imm), [{ return isInt<13>(N->getSExtValue()); }]>; ++ ++def simm10Op : Operand { ++ let DecoderMethod = "DecodeSIMM10"; ++} ++ ++def simm13Op : Operand { ++ let DecoderMethod = "DecodeSIMM13"; ++} ++ ++def LoongArchMemSimm10Lsl2AsmOperand : AsmOperandClass { ++ let Name = "MemOffsetSimm10_2"; ++ let SuperClasses = [LoongArchMemAsmOperand]; ++ let RenderMethod = "addMemOperands"; ++ let ParserMethod = "parseMemOperand"; ++ let PredicateMethod = "isMemWithSimmOffset<10, 2>"; ++ let DiagnosticType = "MemSImm10Lsl2"; ++} ++ ++ ++def simm10_lsl2 : Operand { ++// let DecoderMethod = "DecodeSImmWithOffsetAndScale<10, 2>"; ++ let ParserMatchClass = ++ !cast("ConstantSImm10Lsl2AsmOperandClass"); ++} ++ ++def mem_simm10_lsl2 : mem_generic { ++ let MIOperandInfo = (ops ptr_rc, !cast("simm10_lsl2")); ++ let EncoderMethod = "getMemEncoding10l2"; ++ let ParserMatchClass = ++ !cast("LoongArchMemSimm10Lsl2AsmOperand"); ++} ++ ++ ++def LoongArchMemSimm11Lsl1AsmOperand : AsmOperandClass { ++ let Name = "MemOffsetSimm11_1"; ++ let SuperClasses = [LoongArchMemAsmOperand]; ++ let RenderMethod = "addMemOperands"; ++ let ParserMethod = "parseMemOperand"; ++ let PredicateMethod = "isMemWithSimmOffset<11, 1>"; ++ let DiagnosticType = "MemSImm11Lsl1"; ++} ++ ++ ++def simm11_lsl1 : Operand { ++ // let DecoderMethod = "DecodeSImmWithOffsetAndScale<11, 1>"; ++ let ParserMatchClass = ++ !cast("ConstantSImm11Lsl1AsmOperandClass"); ++} ++ ++def mem_simm11_lsl1 : mem_generic { ++ let MIOperandInfo = 
(ops ptr_rc, !cast("simm11_lsl1")); ++ let EncoderMethod = "getMemEncoding11l1"; ++ let ParserMatchClass = ++ !cast("LoongArchMemSimm11Lsl1AsmOperand"); ++} ++ ++def LoongArchMemSimm9Lsl3AsmOperand : AsmOperandClass { ++ let Name = "MemOffsetSimm9_3"; ++ let SuperClasses = [LoongArchMemAsmOperand]; ++ let RenderMethod = "addMemOperands"; ++ let ParserMethod = "parseMemOperand"; ++ let PredicateMethod = "isMemWithSimmOffset<9, 3>"; ++ let DiagnosticType = "MemSImm9Lsl3"; ++} ++ ++ ++def simm9_lsl3 : Operand { ++ // let DecoderMethod = "DecodeSImmWithOffsetAndScale<9, 3>"; ++ let ParserMatchClass = ++ !cast("ConstantSImm9Lsl3AsmOperandClass"); ++} ++ ++def mem_simm9_lsl3 : mem_generic { ++ let MIOperandInfo = (ops ptr_rc, !cast("simm9_lsl3")); ++ let EncoderMethod = "getMemEncoding9l3"; ++ let ParserMatchClass = ++ !cast("LoongArchMemSimm9Lsl3AsmOperand"); ++} ++ ++ ++ ++ ++// Operands ++ ++def immZExt2Lsa : ImmLeaf(Imm - 1);}]>; ++ ++// Pattern fragments ++def vextract_sext_i8 : PatFrag<(ops node:$vec, node:$idx), ++ (LoongArchVExtractSExt node:$vec, node:$idx, i8)>; ++def vextract_sext_i16 : PatFrag<(ops node:$vec, node:$idx), ++ (LoongArchVExtractSExt node:$vec, node:$idx, i16)>; ++def vextract_sext_i32 : PatFrag<(ops node:$vec, node:$idx), ++ (LoongArchVExtractSExt node:$vec, node:$idx, i32)>; ++def vextract_sext_i64 : PatFrag<(ops node:$vec, node:$idx), ++ (LoongArchVExtractSExt node:$vec, node:$idx, i64)>; ++ ++def vextract_zext_i8 : PatFrag<(ops node:$vec, node:$idx), ++ (LoongArchVExtractZExt node:$vec, node:$idx, i8)>; ++def vextract_zext_i16 : PatFrag<(ops node:$vec, node:$idx), ++ (LoongArchVExtractZExt node:$vec, node:$idx, i16)>; ++def vextract_zext_i32 : PatFrag<(ops node:$vec, node:$idx), ++ (LoongArchVExtractZExt node:$vec, node:$idx, i32)>; ++def vextract_zext_i64 : PatFrag<(ops node:$vec, node:$idx), ++ (LoongArchVExtractZExt node:$vec, node:$idx, i64)>; ++ ++def vldrepl_v16i8 : PatFrag<(ops node:$v1), ++ (v16i8 (LoongArchVBROADCAST node:$v1))>; ++def vldrepl_v8i16 : PatFrag<(ops node:$v1), ++ (v8i16 (LoongArchVBROADCAST node:$v1))>; ++def vldrepl_v4i32 : PatFrag<(ops node:$v1), ++ (v4i32 (LoongArchVBROADCAST node:$v1))>; ++def vldrepl_v2i64 : PatFrag<(ops node:$v1), ++ (v2i64 (LoongArchVBROADCAST node:$v1))>; ++ ++def vinsert_v16i8 : PatFrag<(ops node:$vec, node:$val, node:$idx), ++ (v16i8 (vector_insert node:$vec, node:$val, node:$idx))>; ++def vinsert_v8i16 : PatFrag<(ops node:$vec, node:$val, node:$idx), ++ (v8i16 (vector_insert node:$vec, node:$val, node:$idx))>; ++def vinsert_v4i32 : PatFrag<(ops node:$vec, node:$val, node:$idx), ++ (v4i32 (vector_insert node:$vec, node:$val, node:$idx))>; ++def vinsert_v2i64 : PatFrag<(ops node:$vec, node:$val, node:$idx), ++ (v2i64 (vector_insert node:$vec, node:$val, node:$idx))>; ++ ++class vfsetcc_type : ++ PatFrag<(ops node:$lhs, node:$rhs), ++ (ResTy (vfsetcc (OpTy node:$lhs), (OpTy node:$rhs), CC))>; ++ ++// ISD::SETFALSE cannot occur ++def vfseteq_v4f32 : vfsetcc_type; ++def vfseteq_v2f64 : vfsetcc_type; ++def vfsetge_v4f32 : vfsetcc_type; ++def vfsetge_v2f64 : vfsetcc_type; ++def vfsetgt_v4f32 : vfsetcc_type; ++def vfsetgt_v2f64 : vfsetcc_type; ++def vfsetle_v4f32 : vfsetcc_type; ++def vfsetle_v2f64 : vfsetcc_type; ++def vfsetlt_v4f32 : vfsetcc_type; ++def vfsetlt_v2f64 : vfsetcc_type; ++def vfsetne_v4f32 : vfsetcc_type; ++def vfsetne_v2f64 : vfsetcc_type; ++def vfsetoeq_v4f32 : vfsetcc_type; ++def vfsetoeq_v2f64 : vfsetcc_type; ++def vfsetoge_v4f32 : vfsetcc_type; ++def vfsetoge_v2f64 : vfsetcc_type; ++def vfsetogt_v4f32 : 
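++// Pattern fragments: vextract_sext/zext_* and vinsert_* pin the element type
++// on the element extract/insert nodes, and vldrepl_* match a
++// LoongArchVBROADCAST producing each 128-bit vector type.  Each
++// vfset*_v4f32 / vfset*_v2f64 fragment fixes one ISD condition code on the
++// vector FP setcc, so the compare instruction patterns can be written as
++// one-line instantiations per (element type, condition) pair.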
vfsetcc_type; ++def vfsetogt_v2f64 : vfsetcc_type; ++def vfsetole_v4f32 : vfsetcc_type; ++def vfsetole_v2f64 : vfsetcc_type; ++def vfsetolt_v4f32 : vfsetcc_type; ++def vfsetolt_v2f64 : vfsetcc_type; ++def vfsetone_v4f32 : vfsetcc_type; ++def vfsetone_v2f64 : vfsetcc_type; ++def vfsetord_v4f32 : vfsetcc_type; ++def vfsetord_v2f64 : vfsetcc_type; ++def vfsetun_v4f32 : vfsetcc_type; ++def vfsetun_v2f64 : vfsetcc_type; ++def vfsetueq_v4f32 : vfsetcc_type; ++def vfsetueq_v2f64 : vfsetcc_type; ++def vfsetuge_v4f32 : vfsetcc_type; ++def vfsetuge_v2f64 : vfsetcc_type; ++def vfsetugt_v4f32 : vfsetcc_type; ++def vfsetugt_v2f64 : vfsetcc_type; ++def vfsetule_v4f32 : vfsetcc_type; ++def vfsetule_v2f64 : vfsetcc_type; ++def vfsetult_v4f32 : vfsetcc_type; ++def vfsetult_v2f64 : vfsetcc_type; ++def vfsetune_v4f32 : vfsetcc_type; ++def vfsetune_v2f64 : vfsetcc_type; ++ ++ ++ ++// ISD::SETTRUE cannot occur ++// ISD::SETFALSE2 cannot occur ++// ISD::SETTRUE2 cannot occur ++ ++class vsetcc_type : ++ PatFrag<(ops node:$lhs, node:$rhs), ++ (ResTy (vsetcc node:$lhs, node:$rhs, CC))>; ++ ++def vseteq_v16i8 : vsetcc_type; ++def vseteq_v8i16 : vsetcc_type; ++def vseteq_v4i32 : vsetcc_type; ++def vseteq_v2i64 : vsetcc_type; ++def vsetle_v16i8 : vsetcc_type; ++def vsetle_v8i16 : vsetcc_type; ++def vsetle_v4i32 : vsetcc_type; ++def vsetle_v2i64 : vsetcc_type; ++def vsetlt_v16i8 : vsetcc_type; ++def vsetlt_v8i16 : vsetcc_type; ++def vsetlt_v4i32 : vsetcc_type; ++def vsetlt_v2i64 : vsetcc_type; ++def vsetule_v16i8 : vsetcc_type; ++def vsetule_v8i16 : vsetcc_type; ++def vsetule_v4i32 : vsetcc_type; ++def vsetule_v2i64 : vsetcc_type; ++def vsetult_v16i8 : vsetcc_type; ++def vsetult_v8i16 : vsetcc_type; ++def vsetult_v4i32 : vsetcc_type; ++def vsetult_v2i64 : vsetcc_type; ++ ++def vsplati8 : PatFrag<(ops node:$e0), ++ (v16i8 (build_vector node:$e0, node:$e0, ++ node:$e0, node:$e0, ++ node:$e0, node:$e0, ++ node:$e0, node:$e0, ++ node:$e0, node:$e0, ++ node:$e0, node:$e0, ++ node:$e0, node:$e0, ++ node:$e0, node:$e0))>; ++def vsplati16 : PatFrag<(ops node:$e0), ++ (v8i16 (build_vector node:$e0, node:$e0, ++ node:$e0, node:$e0, ++ node:$e0, node:$e0, ++ node:$e0, node:$e0))>; ++def vsplati32 : PatFrag<(ops node:$e0), ++ (v4i32 (build_vector node:$e0, node:$e0, ++ node:$e0, node:$e0))>; ++ ++def vsplati64_imm_eq_1 : PatLeaf<(bitconvert (v4i32 (build_vector))), [{ ++ APInt Imm; ++ SDNode *BV = N->getOperand(0).getNode(); ++ EVT EltTy = N->getValueType(0).getVectorElementType(); ++ ++ return selectVSplat(BV, Imm, EltTy.getSizeInBits()) && ++ Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 1; ++}]>; ++ ++def vsplati64 : PatFrag<(ops node:$e0), ++ (v2i64 (build_vector node:$e0, node:$e0))>; ++ ++def vsplati64_splat_d : PatFrag<(ops node:$e0), ++ (v2i64 (bitconvert ++ (v4i32 (and ++ (v4i32 (build_vector node:$e0, ++ node:$e0, ++ node:$e0, ++ node:$e0)), ++ vsplati64_imm_eq_1))))>; ++ ++def vsplatf32 : PatFrag<(ops node:$e0), ++ (v4f32 (build_vector node:$e0, node:$e0, ++ node:$e0, node:$e0))>; ++def vsplatf64 : PatFrag<(ops node:$e0), ++ (v2f64 (build_vector node:$e0, node:$e0))>; ++ ++def vsplati8_elt : PatFrag<(ops node:$v, node:$i), ++ (LoongArchVSHF (vsplati8 node:$i), node:$v, node:$v)>; ++def vsplati16_elt : PatFrag<(ops node:$v, node:$i), ++ (LoongArchVSHF (vsplati16 node:$i), node:$v, node:$v)>; ++def vsplati32_elt : PatFrag<(ops node:$v, node:$i), ++ (LoongArchVSHF (vsplati32 node:$i), node:$v, node:$v)>; ++def vsplati64_elt : PatFrag<(ops node:$v, node:$i), ++ (LoongArchVSHF (vsplati64_splat_d node:$i),node:$v, 
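++// vsplati8/16/32/64 and vsplatf32/f64 match a build_vector whose lanes are
++// all the same scalar, and the vsetcc_type fragments play the same role as
++// the FP ones for integer compares.  vsplat*_elt wrap LoongArchVSHF so that
++// lane $i of $v is broadcast to every lane (what the vreplve-style defs
++// select); the 64-bit variant goes through vsplati64_splat_d, which
++// effectively masks the lane index down to one bit since v2i64 has only two
++// lanes.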
node:$v)>; ++ ++class SplatPatLeaf ++ : PatLeaf { ++ Operand OpClass = opclass; ++} ++ ++class SplatComplexPattern roots = [], ++ list props = []> : ++ ComplexPattern { ++ Operand OpClass = opclass; ++} ++ ++def vsplati8_uimm3 : SplatComplexPattern; ++ ++def vsplati8_uimm4 : SplatComplexPattern; ++ ++def vsplati8_uimm5 : SplatComplexPattern; ++ ++def vsplati8_uimm8 : SplatComplexPattern; ++ ++def vsplati8_simm5 : SplatComplexPattern; ++ ++def vsplati16_uimm3 : SplatComplexPattern; ++ ++def vsplati16_uimm4 : SplatComplexPattern; ++ ++def vsplati16_uimm5 : SplatComplexPattern; ++ ++def vsplati16_simm5 : SplatComplexPattern; ++ ++def vsplati32_uimm2 : SplatComplexPattern; ++ ++def vsplati32_uimm5 : SplatComplexPattern; ++ ++def vsplati32_simm5 : SplatComplexPattern; ++ ++def vsplati64_uimm1 : SplatComplexPattern; ++ ++def vsplati64_uimm5 : SplatComplexPattern; ++ ++def vsplati64_uimm6 : SplatComplexPattern; ++ ++def vsplati64_simm5 : SplatComplexPattern; ++ ++ ++// Any build_vector that is a constant splat with a value that equals 1 ++// FIXME: These should be a ComplexPattern but we can't use them because the ++// ISel generator requires the uses to have a name, but providing a name ++// causes other errors ("used in pattern but not operand list") ++def vsplat_imm_eq_1 : PatLeaf<(build_vector), [{ ++ APInt Imm; ++ EVT EltTy = N->getValueType(0).getVectorElementType(); ++ ++ return selectVSplat(N, Imm, EltTy.getSizeInBits()) && ++ Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 1; ++}]>; ++ ++def vbitclr_b : PatFrag<(ops node:$vj, node:$vk), ++ (and node:$vj, (xor (shl vsplat_imm_eq_1, node:$vk), ++ immAllOnesV))>; ++def vbitclr_h : PatFrag<(ops node:$vj, node:$vk), ++ (and node:$vj, (xor (shl vsplat_imm_eq_1, node:$vk), ++ immAllOnesV))>; ++def vbitclr_w : PatFrag<(ops node:$vj, node:$vk), ++ (and node:$vj, (xor (shl vsplat_imm_eq_1, node:$vk), ++ immAllOnesV))>; ++def vbitclr_d : PatFrag<(ops node:$vj, node:$vk), ++ (and node:$vj, (xor (shl (v2i64 vsplati64_imm_eq_1), ++ node:$vk), ++ (bitconvert (v4i32 immAllOnesV))))>; ++ ++def vbneg_b : PatFrag<(ops node:$vj, node:$vk), ++ (xor node:$vj, (shl vsplat_imm_eq_1, node:$vk))>; ++def vbneg_h : PatFrag<(ops node:$vj, node:$vk), ++ (xor node:$vj, (shl vsplat_imm_eq_1, node:$vk))>; ++def vbneg_w : PatFrag<(ops node:$vj, node:$vk), ++ (xor node:$vj, (shl vsplat_imm_eq_1, node:$vk))>; ++def vbneg_d : PatFrag<(ops node:$vj, node:$vk), ++ (xor node:$vj, (shl (v2i64 vsplati64_imm_eq_1), ++ node:$vk))>; ++ ++def vbset_b : PatFrag<(ops node:$vj, node:$vk), ++ (or node:$vj, (shl vsplat_imm_eq_1, node:$vk))>; ++def vbset_h : PatFrag<(ops node:$vj, node:$vk), ++ (or node:$vj, (shl vsplat_imm_eq_1, node:$vk))>; ++def vbset_w : PatFrag<(ops node:$vj, node:$vk), ++ (or node:$vj, (shl vsplat_imm_eq_1, node:$vk))>; ++def vbset_d : PatFrag<(ops node:$vj, node:$vk), ++ (or node:$vj, (shl (v2i64 vsplati64_imm_eq_1), ++ node:$vk))>; ++ ++def muladd : PatFrag<(ops node:$vd, node:$vj, node:$vk), ++ (add node:$vd, (mul node:$vj, node:$vk))>; ++ ++def mulsub : PatFrag<(ops node:$vd, node:$vj, node:$vk), ++ (sub node:$vd, (mul node:$vj, node:$vk))>; ++ ++class IsCommutable { ++ bit isCommutable = 1; ++} ++ ++ ++ ++//class ++class LSX_3R_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, ROVK:$vk); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $vk"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, ROVK:$vk))]; ++} ++ ++class LSX_3RN_DESC_BASE : ++ LSX_3R_DESC_BASE; ++ ++class LSX_3R_4R_DESC_BASE { ++ dag 
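++// vbitclr_*, vbset_* and vbneg_* express the per-element bit ops with
++// generic nodes: clear is vj & ~(1 << vk), set is vj | (1 << vk) and
++// negate (toggle) is vj ^ (1 << vk), with the splat-of-1 fragments supplying
++// the shifted constant.  muladd/mulsub are the vd +/- (vj * vk) forms used
++// by vmadd/vmsub.  The LSX_*_DESC_BASE classes carry only operands, asm
++// string, pattern and constraints; the LSX_3R/LSX_I5/... classes referenced
++// from each instruction def supply the fixed encoding bits.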
OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVD:$vd_in, ROVJ:$vj, ROVK:$vk); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $vk"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROVJ:$vj, ++ ROVK:$vk))]; ++ string Constraints = "$vd = $vd_in"; ++} ++ ++class LSX_3R_VREPLVE_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, GPR32Opnd:$rk); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $rk"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, GPR32Opnd:$rk))]; ++} ++ ++class LSX_VEC_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, ROVK:$vk); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $vk"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, ROVK:$vk))]; ++} ++ ++class LSX_VEC_PSEUDO_BASE : ++ LSXPseudo<(outs ROVD:$vd), (ins ROVJ:$vj, ROVK:$vk), ++ [(set ROVD:$vd, (OpNode ROVJ:$vj, ROVK:$vk))]>; ++ ++class LSX_3RF_DESC_BASE : ++ LSX_3R_DESC_BASE; ++ ++class LSX_3RFN_DESC_BASE : ++ LSX_3R_DESC_BASE; ++ ++class LSX_3R_DESC_BASE1 { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, ROVK:$vk); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $vk"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vk, ROVK:$vj))]; ++} ++ ++class LSX_3RF_DESC_BASE1 : ++ LSX_3R_DESC_BASE1; ++ ++class LSX_3R_VSHF_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVD:$vd_in, ROVJ:$vj, ROVK:$vk); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $vk"); ++ list Pattern = [(set ROVD:$vd, (LoongArchVSHF ROVD:$vd_in, ROVJ:$vj, ++ ROVK:$vk))]; ++ string Constraints = "$vd = $vd_in"; ++} ++ ++class LSX_3R_4R_VSHF_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, ROVK:$vk, ROVD:$va); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $vk, $va"); ++ list Pattern = [(set ROVD:$vd, (LoongArchVSHF ROVD:$va, ROVJ:$vj, ++ ROVK:$vk))]; ++} ++ ++class LSX_I5_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, SplatImm.OpClass:$si5); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $si5"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, SplatImm:$si5))]; ++} ++ ++class LSX_I5_U_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, SplatImm.OpClass:$ui5); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui5"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, SplatImm:$ui5))]; ++} ++ ++class LSX_BIT_3_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, ImmOp:$ui3); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui3"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, Imm:$ui3))]; ++} ++ ++class LSX_BIT_3N_DESC_BASE : ++ LSX_BIT_3_DESC_BASE; ++ ++class LSX_BIT_4_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, ImmOp:$ui4); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui4"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, Imm:$ui4))]; ++} ++ ++class LSX_BIT_4N_DESC_BASE : ++ LSX_BIT_4_DESC_BASE; ++ ++class LSX_BIT_5_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, ImmOp:$ui5); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui5"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, Imm:$ui5))]; ++} ++ ++class LSX_BIT_5N_DESC_BASE : ++ LSX_BIT_5_DESC_BASE; ++ ++class LSX_BIT_6_DESC_BASE { ++ dag OutOperandList = (outs 
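++// The LSX_I5*, LSX_BIT_3/4/5/6 and related description classes differ only
++// in the width of their immediate operand.  Where that immediate is one of
++// the SplatComplexPattern defs, its OpClass field doubles as the parser
++// operand class, so assembler range checking and ISel splat matching stay
++// in sync.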
ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, ImmOp:$ui6); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui6"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, Imm:$ui6))]; ++} ++ ++class LSX_BIT_6N_DESC_BASE : ++ LSX_BIT_6_DESC_BASE; ++ ++class LSX_2R_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj))]; ++} ++ ++class LSX_2RN_DESC_BASE : ++ LSX_2R_DESC_BASE; ++ ++class LSX_2RF_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj))]; ++} ++ ++class LSX_2RFN_DESC_BASE : ++ LSX_2R_DESC_BASE; ++ ++class LSX_2RF_DESC_BASE_CVT { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj))]; ++} ++ ++class LSX_2RFN_DESC_BASE_CVT : ++ LSX_2RF_DESC_BASE_CVT; ++ ++class LSX_2RF_DESC_BASE_tmp { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj"); ++ list Pattern = []; ++} ++ ++class LSX_2R_REPL_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROS:$rj); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $rj"); ++ list Pattern = [(set ROVD:$vd, (VT (OpNode ROS:$rj)))]; ++} ++ ++class LSX_INSERT_U4_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVD:$vd_in, ROVJ:$rj, ImmOp:$ui4); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $rj, $ui4"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROVJ:$rj, Imm:$ui4))]; ++ string Constraints = "$vd = $vd_in"; ++} ++ ++class LSX_INSERT_U4N_DESC_BASE : ++ LSX_INSERT_U4_DESC_BASE; ++ ++class LSX_INSERT_U3_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVD:$vd_in, ROS:$rj, ImmOp:$ui3); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $rj, $ui3"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROS:$rj, Imm:$ui3))]; ++ string Constraints = "$vd = $vd_in"; ++} ++ ++class LSX_INSERT_U3N_DESC_BASE : ++ LSX_INSERT_U3_DESC_BASE; ++ ++class LSX_INSERT_U2_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVD:$vd_in, ROS:$rj, ImmOp:$ui2); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $rj, $ui2"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROS:$rj, Imm:$ui2))]; ++ string Constraints = "$vd = $vd_in"; ++} ++ ++class LSX_INSERT_U2N_DESC_BASE : ++ LSX_INSERT_U2_DESC_BASE; ++ ++class LSX_INSERT_U1_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVD:$vd_in, ROS:$rj, ImmOp:$ui1); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $rj, $ui1"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROS:$rj, Imm:$ui1))]; ++ string Constraints = "$vd = $vd_in"; ++} ++ ++class LSX_INSERT_U1N_DESC_BASE : ++ LSX_INSERT_U1_DESC_BASE; ++ ++class LSX_PICK_U1_DESC_BASE { ++ dag OutOperandList = (outs ROD:$rd); ++ dag InOperandList = (ins ROVJ:$vj, ImmOp:$ui1); ++ string AsmString = !strconcat(instr_asm, "\t$rd, $vj, $ui1"); ++ list Pattern = [(set ROD:$rd, (OpNode (VecTy ROVJ:$vj), Imm:$ui1))]; ++} ++ ++class LSX_PICK_U2_DESC_BASE { ++ dag OutOperandList = (outs ROD:$rd); ++ dag InOperandList = (ins ROVJ:$vj, ImmOp:$ui2); ++ string AsmString = 
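++// LSX_INSERT_U{1,2,3,4}_DESC_BASE write a scalar register into the lane
++// selected by the immediate and tie $vd to $vd_in so the remaining lanes
++// pass through unchanged; LSX_PICK_U{1,2,3,4}_DESC_BASE go the other way
++// and read one lane into a GPR.  The index width matches the lane count:
++// ui4 for the 16-lane case down to ui1 for the 2-lane case.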
!strconcat(instr_asm, "\t$rd, $vj, $ui2"); ++ list Pattern = [(set ROD:$rd, (OpNode (VecTy ROVJ:$vj), Imm:$ui2))]; ++} ++ ++class LSX_PICK_U3_DESC_BASE { ++ dag OutOperandList = (outs ROD:$rd); ++ dag InOperandList = (ins ROVJ:$vj, ImmOp:$ui3); ++ string AsmString = !strconcat(instr_asm, "\t$rd, $vj, $ui3"); ++ list Pattern = [(set ROD:$rd, (OpNode (VecTy ROVJ:$vj), Imm:$ui3))]; ++} ++ ++class LSX_PICK_U4_DESC_BASE { ++ dag OutOperandList = (outs ROD:$rd); ++ dag InOperandList = (ins ROVJ:$vj, ImmOp:$ui4); ++ string AsmString = !strconcat(instr_asm, "\t$rd, $vj, $ui4"); ++ list Pattern = [(set ROD:$rd, (OpNode (VecTy ROVJ:$vj), Imm:$ui4))]; ++} ++ ++class LSX_ELM_U3_VREPLVE_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, SplatImm.OpClass:$ui3); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui3"); ++ list Pattern = [(set ROVD:$vd, (LoongArchVSHF SplatImm:$ui3, ROVJ:$vj, ++ ROVJ:$vj))]; ++} ++ ++class LSX_ELM_U2_VREPLVE_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, SplatImm.OpClass:$ui2); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui2"); ++ list Pattern = [(set ROVD:$vd, (LoongArchVSHF SplatImm:$ui2, ROVJ:$vj, ++ ROVJ:$vj))]; ++} ++ ++class LSX_ELM_U1_VREPLVE_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, SplatImm.OpClass:$ui1); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui1"); ++ list Pattern = [(set ROVD:$vd, (LoongArchVSHF SplatImm:$ui1, ROVJ:$vj, ++ ROVJ:$vj))]; ++} ++ ++class LSX_ELM_U4_VREPLVE_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, SplatImm.OpClass:$ui4); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui4"); ++ list Pattern = [(set ROVD:$vd, (LoongArchVSHF SplatImm:$ui4, ROVJ:$vj, ++ ROVJ:$vj))]; ++} ++ ++class LSX_ELM_U4_SLD_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVD:$vd_in, ROVJ:$vj, ImmOp:$ui4); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui4"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROVJ:$vj, ++ Imm:$ui4))]; ++ string Constraints = "$vd = $vd_in"; ++} ++ ++class LSX_ELM_U3_SLD_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVD:$vd_in, ROVJ:$vj, ImmOp:$ui3); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui3"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROVJ:$vj, ++ Imm:$ui3))]; ++ string Constraints = "$vd = $vd_in"; ++} ++ ++class LSX_ELM_U2_SLD_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVD:$vd_in, ROVJ:$vj, ImmOp:$ui2); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui2"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROVJ:$vj, ++ Imm:$ui2))]; ++ string Constraints = "$vd = $vd_in"; ++} ++ ++class LSX_ELM_U1_SLD_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVD:$vd_in, ROVJ:$vj, ImmOp:$ui1); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui1"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROVJ:$vj, ++ Imm:$ui1))]; ++ string Constraints = "$vd = $vd_in"; ++} ++ ++class LSX_BIT_U3_VREPLVE_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, SplatImm.OpClass:$ui3); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui3"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, SplatImm:$ui3))]; ++} ++ ++class LSX_BIT_U4_VREPLVE_DESC_BASE { ++ dag OutOperandList = 
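++// The ELM_U*_VREPLVE classes broadcast the lane picked by the immediate by
++// feeding a splat of that immediate into LoongArchVSHF, while the
++// ELM_U*_SLD classes are the destructive variants that also read $vd
++// ($vd = $vd_in).  The BIT_U*_VREPLVE classes take a splat-constant
++// immediate (a shift amount or bit position) rather than a lane index.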
(outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, SplatImm.OpClass:$ui4); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui4"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, SplatImm:$ui4))]; ++} ++ ++class LSX_BIT_U5_VREPLVE_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, SplatImm.OpClass:$ui5); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui5"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, SplatImm:$ui5))]; ++} ++ ++class LSX_BIT_U6_VREPLVE_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, SplatImm.OpClass:$ui6); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui6"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, SplatImm:$ui6))]; ++} ++ ++class LSX_BIT_U6_VREPLVE_DESC_BASE_Intrinsic { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, uimm6:$ui6); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui6"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, immZExt6:$ui6))]; ++} ++ ++class LSX_BIT_U3_VREPLVE_DESC_BASE_Intrinsic { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, uimm3:$ui3); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui3"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, immZExt3:$ui3))]; ++} ++ ++class LSX_BIT_U4_VREPLVE_DESC_BASE_Intrinsic { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, uimm4:$ui4); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui4"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, immZExt4:$ui4))]; ++} ++ ++class LSX_BIT_U5_VREPLVE_DESC_BASE_Intrinsic { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, uimm5:$ui5); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui5"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, immZExt5:$ui5))]; ++} ++ ++class LSX_I8_SHF_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, uimm8:$ui8); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui8"); ++ list Pattern = [(set ROVD:$vd, (LoongArchSHF immZExt8:$ui8, ROVJ:$vj))]; ++} ++ ++class LSX_I8_SHUF_DESC_BASE_D { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVD:$vd_in, ROVJ:$vj, uimm8:$ui8); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui8"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROVJ:$vj, immZExt8:$ui8))]; ++ string Constraints = "$vd = $vd_in"; ++} ++ ++def LoongArchSelect : SDNode<"LoongArchISD::VSELECT" ,SDTSelect>; ++def LoongArchVROR : SDNode<"LoongArchISD::VROR", ++ SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisInt<0>, ++ SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>]>, []>; ++def LoongArchVRORI : SDNode<"LoongArchISD::VRORI", ++ SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisInt<0>, ++ SDTCisSameAs<0, 1>, SDTCisVT<2, i32>]>, []>; ++ ++class LSX2_RORI_U3_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, ImmOp:$ui3); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui3"); ++ list Pattern = [(set ROVD:$vd, (LoongArchVRORI ROVJ:$vj, Imm:$ui3))]; ++} ++ ++class LSX2_RORI_U4_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, ImmOp:$ui4); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui4"); ++ list Pattern = [(set ROVD:$vd, (LoongArchVRORI ROVJ:$vj, Imm:$ui4))]; ++} ++ ++class LSX2_RORI_U5_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, 
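++// LoongArchISD::VROR rotates each element right by a per-element amount and
++// VRORI by a single i32 immediate; the LSX2_RORI_U{3,4,5,6} classes just
++// size that immediate to log2(element bits), i.e. ui3 for 8-bit lanes up to
++// ui6 for 64-bit lanes.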
ImmOp:$ui5); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui5"); ++ list Pattern = [(set ROVD:$vd, (LoongArchVRORI ROVJ:$vj, Imm:$ui5))]; ++} ++ ++class LSX2_RORI_U6_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, ImmOp:$ui6); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui6"); ++ list Pattern = [(set ROVD:$vd, (LoongArchVRORI ROVJ:$vj, Imm:$ui6))]; ++} ++ ++class LSX_BIND_U4_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVD:$vd_in, ROVJ:$vj, ImmOp:$ui4); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui4"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROVJ:$vj, Imm:$ui4))]; ++ string Constraints = "$vd = $vd_in"; ++} ++ ++class LSX_BIND_U4N_DESC_BASE : ++ LSX_BIND_U4_DESC_BASE; ++ ++class LSX_BIND_U5_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVD:$vd_in, ROVJ:$vj, ImmOp:$ui5); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui5"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROVJ:$vj, Imm:$ui5))]; ++ string Constraints = "$vd = $vd_in"; ++} ++ ++class LSX_BIND_U5N_DESC_BASE : ++ LSX_BIND_U5_DESC_BASE; ++ ++class LSX_BIND_U6_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVD:$vd_in, ROVJ:$vj, ImmOp:$ui6); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui6"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROVJ:$vj, Imm:$ui6))]; ++ string Constraints = "$vd = $vd_in"; ++} ++ ++class LSX_BIND_U6N_DESC_BASE : ++ LSX_BIND_U6_DESC_BASE; ++ ++class LSX_BIND_U7_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVD:$vd_in, ROVJ:$vj, uimm7:$ui7); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui7"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROVJ:$vj, immZExt7:$ui7))]; ++ string Constraints = "$vd = $vd_in"; ++} ++ ++class LSX_BIND_U7N_DESC_BASE : ++ LSX_BIND_U7_DESC_BASE; ++ ++ ++class LD_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins MemOpnd:$addr); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $addr"); ++ list Pattern = [(set ROVD:$vd, (TyNode (OpNode Addr:$addr)))]; ++ string DecoderMethod = "DecodeLSX128Mem"; ++} ++ ++class ST_DESC_BASE { ++ dag OutOperandList = (outs); ++ dag InOperandList = (ins ROVD:$vd, MemOpnd:$addr); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $addr"); ++ list Pattern = [(OpNode (TyNode ROVD:$vd), Addr:$addr)]; ++ string DecoderMethod = "DecodeLSX128Mem"; ++} ++ ++class LSX_VEC_ADDR_PSEUDO_BASE : ++ LSXPseudo<(outs), (ins ROVD:$vd, MemOpnd:$addr), ++ [(OpNode (TyNode ROVD:$vd), MemOpnd:$addr)]>; ++ ++ ++class LSX_SET_DESC_BASE { ++ dag OutOperandList = (outs FCFROpnd:$cd); ++ dag InOperandList = (ins ROVD:$vj); ++ string AsmString = !strconcat(instr_asm, "\t$cd, $vj"); ++ list Pattern = []; ++} ++ ++class LSX_SET_DESC_BASE_tmp { ++ dag OutOperandList = (outs FCFROpnd:$cd); ++ dag InOperandList = (ins ROVD:$vj); ++ string AsmString = !strconcat(instr_asm, "\t$cd, $vj"); ++ list Pattern = []; ++} ++ ++class LSX_VMul_Reg4 { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, ROVK:$vk, ROVA:$va); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $vk, $va"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, ROVK:$vk, ROVA:$va))]; ++} ++ ++class LSX_4RF { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, ROVK:$vk, ROVA:$va); ++ string AsmString = !strconcat(instr_asm, "\t$vd, 
$vj, $vk, $va"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, ROVK:$vk, ROVA:$va))]; ++} ++ ++ ++class LSX_VFCMP_Reg3 { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, ROVK:$vk); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $vk"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, ROVK:$vk))]; ++} ++ ++class LSX_I12_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins PtrRC:$rj, ImmOp:$si12); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $rj, $si12"); ++ list Pattern = [(set ROVD:$vd, (OpNode iPTR:$rj, Imm:$si12))]; ++} ++ ++class LSX_I11_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins PtrRC:$rj, ImmOp:$si11); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $rj, $si11"); ++ list Pattern = [(set ROVD:$vd, (OpNode iPTR:$rj, Imm:$si11))]; ++} ++ ++class LSX_I10_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins PtrRC:$rj, ImmOp:$si10); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $rj, $si10"); ++ list Pattern = [(set ROVD:$vd, (OpNode iPTR:$rj, Imm:$si10))]; ++} ++ ++class LSX_I9_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins PtrRC:$rj, ImmOp:$si9); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $rj, $si9"); ++ list Pattern = [(set ROVD:$vd, (OpNode iPTR:$rj, Imm:$si9))]; ++} ++ ++ ++class LSX_I8_U1_DESC_BASE { ++ dag OutOperandList = (outs); ++ dag InOperandList = (ins ROVD:$vd, PtrRC:$rj, ImmOp:$si8, uimm1:$idx); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $rj, $si8, $idx"); ++ list Pattern = [(OpNode ROVD:$vd, iPTR:$rj, Imm:$si8, immZExt1:$idx)]; ++ string DecoderMethod = "DecodeLSX128memstl"; ++} ++ ++ ++class LSX_I8_U2_DESC_BASE { ++ dag OutOperandList = (outs); ++ dag InOperandList = (ins ROVD:$vd, PtrRC:$rj, ImmOp:$si8, uimm2:$idx); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $rj, $si8, $idx"); ++ list Pattern = [(OpNode ROVD:$vd, iPTR:$rj, Imm:$si8, immZExt2:$idx)]; ++ string DecoderMethod = "DecodeLSX128memstl"; ++} ++ ++class LSX_I8_U3_DESC_BASE { ++ dag OutOperandList = (outs); ++ dag InOperandList = (ins ROVD:$vd, PtrRC:$rj, ImmOp:$si8, uimm3:$idx); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $rj, $si8, $idx"); ++ list Pattern = [(OpNode ROVD:$vd, iPTR:$rj, Imm:$si8, immZExt3:$idx)]; ++ string DecoderMethod = "DecodeLSX128memstl"; ++} ++ ++class LSX_I8_U4_DESC_BASE { ++ dag OutOperandList = (outs); ++ dag InOperandList = (ins ROVD:$vd, PtrRC:$rj, ImmOp:$si8, uimm4:$idx); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $rj, $si8, $idx"); ++ list Pattern = [(OpNode ROVD:$vd, iPTR:$rj, Imm:$si8, immZExt4:$idx)]; ++ string DecoderMethod = "DecodeLSX128memstl"; ++} ++ ++class LSX_I5_U_DESC_BASE_Intrinsic { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, ImmOp:$ui5); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui5"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, Imm:$ui5))]; ++} ++ ++class LSX_I5_DESC_BASE_Intrinsic { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, ImmOp:$si5); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $si5"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, Imm:$si5))]; ++} ++ ++class LSX_LDX_LA { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins PtrRC:$rj, RORK:$rk); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $rj, $rk"); ++ list Pattern = [(set ROVD:$vd, (OpNode iPTR:$rj, RORK:$rk))]; ++} ++ ++class LSX_SDX_LA { ++ dag 
OutOperandList = (outs); ++ dag InOperandList = (ins ROVD:$vd, PtrRC:$rj, RORK:$rk); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $rj, $rk"); ++ list Pattern = [(OpNode ROVD:$vd, iPTR:$rj, RORK:$rk)]; ++} ++ ++class LSX_U5_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, uimm5:$ui5); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui5"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, immZExt5:$ui5))]; ++} ++ ++class LSX_U5_4R_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVD:$vd_in, ROVJ:$vj, uimm5:$ui5); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui5"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROVJ:$vj, immZExt5:$ui5))]; ++ string Constraints = "$vd = $vd_in"; ++} ++ ++class LSX_2R_U3_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, uimm3:$ui3); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui3"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, immZExt3:$ui3))]; ++} ++ ++class LSX_2R_U4_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, uimm4:$ui4); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui4"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, immZExt4:$ui4))]; ++} ++ ++class LSX_2R_U5_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, uimm5:$ui5); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui5"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, immZExt5:$ui5))]; ++} ++ ++class LSX_2R_U6_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, uimm6:$ui6); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui6"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, immZExt6:$ui6))]; ++} ++ ++class LSX_2R_3R_U4_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVD:$vd_in, ROVJ:$vj, uimm4:$ui4); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui4"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROVJ:$vj, immZExt4:$ui4))]; ++ string Constraints = "$vd = $vd_in"; ++} ++ ++class LSX_2R_3R_U5_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVD:$vd_in, ROVJ:$vj, uimm5:$ui5); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui5"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROVJ:$vj, immZExt5:$ui5))]; ++ string Constraints = "$vd = $vd_in"; ++} ++ ++class LSX_2R_3R_U6_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVD:$vd_in, ROVJ:$vj, uimm6:$ui6); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui6"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROVJ:$vj, immZExt6:$ui6))]; ++ string Constraints = "$vd = $vd_in"; ++} ++ ++class LSX_2R_3R_U7_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVD:$vd_in, ROVJ:$vj, uimm7:$ui7); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui7"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROVJ:$vj, immZExt7:$ui7))]; ++ string Constraints = "$vd = $vd_in"; ++} ++ ++class LSX_2R_3R_U8_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVD:$vd_in, ROVJ:$vj, uimm8:$ui8); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui8"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROVJ:$vj, immZExt8:$ui8))]; ++ string Constraints = "$vd = $vd_in"; ++} ++ ++class LSX_2R_3R_SELECT { 
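++// LSX_2R_U{3..6} are plain vector-op-immediate descriptions, the
++// LSX_2R_3R_U{4..8} variants additionally read the destination
++// ($vd = $vd_in) for the destructive forms, and LSX_2R_3R_SELECT passes a
++// splat of its 8-bit immediate as the middle operand of OpNode.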
++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVD:$vd_in, ROVJ:$vj, vsplat_uimm8:$ui8); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui8"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, vsplati8_uimm8:$ui8, ROVJ:$vj))]; ++ string Constraints = "$vd = $vd_in"; ++} ++ ++class LSX_2R_U8_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, uimm8:$ui8); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui8"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, immZExt8:$ui8))]; ++} ++ ++class LSX_I13_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins immOp:$i13); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $i13"); ++ list Pattern = [(set ROVD:$vd, (OpNode (Ty simm13:$i13)))]; ++ string DecoderMethod = "DecodeLSX128Mem13"; ++} ++ ++class LSX_I13_DESC_BASE_10 { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ImmOp:$i10); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $i10"); ++ list Pattern = [(set ROVD:$vd, (OpNode Imm:$i10))]; ++ bit hasSideEffects = 0; ++ string DecoderMethod = "DecodeLSX128Mem10"; ++} ++ ++class LSX_BIT_U8_VREPLVE_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, SplatImm.OpClass:$ui8); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui8"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, SplatImm:$ui8))]; ++} ++ ++ ++class LSXPat pred = [HasLSX]> : ++ Pat, Requires; ++ ++// Instruction encoding. ++ ++ ++def VSADD_B : LSX_3R<0b01110000010001100>, IsCommutable, ++ LSX_3RN_DESC_BASE<"vsadd.b", LSX128BOpnd>; ++ ++def VSADD_H : LSX_3R<0b01110000010001101>, IsCommutable, ++ LSX_3RN_DESC_BASE<"vsadd.h", LSX128HOpnd>; ++ ++def VSADD_W : LSX_3R<0b01110000010001110>, IsCommutable, ++ LSX_3RN_DESC_BASE<"vsadd.w", LSX128WOpnd>; ++ ++def VSADD_D : LSX_3R<0b01110000010001111>, IsCommutable, ++ LSX_3RN_DESC_BASE<"vsadd.d", LSX128DOpnd>; ++ ++ ++def VSSUB_B : LSX_3R<0b01110000010010000>, ++ LSX_3RN_DESC_BASE<"vssub.b", LSX128BOpnd>; ++ ++def VSSUB_H : LSX_3R<0b01110000010010001>, ++ LSX_3RN_DESC_BASE<"vssub.h", LSX128HOpnd>; ++ ++def VSSUB_W : LSX_3R<0b01110000010010010>, ++ LSX_3RN_DESC_BASE<"vssub.w", LSX128WOpnd>; ++ ++def VSSUB_D : LSX_3R<0b01110000010010011>, ++ LSX_3RN_DESC_BASE<"vssub.d", LSX128DOpnd>; ++ ++ ++def VSADD_BU : LSX_3R<0b01110000010010100>, IsCommutable, ++ LSX_3RN_DESC_BASE<"vsadd.bu", LSX128BOpnd>; ++ ++def VSADD_HU : LSX_3R<0b01110000010010101>, IsCommutable, ++ LSX_3RN_DESC_BASE<"vsadd.hu", LSX128HOpnd>; ++ ++def VSADD_WU : LSX_3R<0b01110000010010110>, IsCommutable, ++ LSX_3RN_DESC_BASE<"vsadd.wu", LSX128WOpnd>; ++ ++def VSADD_DU : LSX_3R<0b01110000010010111>, IsCommutable, ++ LSX_3RN_DESC_BASE<"vsadd.du", LSX128DOpnd>; ++ ++ ++def VSSUB_BU : LSX_3R<0b01110000010011000>, ++ LSX_3RN_DESC_BASE<"vssub.bu", LSX128BOpnd>; ++ ++def VSSUB_HU : LSX_3R<0b01110000010011001>, ++ LSX_3RN_DESC_BASE<"vssub.hu", LSX128HOpnd>; ++ ++def VSSUB_WU : LSX_3R<0b01110000010011010>, ++ LSX_3RN_DESC_BASE<"vssub.wu", LSX128WOpnd>; ++ ++def VSSUB_DU : LSX_3R<0b01110000010011011>, ++ LSX_3RN_DESC_BASE<"vssub.du", LSX128DOpnd>; ++ ++ ++def VHADDW_H_B : LSX_3R<0b01110000010101000>, ++ LSX_3RN_DESC_BASE<"vhaddw.h.b", LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; ++ ++def VHADDW_W_H : LSX_3R<0b01110000010101001>, ++ LSX_3RN_DESC_BASE<"vhaddw.w.h", LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VHADDW_D_W : LSX_3R<0b01110000010101010>, ++ LSX_3RN_DESC_BASE<"vhaddw.d.w", LSX128DOpnd, LSX128WOpnd, 
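++// From VSADD_B onward each instruction def mixes an encoding class
++// (LSX_3R<opcode bits>, LSX_I5<...>, ...) with one of the description
++// classes above, and IsCommutable simply flags operand-order-independent
++// ops.  vsadd/vssub are the saturating add/sub forms; vhaddw/vhsubw widen,
++// which is why their result operand class is one element size larger than
++// their inputs.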
LSX128WOpnd>; ++ ++ ++def VHSUBW_H_B : LSX_3R<0b01110000010101100>, ++ LSX_3RN_DESC_BASE<"vhsubw.h.b", LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; ++ ++def VHSUBW_W_H : LSX_3R<0b01110000010101101>, ++ LSX_3RN_DESC_BASE<"vhsubw.w.h", LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VHSUBW_D_W : LSX_3R<0b01110000010101110>, ++ LSX_3RN_DESC_BASE<"vhsubw.d.w", LSX128DOpnd, LSX128WOpnd, LSX128WOpnd>; ++ ++ ++def VHADDW_HU_BU : LSX_3R<0b01110000010110000>, ++ LSX_3RN_DESC_BASE<"vhaddw.hu.bu", LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; ++ ++def VHADDW_WU_HU : LSX_3R<0b01110000010110001>, ++ LSX_3RN_DESC_BASE<"vhaddw.wu.hu", LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VHADDW_DU_WU : LSX_3R<0b01110000010110010>, ++ LSX_3RN_DESC_BASE<"vhaddw.du.wu", LSX128DOpnd, LSX128WOpnd, LSX128WOpnd>; ++ ++ ++def VHSUBW_HU_BU : LSX_3R<0b01110000010110100>, ++ LSX_3RN_DESC_BASE<"vhsubw.hu.bu", LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; ++ ++def VHSUBW_WU_HU : LSX_3R<0b01110000010110101>, ++ LSX_3RN_DESC_BASE<"vhsubw.wu.hu", LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VHSUBW_DU_WU : LSX_3R<0b01110000010110110>, ++ LSX_3RN_DESC_BASE<"vhsubw.du.wu", LSX128DOpnd, LSX128WOpnd, LSX128WOpnd>; ++ ++ ++def VADDA_B : LSX_3R<0b01110000010111000>, IsCommutable, ++ LSX_3RN_DESC_BASE<"vadda.b", LSX128BOpnd>; ++ ++def VADDA_H : LSX_3R<0b01110000010111001>, IsCommutable, ++ LSX_3RN_DESC_BASE<"vadda.h", LSX128HOpnd>; ++ ++def VADDA_W : LSX_3R<0b01110000010111010>, IsCommutable, ++ LSX_3RN_DESC_BASE<"vadda.w", LSX128WOpnd>; ++ ++def VADDA_D : LSX_3R<0b01110000010111011>, IsCommutable, ++ LSX_3RN_DESC_BASE<"vadda.d", LSX128DOpnd>; ++ ++ ++def VABSD_B : LSX_3R<0b01110000011000000>, ++ LSX_3RN_DESC_BASE<"vabsd.b", LSX128BOpnd>; ++ ++def VABSD_H : LSX_3R<0b01110000011000001>, ++ LSX_3RN_DESC_BASE<"vabsd.h", LSX128HOpnd>; ++ ++def VABSD_W : LSX_3R<0b01110000011000010>, ++ LSX_3RN_DESC_BASE<"vabsd.w", LSX128WOpnd>; ++ ++def VABSD_D : LSX_3R<0b01110000011000011>, ++ LSX_3RN_DESC_BASE<"vabsd.d", LSX128DOpnd>; ++ ++ ++def VABSD_BU : LSX_3R<0b01110000011000100>, ++ LSX_3RN_DESC_BASE<"vabsd.bu", LSX128BOpnd>; ++ ++def VABSD_HU : LSX_3R<0b01110000011000101>, ++ LSX_3RN_DESC_BASE<"vabsd.hu", LSX128HOpnd>; ++ ++def VABSD_WU : LSX_3R<0b01110000011000110>, ++ LSX_3RN_DESC_BASE<"vabsd.wu", LSX128WOpnd>; ++ ++def VABSD_DU : LSX_3R<0b01110000011000111>, ++ LSX_3RN_DESC_BASE<"vabsd.du", LSX128DOpnd>; ++ ++ ++def VAVG_B : LSX_3R<0b01110000011001000>, IsCommutable, ++ LSX_3RN_DESC_BASE<"vavg.b", LSX128BOpnd>; ++ ++def VAVG_H : LSX_3R<0b01110000011001001>, IsCommutable, ++ LSX_3RN_DESC_BASE<"vavg.h", LSX128HOpnd>; ++ ++def VAVG_W : LSX_3R<0b01110000011001010>, IsCommutable, ++ LSX_3RN_DESC_BASE<"vavg.w", LSX128WOpnd>; ++ ++def VAVG_D : LSX_3R<0b01110000011001011>, IsCommutable, ++ LSX_3RN_DESC_BASE<"vavg.d", LSX128DOpnd>; ++ ++ ++def VAVG_BU : LSX_3R<0b01110000011001100>, IsCommutable, ++ LSX_3RN_DESC_BASE<"vavg.bu", LSX128BOpnd>; ++ ++def VAVG_HU : LSX_3R<0b01110000011001101>, IsCommutable, ++ LSX_3RN_DESC_BASE<"vavg.hu", LSX128HOpnd>; ++ ++def VAVG_WU : LSX_3R<0b01110000011001110>, IsCommutable, ++ LSX_3RN_DESC_BASE<"vavg.wu", LSX128WOpnd>; ++ ++def VAVG_DU : LSX_3R<0b01110000011001111>, IsCommutable, ++ LSX_3RN_DESC_BASE<"vavg.du", LSX128DOpnd>; ++ ++ ++def VAVGR_B : LSX_3R<0b01110000011010000>, IsCommutable, ++ LSX_3RN_DESC_BASE<"vavgr.b", LSX128BOpnd>; ++ ++def VAVGR_H : LSX_3R<0b01110000011010001>, IsCommutable, ++ LSX_3RN_DESC_BASE<"vavgr.h", LSX128HOpnd>; ++ ++def VAVGR_W : LSX_3R<0b01110000011010010>, IsCommutable, ++ 
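++// vadda/vabsd are the add-of-absolute-values and absolute-difference ops,
++// and vavg/vavgr compute the element average without and with rounding;
++// all of them use the two-argument description class.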
LSX_3RN_DESC_BASE<"vavgr.w", LSX128WOpnd>; ++ ++def VAVGR_D : LSX_3R<0b01110000011010011>, IsCommutable, ++ LSX_3RN_DESC_BASE<"vavgr.d", LSX128DOpnd>; ++ ++ ++def VAVGR_BU : LSX_3R<0b01110000011010100>, IsCommutable, ++ LSX_3RN_DESC_BASE<"vavgr.bu", LSX128BOpnd>; ++ ++def VAVGR_HU : LSX_3R<0b01110000011010101>, IsCommutable, ++ LSX_3RN_DESC_BASE<"vavgr.hu", LSX128HOpnd>; ++ ++def VAVGR_WU : LSX_3R<0b01110000011010110>, IsCommutable, ++ LSX_3RN_DESC_BASE<"vavgr.wu", LSX128WOpnd>; ++ ++def VAVGR_DU : LSX_3R<0b01110000011010111>, IsCommutable, ++ LSX_3RN_DESC_BASE<"vavgr.du", LSX128DOpnd>; ++ ++ ++def VMAX_B : LSX_3R<0b01110000011100000>, ++ LSX_3R_DESC_BASE<"vmax.b", smax, LSX128BOpnd>; ++ ++def VMAX_H : LSX_3R<0b01110000011100001>, ++ LSX_3R_DESC_BASE<"vmax.h", smax, LSX128HOpnd>; ++ ++def VMAX_W : LSX_3R<0b01110000011100010>, ++ LSX_3R_DESC_BASE<"vmax.w", smax, LSX128WOpnd>; ++ ++def VMAX_D : LSX_3R<0b01110000011100011>, ++ LSX_3R_DESC_BASE<"vmax.d", smax, LSX128DOpnd>; ++ ++ ++def VMIN_B : LSX_3R<0b01110000011100100>, ++ LSX_3R_DESC_BASE<"vmin.b", smin, LSX128BOpnd>; ++ ++def VMIN_H : LSX_3R<0b01110000011100101>, ++ LSX_3R_DESC_BASE<"vmin.h", smin, LSX128HOpnd>; ++ ++def VMIN_W : LSX_3R<0b01110000011100110>, ++ LSX_3R_DESC_BASE<"vmin.w", smin, LSX128WOpnd>; ++ ++def VMIN_D : LSX_3R<0b01110000011100111>, ++ LSX_3R_DESC_BASE<"vmin.d", smin, LSX128DOpnd>; ++ ++ ++def VMAX_BU : LSX_3R<0b01110000011101000>, ++ LSX_3R_DESC_BASE<"vmax.bu", umax, LSX128BOpnd>; ++ ++def VMAX_HU : LSX_3R<0b01110000011101001>, ++ LSX_3R_DESC_BASE<"vmax.hu", umax, LSX128HOpnd>; ++ ++def VMAX_WU : LSX_3R<0b01110000011101010>, ++ LSX_3R_DESC_BASE<"vmax.wu", umax, LSX128WOpnd>; ++ ++def VMAX_DU : LSX_3R<0b01110000011101011>, ++ LSX_3R_DESC_BASE<"vmax.du", umax, LSX128DOpnd>; ++ ++ ++def VMIN_BU : LSX_3R<0b01110000011101100>, ++ LSX_3R_DESC_BASE<"vmin.bu", umin, LSX128BOpnd>; ++ ++def VMIN_HU : LSX_3R<0b01110000011101101>, ++ LSX_3R_DESC_BASE<"vmin.hu", umin, LSX128HOpnd>; ++ ++def VMIN_WU : LSX_3R<0b01110000011101110>, ++ LSX_3R_DESC_BASE<"vmin.wu", umin, LSX128WOpnd>; ++ ++def VMIN_DU : LSX_3R<0b01110000011101111>, ++ LSX_3R_DESC_BASE<"vmin.du", umin, LSX128DOpnd>; ++ ++ ++def VMUL_B : LSX_3R<0b01110000100001000>, ++ LSX_3R_DESC_BASE<"vmul.b", mul, LSX128BOpnd>; ++ ++def VMUL_H : LSX_3R<0b01110000100001001>, ++ LSX_3R_DESC_BASE<"vmul.h", mul, LSX128HOpnd>; ++ ++def VMUL_W : LSX_3R<0b01110000100001010>, ++ LSX_3R_DESC_BASE<"vmul.w", mul, LSX128WOpnd>; ++ ++def VMUL_D : LSX_3R<0b01110000100001011>, ++ LSX_3R_DESC_BASE<"vmul.d", mul, LSX128DOpnd>; ++ ++ ++def VMADD_B : LSX_3R<0b01110000101010000>, ++ LSX_3R_4R_DESC_BASE<"vmadd.b", muladd, LSX128BOpnd>; ++ ++def VMADD_H : LSX_3R<0b01110000101010001>, ++ LSX_3R_4R_DESC_BASE<"vmadd.h", muladd, LSX128HOpnd>; ++ ++def VMADD_W : LSX_3R<0b01110000101010010>, ++ LSX_3R_4R_DESC_BASE<"vmadd.w", muladd, LSX128WOpnd>; ++ ++def VMADD_D : LSX_3R<0b01110000101010011>, ++ LSX_3R_4R_DESC_BASE<"vmadd.d", muladd, LSX128DOpnd>; ++ ++ ++def VMSUB_B : LSX_3R<0b01110000101010100>, ++ LSX_3R_4R_DESC_BASE<"vmsub.b", mulsub, LSX128BOpnd>; ++ ++def VMSUB_H : LSX_3R<0b01110000101010101>, ++ LSX_3R_4R_DESC_BASE<"vmsub.h", mulsub, LSX128HOpnd>; ++ ++def VMSUB_W : LSX_3R<0b01110000101010110>, ++ LSX_3R_4R_DESC_BASE<"vmsub.w", mulsub, LSX128WOpnd>; ++ ++def VMSUB_D : LSX_3R<0b01110000101010111>, ++ LSX_3R_4R_DESC_BASE<"vmsub.d", mulsub, LSX128DOpnd>; ++ ++ ++def VDIV_B : LSX_3R<0b01110000111000000>, ++ LSX_3R_DESC_BASE<"vdiv.b", sdiv, LSX128BOpnd>; ++ ++def VDIV_H : LSX_3R<0b01110000111000001>, ++ 
LSX_3R_DESC_BASE<"vdiv.h", sdiv, LSX128HOpnd>; ++ ++def VDIV_W : LSX_3R<0b01110000111000010>, ++ LSX_3R_DESC_BASE<"vdiv.w", sdiv, LSX128WOpnd>; ++ ++def VDIV_D : LSX_3R<0b01110000111000011>, ++ LSX_3R_DESC_BASE<"vdiv.d", sdiv, LSX128DOpnd>; ++ ++ ++def VMOD_B : LSX_3R<0b01110000111000100>, ++ LSX_3R_DESC_BASE<"vmod.b", srem, LSX128BOpnd>; ++ ++def VMOD_H : LSX_3R<0b01110000111000101>, ++ LSX_3R_DESC_BASE<"vmod.h", srem, LSX128HOpnd>; ++ ++def VMOD_W : LSX_3R<0b01110000111000110>, ++ LSX_3R_DESC_BASE<"vmod.w", srem, LSX128WOpnd>; ++ ++def VMOD_D : LSX_3R<0b01110000111000111>, ++ LSX_3R_DESC_BASE<"vmod.d", srem, LSX128DOpnd>; ++ ++ ++def VDIV_BU : LSX_3R<0b01110000111001000>, ++ LSX_3R_DESC_BASE<"vdiv.bu", udiv, LSX128BOpnd>; ++ ++def VDIV_HU : LSX_3R<0b01110000111001001>, ++ LSX_3R_DESC_BASE<"vdiv.hu", udiv, LSX128HOpnd>; ++ ++def VDIV_WU : LSX_3R<0b01110000111001010>, ++ LSX_3R_DESC_BASE<"vdiv.wu", udiv, LSX128WOpnd>; ++ ++def VDIV_DU : LSX_3R<0b01110000111001011>, ++ LSX_3R_DESC_BASE<"vdiv.du", udiv, LSX128DOpnd>; ++ ++ ++def VMOD_BU : LSX_3R<0b01110000111001100>, ++ LSX_3R_DESC_BASE<"vmod.bu", urem, LSX128BOpnd>; ++ ++def VMOD_HU : LSX_3R<0b01110000111001101>, ++ LSX_3R_DESC_BASE<"vmod.hu", urem, LSX128HOpnd>; ++ ++def VMOD_WU : LSX_3R<0b01110000111001110>, ++ LSX_3R_DESC_BASE<"vmod.wu", urem, LSX128WOpnd>; ++ ++def VMOD_DU : LSX_3R<0b01110000111001111>, ++ LSX_3R_DESC_BASE<"vmod.du", urem, LSX128DOpnd>; ++ ++ ++def VSLL_B : LSX_3R<0b01110000111010000>, ++ LSX_3R_DESC_BASE<"vsll.b", shl, LSX128BOpnd>; ++ ++def VSLL_H : LSX_3R<0b01110000111010001>, ++ LSX_3R_DESC_BASE<"vsll.h", shl, LSX128HOpnd>; ++ ++def VSLL_W : LSX_3R<0b01110000111010010>, ++ LSX_3R_DESC_BASE<"vsll.w", shl, LSX128WOpnd>; ++ ++def VSLL_D : LSX_3R<0b01110000111010011>, ++ LSX_3R_DESC_BASE<"vsll.d", shl, LSX128DOpnd>; ++ ++ ++def VSRL_B : LSX_3R<0b01110000111010100>, ++ LSX_3R_DESC_BASE<"vsrl.b", srl, LSX128BOpnd>; ++ ++def VSRL_H : LSX_3R<0b01110000111010101>, ++ LSX_3R_DESC_BASE<"vsrl.h", srl, LSX128HOpnd>; ++ ++def VSRL_W : LSX_3R<0b01110000111010110>, ++ LSX_3R_DESC_BASE<"vsrl.w", srl, LSX128WOpnd>; ++ ++def VSRL_D : LSX_3R<0b01110000111010111>, ++ LSX_3R_DESC_BASE<"vsrl.d", srl, LSX128DOpnd>; ++ ++ ++def VSRA_B : LSX_3R<0b01110000111011000>, ++ LSX_3R_DESC_BASE<"vsra.b", sra, LSX128BOpnd>; ++ ++def VSRA_H : LSX_3R<0b01110000111011001>, ++ LSX_3R_DESC_BASE<"vsra.h", sra, LSX128HOpnd>; ++ ++def VSRA_W : LSX_3R<0b01110000111011010>, ++ LSX_3R_DESC_BASE<"vsra.w", sra, LSX128WOpnd>; ++ ++def VSRA_D : LSX_3R<0b01110000111011011>, ++ LSX_3R_DESC_BASE<"vsra.d", sra, LSX128DOpnd>; ++ ++ ++def VSRLR_B : LSX_3R<0b01110000111100000>, ++ LSX_3RN_DESC_BASE<"vsrlr.b", LSX128BOpnd>; ++ ++def VSRLR_H : LSX_3R<0b01110000111100001>, ++ LSX_3RN_DESC_BASE<"vsrlr.h", LSX128HOpnd>; ++ ++def VSRLR_W : LSX_3R<0b01110000111100010>, ++ LSX_3RN_DESC_BASE<"vsrlr.w", LSX128WOpnd>; ++ ++def VSRLR_D : LSX_3R<0b01110000111100011>, ++ LSX_3RN_DESC_BASE<"vsrlr.d", LSX128DOpnd>; ++ ++ ++def VSRAR_B : LSX_3R<0b01110000111100100>, ++ LSX_3RN_DESC_BASE<"vsrar.b", LSX128BOpnd>; ++ ++def VSRAR_H : LSX_3R<0b01110000111100101>, ++ LSX_3RN_DESC_BASE<"vsrar.h", LSX128HOpnd>; ++ ++def VSRAR_W : LSX_3R<0b01110000111100110>, ++ LSX_3RN_DESC_BASE<"vsrar.w", LSX128WOpnd>; ++ ++def VSRAR_D : LSX_3R<0b01110000111100111>, ++ LSX_3RN_DESC_BASE<"vsrar.d", LSX128DOpnd>; ++ ++ ++def VBITCLR_B : LSX_3R<0b01110001000011000>, ++ LSX_3R_DESC_BASE<"vbitclr.b", vbitclr_b, LSX128BOpnd>; ++ ++def VBITCLR_H : LSX_3R<0b01110001000011001>, ++ LSX_3R_DESC_BASE<"vbitclr.h", vbitclr_h, 
LSX128HOpnd>; ++ ++def VBITCLR_W : LSX_3R<0b01110001000011010>, ++ LSX_3R_DESC_BASE<"vbitclr.w", vbitclr_w, LSX128WOpnd>; ++ ++def VBITCLR_D : LSX_3R<0b01110001000011011>, ++ LSX_3R_DESC_BASE<"vbitclr.d", vbitclr_d, LSX128DOpnd>; ++ ++ ++def VBITSET_B : LSX_3R<0b01110001000011100>, ++ LSX_3RN_DESC_BASE<"vbitset.b", LSX128BOpnd>; ++ ++def VBITSET_H : LSX_3R<0b01110001000011101>, ++ LSX_3RN_DESC_BASE<"vbitset.h", LSX128HOpnd>; ++ ++def VBITSET_W : LSX_3R<0b01110001000011110>, ++ LSX_3RN_DESC_BASE<"vbitset.w", LSX128WOpnd>; ++ ++def VBITSET_D : LSX_3R<0b01110001000011111>, ++ LSX_3RN_DESC_BASE<"vbitset.d", LSX128DOpnd>; ++ ++ ++def VBITREV_B : LSX_3R<0b01110001000100000>, ++ LSX_3RN_DESC_BASE<"vbitrev.b", LSX128BOpnd>; ++ ++def VBITREV_H : LSX_3R<0b01110001000100001>, ++ LSX_3RN_DESC_BASE<"vbitrev.h", LSX128HOpnd>; ++ ++def VBITREV_W : LSX_3R<0b01110001000100010>, ++ LSX_3RN_DESC_BASE<"vbitrev.w", LSX128WOpnd>; ++ ++def VBITREV_D : LSX_3R<0b01110001000100011>, ++ LSX_3RN_DESC_BASE<"vbitrev.d", LSX128DOpnd>; ++ ++ ++def VPACKEV_B : LSX_3R<0b01110001000101100>, ++ LSX_3R_DESC_BASE<"vpackev.b", LoongArchVPACKEV, LSX128BOpnd>; ++ ++def VPACKEV_H : LSX_3R<0b01110001000101101>, ++ LSX_3R_DESC_BASE<"vpackev.h", LoongArchVPACKEV, LSX128HOpnd>; ++ ++def VPACKEV_W : LSX_3R<0b01110001000101110>, ++ LSX_3R_DESC_BASE<"vpackev.w", LoongArchVPACKEV, LSX128WOpnd>; ++ ++def VPACKEV_D : LSX_3R<0b01110001000101111>, ++ LSX_3R_DESC_BASE<"vpackev.d", LoongArchVPACKEV, LSX128DOpnd>; ++ ++ ++def VPACKOD_B : LSX_3R<0b01110001000110000>, ++ LSX_3R_DESC_BASE<"vpackod.b", LoongArchVPACKOD, LSX128BOpnd>; ++ ++def VPACKOD_H : LSX_3R<0b01110001000110001>, ++ LSX_3R_DESC_BASE<"vpackod.h", LoongArchVPACKOD, LSX128HOpnd>; ++ ++def VPACKOD_W : LSX_3R<0b01110001000110010>, ++ LSX_3R_DESC_BASE<"vpackod.w", LoongArchVPACKOD, LSX128WOpnd>; ++ ++def VPACKOD_D : LSX_3R<0b01110001000110011>, ++ LSX_3R_DESC_BASE<"vpackod.d", LoongArchVPACKOD, LSX128DOpnd>; ++ ++ ++def VILVL_B : LSX_3R<0b01110001000110100>, ++ LSX_3R_DESC_BASE<"vilvl.b", LoongArchVILVL, LSX128BOpnd>; ++ ++def VILVL_H : LSX_3R<0b01110001000110101>, ++ LSX_3R_DESC_BASE<"vilvl.h", LoongArchVILVL, LSX128HOpnd>; ++ ++def VILVL_W : LSX_3R<0b01110001000110110>, ++ LSX_3R_DESC_BASE<"vilvl.w", LoongArchVILVL, LSX128WOpnd>; ++ ++def VILVL_D : LSX_3R<0b01110001000110111>, ++ LSX_3R_DESC_BASE<"vilvl.d", LoongArchVILVL, LSX128DOpnd>; ++ ++ ++def VILVH_B : LSX_3R<0b01110001000111000>, ++ LSX_3R_DESC_BASE<"vilvh.b", LoongArchVILVH, LSX128BOpnd>; ++ ++def VILVH_H : LSX_3R<0b01110001000111001>, ++ LSX_3R_DESC_BASE<"vilvh.h", LoongArchVILVH, LSX128HOpnd>; ++ ++def VILVH_W : LSX_3R<0b01110001000111010>, ++ LSX_3R_DESC_BASE<"vilvh.w", LoongArchVILVH, LSX128WOpnd>; ++ ++def VILVH_D : LSX_3R<0b01110001000111011>, ++ LSX_3R_DESC_BASE<"vilvh.d", LoongArchVILVH, LSX128DOpnd>; ++ ++ ++def VPICKEV_B : LSX_3R<0b01110001000111100>, ++ LSX_3R_DESC_BASE<"vpickev.b", LoongArchVPICKEV, LSX128BOpnd>; ++ ++def VPICKEV_H : LSX_3R<0b01110001000111101>, ++ LSX_3R_DESC_BASE<"vpickev.h", LoongArchVPICKEV, LSX128HOpnd>; ++ ++def VPICKEV_W : LSX_3R<0b01110001000111110>, ++ LSX_3R_DESC_BASE<"vpickev.w", LoongArchVPICKEV, LSX128WOpnd>; ++ ++def VPICKEV_D : LSX_3R<0b01110001000111111>, ++ LSX_3R_DESC_BASE<"vpickev.d", LoongArchVPICKEV, LSX128DOpnd>; ++ ++ ++def VPICKOD_B : LSX_3R<0b01110001001000000>, ++ LSX_3R_DESC_BASE<"vpickod.b", LoongArchVPICKOD, LSX128BOpnd>; ++ ++def VPICKOD_H : LSX_3R<0b01110001001000001>, ++ LSX_3R_DESC_BASE<"vpickod.h", LoongArchVPICKOD, LSX128HOpnd>; ++ ++def VPICKOD_W : 
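++// vpackev/vpackod, vilvl/vilvh and vpickev/vpickod select the dedicated
++// LoongArch shuffle nodes (LoongArchVPACKEV/VPACKOD, VILVL/VILVH,
++// VPICKEV/VPICKOD), and vreplve.* broadcast the lane selected by a GPR via
++// the vsplati*_elt fragments.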
LSX_3R<0b01110001001000010>, ++ LSX_3R_DESC_BASE<"vpickod.w", LoongArchVPICKOD, LSX128WOpnd>; ++ ++def VPICKOD_D : LSX_3R<0b01110001001000011>, ++ LSX_3R_DESC_BASE<"vpickod.d", LoongArchVPICKOD, LSX128DOpnd>; ++ ++ ++def VREPLVE_B : LSX_3R_1GP<0b01110001001000100>, ++ LSX_3R_VREPLVE_DESC_BASE<"vreplve.b", vsplati8_elt, LSX128BOpnd>; ++ ++def VREPLVE_H : LSX_3R_1GP<0b01110001001000101>, ++ LSX_3R_VREPLVE_DESC_BASE<"vreplve.h", vsplati16_elt, LSX128HOpnd>; ++ ++def VREPLVE_W : LSX_3R_1GP<0b01110001001000110>, ++ LSX_3R_VREPLVE_DESC_BASE<"vreplve.w", vsplati32_elt, LSX128WOpnd>; ++ ++def VREPLVE_D : LSX_3R_1GP<0b01110001001000111>, ++ LSX_3R_VREPLVE_DESC_BASE<"vreplve.d", vsplati64_elt, LSX128DOpnd>; ++ ++ ++def VAND_V : LSX_3R<0b01110001001001100>, ++ LSX_VEC_DESC_BASE<"vand.v", and, LSX128BOpnd>; ++class AND_V_H_PSEUDO_DESC : LSX_VEC_PSEUDO_BASE; ++class AND_V_W_PSEUDO_DESC : LSX_VEC_PSEUDO_BASE; ++class AND_V_D_PSEUDO_DESC : LSX_VEC_PSEUDO_BASE; ++ ++def AND_V_H_PSEUDO : AND_V_H_PSEUDO_DESC, ++ PseudoInstExpansion<(VAND_V LSX128BOpnd:$vd, ++ LSX128BOpnd:$vj, ++ LSX128BOpnd:$vk)>; ++def AND_V_W_PSEUDO : AND_V_W_PSEUDO_DESC, ++ PseudoInstExpansion<(VAND_V LSX128BOpnd:$vd, ++ LSX128BOpnd:$vj, ++ LSX128BOpnd:$vk)>; ++def AND_V_D_PSEUDO : AND_V_D_PSEUDO_DESC, ++ PseudoInstExpansion<(VAND_V LSX128BOpnd:$vd, ++ LSX128BOpnd:$vj, ++ LSX128BOpnd:$vk)>; ++ ++ ++def VOR_V : LSX_3R<0b01110001001001101>, ++ LSX_VEC_DESC_BASE<"vor.v", or, LSX128BOpnd>; ++class OR_V_H_PSEUDO_DESC : LSX_VEC_PSEUDO_BASE; ++class OR_V_W_PSEUDO_DESC : LSX_VEC_PSEUDO_BASE; ++class OR_V_D_PSEUDO_DESC : LSX_VEC_PSEUDO_BASE; ++ ++def OR_V_H_PSEUDO : OR_V_H_PSEUDO_DESC, ++ PseudoInstExpansion<(VOR_V LSX128BOpnd:$vd, ++ LSX128BOpnd:$vj, ++ LSX128BOpnd:$vk)>; ++def OR_V_W_PSEUDO : OR_V_W_PSEUDO_DESC, ++ PseudoInstExpansion<(VOR_V LSX128BOpnd:$vd, ++ LSX128BOpnd:$vj, ++ LSX128BOpnd:$vk)>; ++def OR_V_D_PSEUDO : OR_V_D_PSEUDO_DESC, ++ PseudoInstExpansion<(VOR_V LSX128BOpnd:$vd, ++ LSX128BOpnd:$vj, ++ LSX128BOpnd:$vk)>; ++ ++ ++def VXOR_V : LSX_3R<0b01110001001001110>, ++ LSX_VEC_DESC_BASE<"vxor.v", xor, LSX128BOpnd>; ++class XOR_V_H_PSEUDO_DESC : LSX_VEC_PSEUDO_BASE; ++class XOR_V_W_PSEUDO_DESC : LSX_VEC_PSEUDO_BASE; ++class XOR_V_D_PSEUDO_DESC : LSX_VEC_PSEUDO_BASE; ++ ++def XOR_V_H_PSEUDO : XOR_V_H_PSEUDO_DESC, ++ PseudoInstExpansion<(VXOR_V LSX128BOpnd:$vd, ++ LSX128BOpnd:$vj, ++ LSX128BOpnd:$vk)>; ++def XOR_V_W_PSEUDO : XOR_V_W_PSEUDO_DESC, ++ PseudoInstExpansion<(VXOR_V LSX128BOpnd:$vd, ++ LSX128BOpnd:$vj, ++ LSX128BOpnd:$vk)>; ++def XOR_V_D_PSEUDO : XOR_V_D_PSEUDO_DESC, ++ PseudoInstExpansion<(VXOR_V LSX128BOpnd:$vd, ++ LSX128BOpnd:$vj, ++ LSX128BOpnd:$vk)>; ++ ++ ++def VNOR_V : LSX_3R<0b01110001001001111>, ++ LSX_VEC_DESC_BASE<"vnor.v", LoongArchVNOR, LSX128BOpnd>; ++class NOR_V_H_PSEUDO_DESC : LSX_VEC_PSEUDO_BASE; ++class NOR_V_W_PSEUDO_DESC : LSX_VEC_PSEUDO_BASE; ++class NOR_V_D_PSEUDO_DESC : LSX_VEC_PSEUDO_BASE; ++ ++def NOR_V_H_PSEUDO : NOR_V_H_PSEUDO_DESC, ++ PseudoInstExpansion<(VNOR_V LSX128BOpnd:$vd, ++ LSX128BOpnd:$vj, ++ LSX128BOpnd:$vk)>; ++def NOR_V_W_PSEUDO : NOR_V_W_PSEUDO_DESC, ++ PseudoInstExpansion<(VNOR_V LSX128BOpnd:$vd, ++ LSX128BOpnd:$vj, ++ LSX128BOpnd:$vk)>; ++def NOR_V_D_PSEUDO : NOR_V_D_PSEUDO_DESC, ++ PseudoInstExpansion<(VNOR_V LSX128BOpnd:$vd, ++ LSX128BOpnd:$vj, ++ LSX128BOpnd:$vk)>; ++ ++ ++def VFADD_S : LSX_3R<0b01110001001100001>, IsCommutable, ++ LSX_3RF_DESC_BASE<"vfadd.s", fadd, LSX128WOpnd>; ++ ++def VFADD_D : LSX_3R<0b01110001001100010>, IsCommutable, ++ LSX_3RF_DESC_BASE<"vfadd.d", fadd, 
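++// vand.v/vor.v/vxor.v/vnor.v operate on the whole 128-bit register, so the
++// *_H/_W/_D pseudos exist only to give ISel a pattern per element type;
++// each one expands back to the single .v instruction through
++// PseudoInstExpansion.  The FP arithmetic defs map straight to
++// fadd/fsub/fmul/fdiv.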
LSX128DOpnd>; ++ ++ ++def VFSUB_S : LSX_3R<0b01110001001100101>, ++ LSX_3RF_DESC_BASE<"vfsub.s", fsub, LSX128WOpnd>; ++ ++def VFSUB_D : LSX_3R<0b01110001001100110>, ++ LSX_3RF_DESC_BASE<"vfsub.d", fsub, LSX128DOpnd>; ++ ++ ++def VFMUL_S : LSX_3R<0b01110001001110001>, ++ LSX_3RF_DESC_BASE<"vfmul.s", fmul, LSX128WOpnd>; ++ ++def VFMUL_D : LSX_3R<0b01110001001110010>, ++ LSX_3RF_DESC_BASE<"vfmul.d", fmul, LSX128DOpnd>; ++ ++ ++def VFDIV_S : LSX_3R<0b01110001001110101>, ++ LSX_3RF_DESC_BASE<"vfdiv.s", fdiv, LSX128WOpnd>; ++ ++def VFDIV_D : LSX_3R<0b01110001001110110>, ++ LSX_3RF_DESC_BASE<"vfdiv.d", fdiv, LSX128DOpnd>; ++ ++ ++def VFMAX_S : LSX_3R<0b01110001001111001>, ++ LSX_3RFN_DESC_BASE<"vfmax.s", LSX128WOpnd>; ++ ++def VFMAX_D : LSX_3R<0b01110001001111010>, ++ LSX_3RFN_DESC_BASE<"vfmax.d", LSX128DOpnd>; ++ ++ ++def VFMIN_S : LSX_3R<0b01110001001111101>, ++ LSX_3RFN_DESC_BASE<"vfmin.s", LSX128WOpnd>; ++ ++def VFMIN_D : LSX_3R<0b01110001001111110>, ++ LSX_3RFN_DESC_BASE<"vfmin.d", LSX128DOpnd>; ++ ++ ++def VFMAXA_S : LSX_3R<0b01110001010000001>, ++ LSX_3RFN_DESC_BASE<"vfmaxa.s", LSX128WOpnd>; ++ ++def VFMAXA_D : LSX_3R<0b01110001010000010>, ++ LSX_3RFN_DESC_BASE<"vfmaxa.d", LSX128DOpnd>; ++ ++ ++def VFMINA_S : LSX_3R<0b01110001010000101>, ++ LSX_3RFN_DESC_BASE<"vfmina.s", LSX128WOpnd>; ++ ++def VFMINA_D : LSX_3R<0b01110001010000110>, ++ LSX_3RFN_DESC_BASE<"vfmina.d", LSX128DOpnd>; ++ ++ ++def VSHUF_H : LSX_3R<0b01110001011110101>, ++ LSX_3R_VSHF_DESC_BASE<"vshuf.h", LSX128HOpnd>; ++ ++def VSHUF_W : LSX_3R<0b01110001011110110>, ++ LSX_3R_VSHF_DESC_BASE<"vshuf.w", LSX128WOpnd>; ++ ++def VSHUF_D : LSX_3R<0b01110001011110111>, ++ LSX_3R_VSHF_DESC_BASE<"vshuf.d", LSX128DOpnd>; ++ ++ ++def VSEQI_B : LSX_I5<0b01110010100000000>, ++ LSX_I5_DESC_BASE_Intrinsic<"vseqi.b", int_loongarch_lsx_vseqi_b, simm5_32, immSExt5, LSX128BOpnd>; ++ ++def VSEQI_H : LSX_I5<0b01110010100000001>, ++ LSX_I5_DESC_BASE_Intrinsic<"vseqi.h", int_loongarch_lsx_vseqi_h, simm5_32, immSExt5, LSX128HOpnd>; ++ ++def VSEQI_W : LSX_I5<0b01110010100000010>, ++ LSX_I5_DESC_BASE_Intrinsic<"vseqi.w", int_loongarch_lsx_vseqi_w, simm5_32, immSExt5, LSX128WOpnd>; ++ ++def VSEQI_D : LSX_I5<0b01110010100000011>, ++ LSX_I5_DESC_BASE_Intrinsic<"vseqi.d", int_loongarch_lsx_vseqi_d, simm5_32, immSExt5, LSX128DOpnd>; ++ ++ ++def VSLEI_B : LSX_I5<0b01110010100000100>, ++ LSX_I5_DESC_BASE_Intrinsic<"vslei.b", int_loongarch_lsx_vslei_b, simm5_32, immSExt5, LSX128BOpnd>; ++ ++def VSLEI_H : LSX_I5<0b01110010100000101>, ++ LSX_I5_DESC_BASE_Intrinsic<"vslei.h", int_loongarch_lsx_vslei_h, simm5_32, immSExt5, LSX128HOpnd>; ++ ++def VSLEI_W : LSX_I5<0b01110010100000110>, ++ LSX_I5_DESC_BASE_Intrinsic<"vslei.w", int_loongarch_lsx_vslei_w, simm5_32, immSExt5, LSX128WOpnd>; ++ ++def VSLEI_D : LSX_I5<0b01110010100000111>, ++ LSX_I5_DESC_BASE_Intrinsic<"vslei.d", int_loongarch_lsx_vslei_d, simm5_32, immSExt5, LSX128DOpnd>; ++ ++ ++def VSLEI_BU : LSX_I5_U<0b01110010100001000>, ++ LSX_I5_U_DESC_BASE_Intrinsic<"vslei.bu", int_loongarch_lsx_vslei_bu, uimm5, immZExt5, LSX128BOpnd>; ++ ++def VSLEI_HU : LSX_I5_U<0b01110010100001001>, ++ LSX_I5_U_DESC_BASE_Intrinsic<"vslei.hu", int_loongarch_lsx_vslei_hu, uimm5, immZExt5, LSX128HOpnd>; ++ ++def VSLEI_WU : LSX_I5_U<0b01110010100001010>, ++ LSX_I5_U_DESC_BASE_Intrinsic<"vslei.wu", int_loongarch_lsx_vslei_wu, uimm5, immZExt5, LSX128WOpnd>; ++ ++def VSLEI_DU : LSX_I5_U<0b01110010100001011>, ++ LSX_I5_U_DESC_BASE_Intrinsic<"vslei.du", int_loongarch_lsx_vslei_du, uimm5, immZExt5, LSX128DOpnd>; ++ ++ ++def VSLTI_B : 
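++// The compare-with-immediate forms (vseqi/vslei/vslti) and vmaxi/vmini are
++// described with the *_Intrinsic classes, i.e. they are selected through
++// the int_loongarch_lsx_* intrinsics, while vaddi/vsubi use the
++// splat-immediate complex patterns so an ordinary add/sub of a constant
++// splat also matches.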
LSX_I5<0b01110010100001100>, ++ LSX_I5_DESC_BASE_Intrinsic<"vslti.b", int_loongarch_lsx_vslti_b, simm5_32, immSExt5, LSX128BOpnd>; ++ ++def VSLTI_H : LSX_I5<0b01110010100001101>, ++ LSX_I5_DESC_BASE_Intrinsic<"vslti.h", int_loongarch_lsx_vslti_h, simm5_32, immSExt5, LSX128HOpnd>; ++ ++def VSLTI_W : LSX_I5<0b01110010100001110>, ++ LSX_I5_DESC_BASE_Intrinsic<"vslti.w", int_loongarch_lsx_vslti_w, simm5_32, immSExt5, LSX128WOpnd>; ++ ++def VSLTI_D : LSX_I5<0b01110010100001111>, ++ LSX_I5_DESC_BASE_Intrinsic<"vslti.d", int_loongarch_lsx_vslti_d, simm5_32, immSExt5, LSX128DOpnd>; ++ ++ ++def VSLTI_BU : LSX_I5_U<0b01110010100010000>, ++ LSX_I5_U_DESC_BASE_Intrinsic<"vslti.bu", int_loongarch_lsx_vslti_bu, uimm5, immZExt5, LSX128BOpnd>; ++ ++def VSLTI_HU : LSX_I5_U<0b01110010100010001>, ++ LSX_I5_U_DESC_BASE_Intrinsic<"vslti.hu", int_loongarch_lsx_vslti_hu, uimm5, immZExt5, LSX128HOpnd>; ++ ++def VSLTI_WU : LSX_I5_U<0b01110010100010010>, ++ LSX_I5_U_DESC_BASE_Intrinsic<"vslti.wu", int_loongarch_lsx_vslti_wu, uimm5, immZExt5, LSX128WOpnd>; ++ ++def VSLTI_DU : LSX_I5_U<0b01110010100010011>, ++ LSX_I5_U_DESC_BASE_Intrinsic<"vslti.du", int_loongarch_lsx_vslti_du, uimm5, immZExt5, LSX128DOpnd>; ++ ++ ++def VADDI_BU : LSX_I5_U<0b01110010100010100>, ++ LSX_I5_U_DESC_BASE<"vaddi.bu", add, vsplati8_uimm5, LSX128BOpnd>; ++ ++def VADDI_HU : LSX_I5_U<0b01110010100010101>, ++ LSX_I5_U_DESC_BASE<"vaddi.hu", add, vsplati16_uimm5, LSX128HOpnd>; ++ ++def VADDI_WU : LSX_I5_U<0b01110010100010110>, ++ LSX_I5_U_DESC_BASE<"vaddi.wu", add, vsplati32_uimm5, LSX128WOpnd>; ++ ++def VADDI_DU : LSX_I5_U<0b01110010100010111>, ++ LSX_I5_U_DESC_BASE<"vaddi.du", add, vsplati64_uimm5, LSX128DOpnd>; ++ ++ ++def VSUBI_BU : LSX_I5_U<0b01110010100011000>, ++ LSX_I5_U_DESC_BASE<"vsubi.bu", sub, vsplati8_uimm5, LSX128BOpnd>; ++ ++def VSUBI_HU : LSX_I5_U<0b01110010100011001>, ++ LSX_I5_U_DESC_BASE<"vsubi.hu", sub, vsplati16_uimm5, LSX128HOpnd>; ++ ++def VSUBI_WU : LSX_I5_U<0b01110010100011010>, ++ LSX_I5_U_DESC_BASE<"vsubi.wu", sub, vsplati32_uimm5, LSX128WOpnd>; ++ ++def VSUBI_DU : LSX_I5_U<0b01110010100011011>, ++ LSX_I5_U_DESC_BASE<"vsubi.du", sub, vsplati64_uimm5, LSX128DOpnd>; ++ ++ ++def VMAXI_B : LSX_I5<0b01110010100100000>, ++ LSX_I5_DESC_BASE_Intrinsic<"vmaxi.b", int_loongarch_lsx_vmaxi_b, simm5_32, immSExt5, LSX128BOpnd>; ++ ++def VMAXI_H : LSX_I5<0b01110010100100001>, ++ LSX_I5_DESC_BASE_Intrinsic<"vmaxi.h", int_loongarch_lsx_vmaxi_h, simm5_32, immSExt5, LSX128HOpnd>; ++ ++def VMAXI_W : LSX_I5<0b01110010100100010>, ++ LSX_I5_DESC_BASE_Intrinsic<"vmaxi.w", int_loongarch_lsx_vmaxi_w, simm5_32, immSExt5, LSX128WOpnd>; ++ ++def VMAXI_D : LSX_I5<0b01110010100100011>, ++ LSX_I5_DESC_BASE_Intrinsic<"vmaxi.d", int_loongarch_lsx_vmaxi_d, simm5_32, immSExt5, LSX128DOpnd>; ++ ++ ++def VMINI_B : LSX_I5<0b01110010100100100>, ++ LSX_I5_DESC_BASE_Intrinsic<"vmini.b", int_loongarch_lsx_vmini_b, simm5_32, immSExt5, LSX128BOpnd>; ++ ++def VMINI_H : LSX_I5<0b01110010100100101>, ++ LSX_I5_DESC_BASE_Intrinsic<"vmini.h", int_loongarch_lsx_vmini_h, simm5_32, immSExt5, LSX128HOpnd>; ++ ++def VMINI_W : LSX_I5<0b01110010100100110>, ++ LSX_I5_DESC_BASE_Intrinsic<"vmini.w", int_loongarch_lsx_vmini_w, simm5_32, immSExt5, LSX128WOpnd>; ++ ++def VMINI_D : LSX_I5<0b01110010100100111>, ++ LSX_I5_DESC_BASE_Intrinsic<"vmini.d", int_loongarch_lsx_vmini_d, simm5_32, immSExt5, LSX128DOpnd>; ++ ++ ++def VMAXI_BU : LSX_I5_U<0b01110010100101000>, ++ LSX_I5_U_DESC_BASE_Intrinsic<"vmaxi.bu", int_loongarch_lsx_vmaxi_bu, uimm5, immZExt5, LSX128BOpnd>; ++ ++def VMAXI_HU : 
LSX_I5_U<0b01110010100101001>, ++ LSX_I5_U_DESC_BASE_Intrinsic<"vmaxi.hu", int_loongarch_lsx_vmaxi_hu, uimm5, immZExt5, LSX128HOpnd>; ++ ++def VMAXI_WU : LSX_I5_U<0b01110010100101010>, ++ LSX_I5_U_DESC_BASE_Intrinsic<"vmaxi.wu", int_loongarch_lsx_vmaxi_wu, uimm5, immZExt5, LSX128WOpnd>; ++ ++def VMAXI_DU : LSX_I5_U<0b01110010100101011>, ++ LSX_I5_U_DESC_BASE_Intrinsic<"vmaxi.du", int_loongarch_lsx_vmaxi_du, uimm5, immZExt5, LSX128DOpnd>; ++ ++ ++def VMINI_BU : LSX_I5_U<0b01110010100101100>, ++ LSX_I5_U_DESC_BASE<"vmini.bu", umin, vsplati8_uimm5, LSX128BOpnd>; ++ ++def VMINI_HU : LSX_I5_U<0b01110010100101101>, ++ LSX_I5_U_DESC_BASE<"vmini.hu", umin, vsplati16_uimm5, LSX128HOpnd>; ++ ++def VMINI_WU : LSX_I5_U<0b01110010100101110>, ++ LSX_I5_U_DESC_BASE<"vmini.wu", umin, vsplati32_uimm5, LSX128WOpnd>; ++ ++def VMINI_DU : LSX_I5_U<0b01110010100101111>, ++ LSX_I5_U_DESC_BASE<"vmini.du", umin, vsplati64_uimm5, LSX128DOpnd>; ++ ++ ++def VCLO_B : LSX_2R<0b0111001010011100000000>, ++ LSX_2RN_DESC_BASE<"vclo.b", LSX128BOpnd>; ++ ++def VCLO_H : LSX_2R<0b0111001010011100000001>, ++ LSX_2RN_DESC_BASE<"vclo.h", LSX128HOpnd>; ++ ++def VCLO_W : LSX_2R<0b0111001010011100000010>, ++ LSX_2RN_DESC_BASE<"vclo.w", LSX128WOpnd>; ++ ++def VCLO_D : LSX_2R<0b0111001010011100000011>, ++ LSX_2RN_DESC_BASE<"vclo.d", LSX128DOpnd>; ++ ++ ++def VCLZ_B : LSX_2R<0b0111001010011100000100>, ++ LSX_2R_DESC_BASE<"vclz.b", ctlz, LSX128BOpnd>; ++ ++def VCLZ_H : LSX_2R<0b0111001010011100000101>, ++ LSX_2R_DESC_BASE<"vclz.h", ctlz, LSX128HOpnd>; ++ ++def VCLZ_W : LSX_2R<0b0111001010011100000110>, ++ LSX_2R_DESC_BASE<"vclz.w", ctlz, LSX128WOpnd>; ++ ++def VCLZ_D : LSX_2R<0b0111001010011100000111>, ++ LSX_2R_DESC_BASE<"vclz.d", ctlz, LSX128DOpnd>; ++ ++ ++def VPCNT_B : LSX_2R<0b0111001010011100001000>, ++ LSX_2R_DESC_BASE<"vpcnt.b", ctpop, LSX128BOpnd>; ++ ++def VPCNT_H : LSX_2R<0b0111001010011100001001>, ++ LSX_2R_DESC_BASE<"vpcnt.h", ctpop, LSX128HOpnd>; ++ ++def VPCNT_W : LSX_2R<0b0111001010011100001010>, ++ LSX_2R_DESC_BASE<"vpcnt.w", ctpop, LSX128WOpnd>; ++ ++def VPCNT_D : LSX_2R<0b0111001010011100001011>, ++ LSX_2R_DESC_BASE<"vpcnt.d", ctpop, LSX128DOpnd>; ++ ++ ++def VFLOGB_S : LSX_2R<0b0111001010011100110001>, ++ LSX_2RFN_DESC_BASE<"vflogb.s", LSX128WOpnd>; ++ ++def VFLOGB_D : LSX_2R<0b0111001010011100110010>, ++ LSX_2RFN_DESC_BASE<"vflogb.d", LSX128DOpnd>; ++ ++ ++def VFCLASS_S : LSX_2R<0b0111001010011100110101>, ++ LSX_2RFN_DESC_BASE<"vfclass.s", LSX128WOpnd>; ++ ++def VFCLASS_D : LSX_2R<0b0111001010011100110110>, ++ LSX_2RFN_DESC_BASE<"vfclass.d", LSX128DOpnd>; ++ ++ ++def VFSQRT_S : LSX_2R<0b0111001010011100111001>, ++ LSX_2RF_DESC_BASE<"vfsqrt.s", fsqrt, LSX128WOpnd>; ++ ++def VFSQRT_D : LSX_2R<0b0111001010011100111010>, ++ LSX_2RF_DESC_BASE<"vfsqrt.d", fsqrt, LSX128DOpnd>; ++ ++ ++def VFRECIP_S : LSX_2R<0b0111001010011100111101>, ++ LSX_2RFN_DESC_BASE<"vfrecip.s", LSX128WOpnd>; ++ ++def VFRECIP_D : LSX_2R<0b0111001010011100111110>, ++ LSX_2RFN_DESC_BASE<"vfrecip.d", LSX128DOpnd>; ++ ++ ++def VFRSQRT_S : LSX_2R<0b0111001010011101000001>, ++ LSX_2RFN_DESC_BASE<"vfrsqrt.s", LSX128WOpnd>; ++ ++def VFRSQRT_D : LSX_2R<0b0111001010011101000010>, ++ LSX_2RFN_DESC_BASE<"vfrsqrt.d", LSX128DOpnd>; ++ ++ ++def VFRINT_S : LSX_2R<0b0111001010011101001101>, ++ LSX_2RF_DESC_BASE<"vfrint.s", frint, LSX128WOpnd>; ++ ++def VFRINT_D : LSX_2R<0b0111001010011101001110>, ++ LSX_2RF_DESC_BASE<"vfrint.d", frint, LSX128DOpnd>; ++ ++ ++def VFCVTL_S_H : LSX_2R<0b0111001010011101111010>, ++ LSX_2RFN_DESC_BASE_CVT<"vfcvtl.s.h", LSX128WOpnd, 
LSX128HOpnd>; ++ ++def VFCVTH_S_H : LSX_2R<0b0111001010011101111011>, ++ LSX_2RFN_DESC_BASE_CVT<"vfcvth.s.h", LSX128WOpnd, LSX128HOpnd>; ++ ++ ++def VFCVTL_D_S : LSX_2R<0b0111001010011101111100>, ++ LSX_2RFN_DESC_BASE_CVT<"vfcvtl.d.s", LSX128DOpnd, LSX128WOpnd>; ++ ++def VFCVTH_D_S : LSX_2R<0b0111001010011101111101>, ++ LSX_2RFN_DESC_BASE_CVT<"vfcvth.d.s", LSX128DOpnd, LSX128WOpnd>; ++ ++ ++def VFFINT_S_W : LSX_2R<0b0111001010011110000000>, ++ LSX_2RF_DESC_BASE<"vffint.s.w", sint_to_fp, LSX128WOpnd>; ++ ++def VFFINT_S_WU : LSX_2R<0b0111001010011110000001>, ++ LSX_2RF_DESC_BASE<"vffint.s.wu", uint_to_fp, LSX128WOpnd>; ++ ++ ++def VFFINT_D_L : LSX_2R<0b0111001010011110000010>, ++ LSX_2RF_DESC_BASE<"vffint.d.l", sint_to_fp, LSX128DOpnd>; ++ ++def VFFINT_D_LU : LSX_2R<0b0111001010011110000011>, ++ LSX_2RF_DESC_BASE<"vffint.d.lu", uint_to_fp, LSX128DOpnd>; ++ ++ ++def VFTINT_W_S : LSX_2R<0b0111001010011110001100>, ++ LSX_2RFN_DESC_BASE<"vftint.w.s", LSX128WOpnd>; ++ ++def VFTINT_L_D : LSX_2R<0b0111001010011110001101>, ++ LSX_2RFN_DESC_BASE<"vftint.l.d", LSX128DOpnd>; ++ ++ ++def VFTINT_WU_S : LSX_2R<0b0111001010011110010110>, ++ LSX_2RFN_DESC_BASE<"vftint.wu.s", LSX128WOpnd>; ++ ++def VFTINT_LU_D : LSX_2R<0b0111001010011110010111>, ++ LSX_2RFN_DESC_BASE<"vftint.lu.d", LSX128DOpnd>; ++ ++ ++def VFTINTRZ_WU_S : LSX_2R<0b0111001010011110011100>, ++ LSX_2RF_DESC_BASE<"vftintrz.wu.s", fp_to_uint, LSX128WOpnd>; ++ ++def VFTINTRZ_LU_D : LSX_2R<0b0111001010011110011101>, ++ LSX_2RF_DESC_BASE<"vftintrz.lu.d", fp_to_uint, LSX128DOpnd>; ++ ++ ++def VREPLGR2VR_B : LSX_2R_1GP<0b0111001010011111000000>, ++ LSX_2R_REPL_DESC_BASE<"vreplgr2vr.b", v16i8, vsplati8, LSX128BOpnd, GPR32Opnd>; ++ ++def VREPLGR2VR_H : LSX_2R_1GP<0b0111001010011111000001>, ++ LSX_2R_REPL_DESC_BASE<"vreplgr2vr.h", v8i16, vsplati16, LSX128HOpnd, GPR32Opnd>; ++ ++def VREPLGR2VR_W : LSX_2R_1GP<0b0111001010011111000010>, ++ LSX_2R_REPL_DESC_BASE<"vreplgr2vr.w", v4i32, vsplati32, LSX128WOpnd, GPR32Opnd>; ++ ++def VREPLGR2VR_D : LSX_2R_1GP<0b0111001010011111000011>, ++ LSX_2R_REPL_DESC_BASE<"vreplgr2vr.d", v2i64, vsplati64, LSX128DOpnd, GPR64Opnd>; ++ ++ ++class LSX_2R_FILL_PSEUDO_BASE : ++ LSXPseudo<(outs RCVD:$vd), (ins RCVS:$fs), ++ [(set RCVD:$vd, (OpNode RCVS:$fs))]> { ++ let usesCustomInserter = 1; ++} ++ ++class FILL_FW_PSEUDO_DESC : LSX_2R_FILL_PSEUDO_BASE; ++class FILL_FD_PSEUDO_DESC : LSX_2R_FILL_PSEUDO_BASE; ++ ++def FILL_FW_PSEUDO : FILL_FW_PSEUDO_DESC; ++def FILL_FD_PSEUDO : FILL_FD_PSEUDO_DESC; ++ ++ ++def VSRLRI_B : LSX_I3_U<0b0111001010100100001>, ++ LSX_BIT_3N_DESC_BASE<"vsrlri.b", uimm3, immZExt3, LSX128BOpnd>; ++ ++def VSRLRI_H : LSX_I4_U<0b011100101010010001>, ++ LSX_BIT_4N_DESC_BASE<"vsrlri.h", uimm4, immZExt4, LSX128HOpnd>; ++ ++def VSRLRI_W : LSX_I5_U<0b01110010101001001>, ++ LSX_BIT_5N_DESC_BASE<"vsrlri.w", uimm5, immZExt5, LSX128WOpnd>; ++ ++def VSRLRI_D : LSX_I6_U<0b0111001010100101>, ++ LSX_BIT_6N_DESC_BASE<"vsrlri.d", uimm6, immZExt6, LSX128DOpnd>; ++ ++ ++def VSRARI_B : LSX_I3_U<0b0111001010101000001>, ++ LSX_BIT_3N_DESC_BASE<"vsrari.b", uimm3, immZExt3, LSX128BOpnd>; ++ ++def VSRARI_H : LSX_I4_U<0b011100101010100001>, ++ LSX_BIT_4N_DESC_BASE<"vsrari.h", uimm4, immZExt4, LSX128HOpnd>; ++ ++def VSRARI_W : LSX_I5_U<0b01110010101010001>, ++ LSX_BIT_5N_DESC_BASE<"vsrari.w", uimm5, immZExt5, LSX128WOpnd>; ++ ++def VSRARI_D : LSX_I6_U<0b0111001010101001>, ++ LSX_BIT_6N_DESC_BASE<"vsrari.d", uimm6, immZExt6, LSX128DOpnd>; ++ ++ ++def VINSGR2VR_B : LSX_I4_R_U<0b011100101110101110>, ++ 
LSX_INSERT_U4_DESC_BASE<"vinsgr2vr.b", vinsert_v16i8, uimm4, immZExt4Ptr, LSX128BOpnd, GPR32Opnd>; ++ ++def VINSGR2VR_H : LSX_I3_R_U<0b0111001011101011110>, ++ LSX_INSERT_U3_DESC_BASE<"vinsgr2vr.h", vinsert_v8i16, uimm3, immZExt3Ptr, LSX128HOpnd, GPR32Opnd>; ++ ++def VINSGR2VR_W : LSX_I2_R_U<0b01110010111010111110>, ++ LSX_INSERT_U2_DESC_BASE<"vinsgr2vr.w", vinsert_v4i32, uimm2, immZExt2Ptr, LSX128WOpnd, GPR32Opnd>; ++ ++def VINSGR2VR_D : LSX_I1_R_U<0b011100101110101111110>, ++ LSX_INSERT_U1_DESC_BASE<"vinsgr2vr.d", vinsert_v2i64, uimm1, immZExt1Ptr, LSX128DOpnd, GPR64Opnd>; ++ ++ ++def VPICKVE2GR_B : LSX_ELM_COPY_B<0b011100101110111110>, ++ LSX_PICK_U4_DESC_BASE<"vpickve2gr.b", vextract_sext_i8, v16i8, uimm4_ptr, immZExt4Ptr, GPR32Opnd, LSX128BOpnd>; ++ ++def VPICKVE2GR_H : LSX_ELM_COPY_H<0b0111001011101111110>, ++ LSX_PICK_U3_DESC_BASE<"vpickve2gr.h", vextract_sext_i16, v8i16, uimm3_ptr, immZExt3Ptr, GPR32Opnd, LSX128HOpnd>; ++ ++def VPICKVE2GR_W : LSX_ELM_COPY_W<0b01110010111011111110>, ++ LSX_PICK_U2_DESC_BASE<"vpickve2gr.w", vextract_sext_i32, v4i32, uimm2_ptr, immZExt2Ptr, GPR32Opnd, LSX128WOpnd>; ++ ++def VPICKVE2GR_D : LSX_ELM_COPY_D<0b011100101110111111110>, ++ LSX_PICK_U1_DESC_BASE<"vpickve2gr.d", vextract_sext_i64, v2i64, uimm1_ptr, immZExt1Ptr, GPR64Opnd, LSX128DOpnd>; ++ ++ ++def VPICKVE2GR_BU : LSX_ELM_COPY_B<0b011100101111001110>, ++ LSX_PICK_U4_DESC_BASE<"vpickve2gr.bu", vextract_zext_i8, v16i8, uimm4_ptr, immZExt4Ptr, GPR32Opnd, LSX128BOpnd>; ++ ++def VPICKVE2GR_HU : LSX_ELM_COPY_H<0b0111001011110011110>, ++ LSX_PICK_U3_DESC_BASE<"vpickve2gr.hu", vextract_zext_i16, v8i16, uimm3_ptr, immZExt3Ptr, GPR32Opnd, LSX128HOpnd>; ++ ++def VPICKVE2GR_WU : LSX_ELM_COPY_W<0b01110010111100111110>, ++ LSX_PICK_U2_DESC_BASE<"vpickve2gr.wu", vextract_zext_i32, v4i32, uimm2_ptr, immZExt2Ptr, GPR32Opnd, LSX128WOpnd>; ++ ++def VPICKVE2GR_DU : LSX_ELM_COPY_D<0b011100101111001111110>, ++ LSX_PICK_U1_DESC_BASE<"vpickve2gr.du", int_loongarch_lsx_vpickve2gr_du, v2i64, uimm1, immZExt1, GPR64Opnd, LSX128DOpnd>; ++ ++ ++def : LSXPat<(vextract_zext_i64 (v2i64 LSX128D:$vj), immZExt1Ptr:$idx), ++ (VPICKVE2GR_D LSX128D:$vj, immZExt1:$idx)>; ++def : LSXPat<(vextract_zext_i64 (v2f64 LSX128D:$vj), immZExt1Ptr:$idx), ++ (VPICKVE2GR_D LSX128D:$vj, immZExt1:$idx)>; ++ ++ ++def VREPLVEI_B : LSX_I4_U<0b011100101111011110>, ++ LSX_ELM_U4_VREPLVE_DESC_BASE<"vreplvei.b", vsplati8_uimm4, LSX128BOpnd>; ++ ++def VREPLVEI_H : LSX_I3_U<0b0111001011110111110>, ++ LSX_ELM_U3_VREPLVE_DESC_BASE<"vreplvei.h", vsplati16_uimm3, LSX128HOpnd>; ++ ++def VREPLVEI_W : LSX_I2_U<0b01110010111101111110>, ++ LSX_ELM_U2_VREPLVE_DESC_BASE<"vreplvei.w", vsplati32_uimm2, LSX128WOpnd>; ++ ++def VREPLVEI_D : LSX_I1_U<0b011100101111011111110>, ++ LSX_ELM_U1_VREPLVE_DESC_BASE<"vreplvei.d", vsplati64_uimm1, LSX128DOpnd>; ++ ++ ++def VSAT_B : LSX_I3_U<0b0111001100100100001>, ++ LSX_BIT_3N_DESC_BASE<"vsat.b", uimm3, immZExt3, LSX128BOpnd>; ++ ++def VSAT_H : LSX_I4_U<0b011100110010010001>, ++ LSX_BIT_4N_DESC_BASE<"vsat.h", uimm4, immZExt4, LSX128HOpnd>; ++ ++def VSAT_W : LSX_I5_U<0b01110011001001001>, ++ LSX_BIT_5N_DESC_BASE<"vsat.w", uimm5, immZExt5, LSX128WOpnd>; ++ ++def VSAT_D : LSX_I6_U<0b0111001100100101>, ++ LSX_BIT_6N_DESC_BASE<"vsat.d", uimm6, immZExt6, LSX128DOpnd>; ++ ++ ++def VSAT_BU : LSX_I3_U<0b0111001100101000001>, ++ LSX_BIT_3N_DESC_BASE<"vsat.bu", uimm3, immZExt3, LSX128BOpnd>; ++ ++def VSAT_HU : LSX_I4_U<0b011100110010100001>, ++ LSX_BIT_4N_DESC_BASE<"vsat.hu", uimm4, immZExt4, LSX128HOpnd>; ++ ++def VSAT_WU : 
LSX_I5_U<0b01110011001010001>, ++ LSX_BIT_5N_DESC_BASE<"vsat.wu", uimm5, immZExt5, LSX128WOpnd>; ++ ++def VSAT_DU : LSX_I6_U<0b0111001100101001>, ++ LSX_BIT_6N_DESC_BASE<"vsat.du", uimm6, immZExt6, LSX128DOpnd>; ++ ++ ++def VSLLI_B : LSX_I3_U<0b0111001100101100001>, ++ LSX_BIT_U3_VREPLVE_DESC_BASE<"vslli.b", shl, vsplati8_uimm3, LSX128BOpnd>; ++ ++def VSLLI_H : LSX_I4_U<0b011100110010110001>, ++ LSX_BIT_U4_VREPLVE_DESC_BASE<"vslli.h", shl, vsplati16_uimm4, LSX128HOpnd>; ++ ++def VSLLI_W : LSX_I5_U<0b01110011001011001>, ++ LSX_BIT_U5_VREPLVE_DESC_BASE<"vslli.w", shl, vsplati32_uimm5, LSX128WOpnd>; ++ ++def VSLLI_D : LSX_I6_U<0b0111001100101101>, ++ LSX_BIT_U6_VREPLVE_DESC_BASE<"vslli.d", shl, vsplati64_uimm6, LSX128DOpnd>; ++ ++ ++def VSRLI_B : LSX_I3_U<0b0111001100110000001>, ++ LSX_BIT_U3_VREPLVE_DESC_BASE<"vsrli.b", srl, vsplati8_uimm3, LSX128BOpnd>; ++ ++def VSRLI_H : LSX_I4_U<0b011100110011000001>, ++ LSX_BIT_U4_VREPLVE_DESC_BASE<"vsrli.h", srl, vsplati16_uimm4, LSX128HOpnd>; ++ ++def VSRLI_W : LSX_I5_U<0b01110011001100001>, ++ LSX_BIT_U5_VREPLVE_DESC_BASE<"vsrli.w", srl, vsplati32_uimm5, LSX128WOpnd>; ++ ++def VSRLI_D : LSX_I6_U<0b0111001100110001>, ++ LSX_BIT_U6_VREPLVE_DESC_BASE<"vsrli.d", srl, vsplati64_uimm6, LSX128DOpnd>; ++ ++ ++def VSRAI_B : LSX_I3_U<0b0111001100110100001>, ++ LSX_BIT_U3_VREPLVE_DESC_BASE_Intrinsic<"vsrai.b", int_loongarch_lsx_vsrai_b, LSX128BOpnd>; ++ ++def VSRAI_H : LSX_I4_U<0b011100110011010001>, ++ LSX_BIT_U4_VREPLVE_DESC_BASE_Intrinsic<"vsrai.h", int_loongarch_lsx_vsrai_h, LSX128HOpnd>; ++ ++def VSRAI_W : LSX_I5_U<0b01110011001101001>, ++ LSX_BIT_U5_VREPLVE_DESC_BASE_Intrinsic<"vsrai.w", int_loongarch_lsx_vsrai_w, LSX128WOpnd>; ++ ++def VSRAI_D : LSX_I6_U<0b0111001100110101>, ++ LSX_BIT_U6_VREPLVE_DESC_BASE_Intrinsic<"vsrai.d", int_loongarch_lsx_vsrai_d, LSX128DOpnd>; ++ ++ ++def VSHUF4I_B : LSX_I8_U<0b01110011100100>, ++ LSX_I8_SHF_DESC_BASE<"vshuf4i.b", LSX128BOpnd>; ++ ++def VSHUF4I_H : LSX_I8_U<0b01110011100101>, ++ LSX_I8_SHF_DESC_BASE<"vshuf4i.h", LSX128HOpnd>; ++ ++def VSHUF4I_W : LSX_I8_U<0b01110011100110>, ++ LSX_I8_SHF_DESC_BASE<"vshuf4i.w", LSX128WOpnd>; ++ ++def VSHUF4I_D : LSX_I8_U<0b01110011100111>, ++ LSX_I8_SHUF_DESC_BASE_D<"vshuf4i.d", int_loongarch_lsx_vshuf4i_d, LSX128DOpnd>; ++ ++ ++def VROTR_B : LSX_3R<0b01110000111011100>, ++ LSX_3R_DESC_BASE<"vrotr.b", LoongArchVROR, LSX128BOpnd>; ++ ++def VROTR_H : LSX_3R<0b01110000111011101>, ++ LSX_3R_DESC_BASE<"vrotr.h", LoongArchVROR, LSX128HOpnd>; ++ ++def VROTR_W : LSX_3R<0b01110000111011110>, ++ LSX_3R_DESC_BASE<"vrotr.w", LoongArchVROR, LSX128WOpnd>; ++ ++def VROTR_D : LSX_3R<0b01110000111011111>, ++ LSX_3R_DESC_BASE<"vrotr.d", LoongArchVROR, LSX128DOpnd>; ++ ++ ++def VMSKLTZ_B : LSX_2R<0b0111001010011100010000>, ++ LSX_2RN_DESC_BASE<"vmskltz.b", LSX128BOpnd>; ++ ++def VMSKLTZ_H : LSX_2R<0b0111001010011100010001>, ++ LSX_2RN_DESC_BASE<"vmskltz.h", LSX128HOpnd>; ++ ++def VMSKLTZ_W : LSX_2R<0b0111001010011100010010>, ++ LSX_2RN_DESC_BASE<"vmskltz.w", LSX128WOpnd>; ++ ++def VMSKLTZ_D : LSX_2R<0b0111001010011100010011>, ++ LSX_2RN_DESC_BASE<"vmskltz.d", LSX128DOpnd>; ++ ++ ++def VROTRI_B : LSX_I3_U<0b0111001010100000001>, ++ LSX2_RORI_U3_DESC_BASE<"vrotri.b", uimm3, immZExt3, LSX128BOpnd>; ++ ++def VROTRI_H : LSX_I4_U<0b011100101010000001>, ++ LSX2_RORI_U4_DESC_BASE<"vrotri.h", uimm4, immZExt4, LSX128HOpnd>; ++ ++def VROTRI_W : LSX_I5_U<0b01110010101000001>, ++ LSX2_RORI_U5_DESC_BASE<"vrotri.w", uimm5, immZExt5, LSX128WOpnd>; ++ ++def VROTRI_D : LSX_I6_U<0b0111001010100001>, ++ 
LSX2_RORI_U6_DESC_BASE<"vrotri.d", uimm6, immZExt6, LSX128DOpnd>; ++ ++ ++def VSRLNI_B_H : LSX_I4_U<0b011100110100000001>, ++ LSX_BIND_U4N_DESC_BASE<"vsrlni.b.h", uimm4, immZExt4, LSX128BOpnd>; ++ ++def VSRLNI_H_W : LSX_I5_U<0b01110011010000001>, ++ LSX_BIND_U5N_DESC_BASE<"vsrlni.h.w", uimm5, immZExt5, LSX128HOpnd>; ++ ++def VSRLNI_W_D : LSX_I6_U<0b0111001101000001>, ++ LSX_BIND_U6N_DESC_BASE<"vsrlni.w.d", uimm6, immZExt6, LSX128WOpnd>; ++ ++def VSRLNI_D_Q : LSX_I7_U<0b011100110100001>, ++ LSX_BIND_U7N_DESC_BASE<"vsrlni.d.q", LSX128DOpnd>; ++ ++ ++def VSRLRNI_B_H : LSX_I4_U<0b011100110100010001>, ++ LSX_BIND_U4_DESC_BASE<"vsrlrni.b.h", int_loongarch_lsx_vsrlrni_b_h, uimm4, immZExt4, LSX128BOpnd>; ++ ++def VSRLRNI_H_W : LSX_I5_U<0b01110011010001001>, ++ LSX_BIND_U5_DESC_BASE<"vsrlrni.h.w", int_loongarch_lsx_vsrlrni_h_w, uimm5, immZExt5, LSX128HOpnd>; ++ ++def VSRLRNI_W_D : LSX_I6_U<0b0111001101000101>, ++ LSX_BIND_U6_DESC_BASE<"vsrlrni.w.d", int_loongarch_lsx_vsrlrni_w_d, uimm6, immZExt6, LSX128WOpnd>; ++ ++def VSRLRNI_D_Q : LSX_I7_U<0b011100110100011>, ++ LSX_BIND_U7_DESC_BASE<"vsrlrni.d.q", int_loongarch_lsx_vsrlrni_d_q, LSX128DOpnd>; ++ ++ ++def VSSRLNI_B_H : LSX_I4_U<0b011100110100100001>, ++ LSX_BIND_U4N_DESC_BASE<"vssrlni.b.h", uimm4, immZExt4, LSX128BOpnd>; ++ ++def VSSRLNI_H_W : LSX_I5_U<0b01110011010010001>, ++ LSX_BIND_U5N_DESC_BASE<"vssrlni.h.w", uimm5, immZExt5, LSX128HOpnd>; ++ ++def VSSRLNI_W_D : LSX_I6_U<0b0111001101001001>, ++ LSX_BIND_U6N_DESC_BASE<"vssrlni.w.d", uimm6, immZExt6, LSX128WOpnd>; ++ ++def VSSRLNI_D_Q : LSX_I7_U<0b011100110100101>, ++ LSX_BIND_U7N_DESC_BASE<"vssrlni.d.q", LSX128DOpnd>; ++ ++ ++def VSSRLNI_BU_H : LSX_I4_U<0b011100110100110001>, ++ LSX_BIND_U4N_DESC_BASE<"vssrlni.bu.h", uimm4, immZExt4, LSX128BOpnd> ; ++ ++def VSSRLNI_HU_W : LSX_I5_U<0b01110011010011001>, ++ LSX_BIND_U5N_DESC_BASE<"vssrlni.hu.w", uimm5, immZExt5, LSX128HOpnd>; ++ ++def VSSRLNI_WU_D : LSX_I6_U<0b0111001101001101>, ++ LSX_BIND_U6N_DESC_BASE<"vssrlni.wu.d", uimm6, immZExt6, LSX128WOpnd>; ++ ++def VSSRLNI_DU_Q : LSX_I7_U<0b011100110100111>, ++ LSX_BIND_U7N_DESC_BASE<"vssrlni.du.q", LSX128DOpnd>; ++ ++ ++def VSSRLRNI_BU_H : LSX_I4_U<0b011100110101010001>, ++ LSX_BIND_U4N_DESC_BASE<"vssrlrni.bu.h", uimm4, immZExt4, LSX128BOpnd>; ++ ++def VSSRLRNI_HU_W : LSX_I5_U<0b01110011010101001>, ++ LSX_BIND_U5N_DESC_BASE<"vssrlrni.hu.w", uimm5, immZExt5, LSX128HOpnd>; ++ ++def VSSRLRNI_WU_D : LSX_I6_U<0b0111001101010101>, ++ LSX_BIND_U6N_DESC_BASE<"vssrlrni.wu.d", uimm6, immZExt6, LSX128WOpnd>; ++ ++def VSSRLRNI_DU_Q : LSX_I7_U<0b011100110101011>, ++ LSX_BIND_U7N_DESC_BASE<"vssrlrni.du.q", LSX128DOpnd>; ++ ++ ++def VSRARNI_B_H : LSX_I4_U<0b011100110101110001>, ++ LSX_BIND_U4N_DESC_BASE<"vsrarni.b.h", uimm4, immZExt4, LSX128BOpnd>; ++ ++def VSRARNI_H_W : LSX_I5_U<0b01110011010111001>, ++ LSX_BIND_U5N_DESC_BASE<"vsrarni.h.w", uimm5, immZExt5, LSX128HOpnd>; ++ ++def VSRARNI_W_D : LSX_I6_U<0b0111001101011101>, ++ LSX_BIND_U6N_DESC_BASE<"vsrarni.w.d", uimm6, immZExt6, LSX128WOpnd>; ++ ++def VSRARNI_D_Q : LSX_I7_U<0b011100110101111>, ++ LSX_BIND_U7N_DESC_BASE<"vsrarni.d.q", LSX128DOpnd>; ++ ++ ++def VSSRANI_B_H : LSX_I4_U<0b011100110110000001>, ++ LSX_BIND_U4N_DESC_BASE<"vssrani.b.h", uimm4, immZExt4, LSX128BOpnd>; ++ ++def VSSRANI_H_W : LSX_I5_U<0b01110011011000001>, ++ LSX_BIND_U5N_DESC_BASE<"vssrani.h.w", uimm5, immZExt5, LSX128HOpnd>; ++ ++def VSSRANI_W_D : LSX_I6_U<0b0111001101100001>, ++ LSX_BIND_U6N_DESC_BASE<"vssrani.w.d", uimm6, immZExt6, LSX128WOpnd>; ++ ++def VSSRANI_D_Q : 
LSX_I7_U<0b011100110110001>, ++ LSX_BIND_U7N_DESC_BASE<"vssrani.d.q", LSX128DOpnd>; ++ ++ ++def VSSRANI_BU_H : LSX_I4_U<0b011100110110010001>, ++ LSX_BIND_U4N_DESC_BASE<"vssrani.bu.h", uimm4, immZExt4, LSX128BOpnd>; ++ ++def VSSRANI_HU_W : LSX_I5_U<0b01110011011001001>, ++ LSX_BIND_U5N_DESC_BASE<"vssrani.hu.w", uimm5, immZExt5, LSX128HOpnd>; ++ ++def VSSRANI_WU_D : LSX_I6_U<0b0111001101100101>, ++ LSX_BIND_U6N_DESC_BASE<"vssrani.wu.d", uimm6, immZExt6, LSX128WOpnd>; ++ ++def VSSRANI_DU_Q : LSX_I7_U<0b011100110110011>, ++ LSX_BIND_U7N_DESC_BASE<"vssrani.du.q", LSX128DOpnd>; ++ ++ ++def VSSRARNI_B_H : LSX_I4_U<0b011100110110100001>, ++ LSX_BIND_U4N_DESC_BASE<"vssrarni.b.h", uimm4, immZExt4, LSX128BOpnd>; ++ ++def VSSRARNI_H_W : LSX_I5_U<0b01110011011010001>, ++ LSX_BIND_U5N_DESC_BASE<"vssrarni.h.w", uimm5, immZExt5, LSX128HOpnd>; ++ ++def VSSRARNI_W_D : LSX_I6_U<0b0111001101101001>, ++ LSX_BIND_U6N_DESC_BASE<"vssrarni.w.d", uimm6, immZExt6, LSX128WOpnd>; ++ ++def VSSRARNI_D_Q : LSX_I7_U<0b011100110110101>, ++ LSX_BIND_U7N_DESC_BASE<"vssrarni.d.q", LSX128DOpnd>; ++ ++ ++def VSSRARNI_BU_H : LSX_I4_U<0b011100110110110001>, ++ LSX_BIND_U4N_DESC_BASE<"vssrarni.bu.h", uimm4, immZExt4, LSX128BOpnd>; ++ ++def VSSRARNI_HU_W : LSX_I5_U<0b01110011011011001>, ++ LSX_BIND_U5N_DESC_BASE<"vssrarni.hu.w", uimm5, immZExt5, LSX128HOpnd>; ++ ++def VSSRARNI_WU_D : LSX_I6_U<0b0111001101101101>, ++ LSX_BIND_U6N_DESC_BASE<"vssrarni.wu.d", uimm6, immZExt6, LSX128WOpnd>; ++ ++def VSSRARNI_DU_Q : LSX_I7_U<0b011100110110111>, ++ LSX_BIND_U7N_DESC_BASE<"vssrarni.du.q", LSX128DOpnd>; ++ ++ ++ ++def VLD : LSX_I12_S<0b0010110000>, ++ LD_DESC_BASE<"vld", load, v16i8, LSX128BOpnd, mem>; ++ ++def VST : LSX_I12_S<0b0010110001>, ++ ST_DESC_BASE<"vst", store, v16i8, LSX128BOpnd, mem_simm12>; ++ ++ ++def VSETEQZ_V : LSX_SET<0b0111001010011100100110>, ++ LSX_SET_DESC_BASE<"vseteqz.v", LSX128BOpnd>; ++ ++def VSETNEZ_V : LSX_SET<0b0111001010011100100111>, ++ LSX_SET_DESC_BASE<"vsetnez.v", LSX128BOpnd>; ++ ++ ++def VSETANYEQZ_B : LSX_SET<0b0111001010011100101000>, ++ LSX_SET_DESC_BASE<"vsetanyeqz.b", LSX128BOpnd>; ++ ++def VSETANYEQZ_H : LSX_SET<0b0111001010011100101001>, ++ LSX_SET_DESC_BASE<"vsetanyeqz.h", LSX128HOpnd>; ++ ++def VSETANYEQZ_W : LSX_SET<0b0111001010011100101010>, ++ LSX_SET_DESC_BASE<"vsetanyeqz.w", LSX128WOpnd>; ++ ++def VSETANYEQZ_D : LSX_SET<0b0111001010011100101011>, ++ LSX_SET_DESC_BASE<"vsetanyeqz.d", LSX128DOpnd>; ++ ++ ++def VSETALLNEZ_B : LSX_SET<0b0111001010011100101100>, ++ LSX_SET_DESC_BASE<"vsetallnez.b", LSX128BOpnd>; ++ ++def VSETALLNEZ_H : LSX_SET<0b0111001010011100101101>, ++ LSX_SET_DESC_BASE<"vsetallnez.h", LSX128HOpnd>; ++ ++def VSETALLNEZ_W : LSX_SET<0b0111001010011100101110>, ++ LSX_SET_DESC_BASE<"vsetallnez.w", LSX128WOpnd>; ++ ++def VSETALLNEZ_D : LSX_SET<0b0111001010011100101111>, ++ LSX_SET_DESC_BASE<"vsetallnez.d", LSX128DOpnd>; ++ ++class LSX_CBRANCH_PSEUDO_DESC_BASE : ++ LoongArchPseudo<(outs GPR32Opnd:$rd), ++ (ins RCVS:$vj), ++ [(set GPR32Opnd:$rd, (OpNode (TyNode RCVS:$vj)))]> { ++ bit usesCustomInserter = 1; ++} ++ ++def SNZ_B_PSEUDO : LSX_CBRANCH_PSEUDO_DESC_BASE; ++def SNZ_H_PSEUDO : LSX_CBRANCH_PSEUDO_DESC_BASE; ++def SNZ_W_PSEUDO : LSX_CBRANCH_PSEUDO_DESC_BASE; ++def SNZ_D_PSEUDO : LSX_CBRANCH_PSEUDO_DESC_BASE; ++def SNZ_V_PSEUDO : LSX_CBRANCH_PSEUDO_DESC_BASE; ++ ++def SZ_B_PSEUDO : LSX_CBRANCH_PSEUDO_DESC_BASE; ++def SZ_H_PSEUDO : LSX_CBRANCH_PSEUDO_DESC_BASE; ++def SZ_W_PSEUDO : LSX_CBRANCH_PSEUDO_DESC_BASE; ++def SZ_D_PSEUDO : LSX_CBRANCH_PSEUDO_DESC_BASE; ++def SZ_V_PSEUDO : 
LSX_CBRANCH_PSEUDO_DESC_BASE; ++ ++ ++def VFMADD_S : LSX_VR4MUL<0b000010010001>, ++ LSX_4RF<"vfmadd.s", int_loongarch_lsx_vfmadd_s, LSX128WOpnd>; ++ ++def VFMADD_D : LSX_VR4MUL<0b000010010010>, ++ LSX_4RF<"vfmadd.d", int_loongarch_lsx_vfmadd_d, LSX128DOpnd>; ++ ++def VFMSUB_S : LSX_VR4MUL<0b000010010101>, ++ LSX_4RF<"vfmsub.s", int_loongarch_lsx_vfmsub_s, LSX128WOpnd>; ++ ++def VFMSUB_D : LSX_VR4MUL<0b000010010110>, ++ LSX_4RF<"vfmsub.d", int_loongarch_lsx_vfmsub_d, LSX128DOpnd>; ++ ++def VFNMADD_S : LSX_VR4MUL<0b000010011001>, ++ LSX_4RF<"vfnmadd.s", int_loongarch_lsx_vfnmadd_s, LSX128WOpnd>; ++ ++def VFNMADD_D : LSX_VR4MUL<0b000010011010>, ++ LSX_4RF<"vfnmadd.d", int_loongarch_lsx_vfnmadd_d, LSX128DOpnd>; ++ ++def VFNMSUB_S : LSX_VR4MUL<0b000010011101>, ++ LSX_4RF<"vfnmsub.s", int_loongarch_lsx_vfnmsub_s, LSX128WOpnd>; ++ ++def VFNMSUB_D : LSX_VR4MUL<0b000010011110>, ++ LSX_4RF<"vfnmsub.d", int_loongarch_lsx_vfnmsub_d, LSX128DOpnd>; ++ ++ ++// vfmadd: vj * vk + va ++def : LSXPat<(fma v2f64:$vj, v2f64:$vk, v2f64:$va), ++ (VFMADD_D $vj, $vk, $va)>; ++ ++def : LSXPat<(fma v4f32:$vj, v4f32:$vk, v4f32:$va), ++ (VFMADD_S $vj, $vk, $va)>; ++ ++ ++// vfmsub: vj * vk - va ++def : LSXPat<(fma v2f64:$vj, v2f64:$vk, (fneg v2f64:$va)), ++ (VFMSUB_D v2f64:$vj, v2f64:$vk, v2f64:$va)>; ++ ++def : LSXPat<(fma v4f32:$vj, v4f32:$vk, (fneg v4f32:$va)), ++ (VFMSUB_S v4f32:$vj, v4f32:$vk, v4f32:$va)>; ++ ++ ++// vfnmadd: -(vj * vk + va) ++def : LSXPat<(fma (fneg v2f64:$vj), v2f64:$vk, (fneg v2f64:$va)), ++ (VFNMADD_D v2f64:$vj, v2f64:$vk, v2f64:$va)>; ++ ++def : LSXPat<(fma (fneg v4f32:$vj), v4f32:$vk, (fneg v4f32:$va)), ++ (VFNMADD_S v4f32:$vj, v4f32:$vk, v4f32:$va)>; ++ ++// vfnmsub: -(vj * vk - va) ++def : LSXPat<(fma (fneg v2f64:$vj), v2f64:$vk, v2f64:$va), ++ (VFNMSUB_D v2f64:$vj, v2f64:$vk, v2f64:$va)>; ++ ++def : LSXPat<(fma (fneg v4f32:$vj), v4f32:$vk, v4f32:$va), ++ (VFNMSUB_S v4f32:$vj, v4f32:$vk, v4f32:$va)>; ++ ++ ++def VFCMP_CAF_S : LSX_VFCMP<0b000011000101>, ++ LSX_VFCMP_Reg3<"vfcmp.caf.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, int_loongarch_lsx_vfcmp_caf_s>{ ++ bits<5> cond=0x0; ++ } ++ ++def VFCMP_CAF_D : LSX_VFCMP<0b000011000110>, ++ LSX_VFCMP_Reg3<"vfcmp.caf.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, int_loongarch_lsx_vfcmp_caf_d>{ ++ bits<5> cond=0x0; ++ } ++ ++ ++def VFCMP_COR_S : LSX_VFCMP<0b000011000101>, ++ LSX_VFCMP_Reg3<"vfcmp.cor.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, vfsetord_v4f32>{ ++ bits<5> cond=0x14; ++ } ++ ++def VFCMP_COR_D : LSX_VFCMP<0b000011000110>, ++ LSX_VFCMP_Reg3<"vfcmp.cor.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, vfsetord_v2f64>{ ++ bits<5> cond=0x14; ++ } ++ ++ ++def VFCMP_CUN_S : LSX_VFCMP<0b000011000101>, ++ LSX_VFCMP_Reg3<"vfcmp.cun.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, vfsetun_v4f32>{ ++ bits<5> cond=0x8; ++ } ++ ++def VFCMP_CUN_D : LSX_VFCMP<0b000011000110>, ++ LSX_VFCMP_Reg3<"vfcmp.cun.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, vfsetun_v2f64>{ ++ bits<5> cond=0x8; ++ } ++ ++ ++def VFCMP_CUNE_S : LSX_VFCMP<0b000011000101>, ++ LSX_VFCMP_Reg3<"vfcmp.cune.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, vfsetune_v4f32>{ ++ bits<5> cond=0x18; ++ } ++ ++def VFCMP_CUNE_D : LSX_VFCMP<0b000011000110>, ++ LSX_VFCMP_Reg3<"vfcmp.cune.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, vfsetune_v2f64>{ ++ bits<5> cond=0x18; ++ } ++ ++ ++def VFCMP_CUEQ_S : LSX_VFCMP<0b000011000101>, ++ LSX_VFCMP_Reg3<"vfcmp.cueq.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, vfsetueq_v4f32>{ ++ bits<5> cond=0xc; ++ } ++ ++def VFCMP_CUEQ_D : LSX_VFCMP<0b000011000110>, ++ 
LSX_VFCMP_Reg3<"vfcmp.cueq.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, vfsetueq_v2f64>{ ++ bits<5> cond=0xc; ++ } ++ ++def VFCMP_CEQ_S : LSX_VFCMP<0b000011000101>, ++ LSX_VFCMP_Reg3<"vfcmp.ceq.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, vfsetoeq_v4f32>{ ++ bits<5> cond=0x4; ++ } ++ ++def VFCMP_CEQ_D : LSX_VFCMP<0b000011000110>, ++ LSX_VFCMP_Reg3<"vfcmp.ceq.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, vfsetoeq_v2f64>{ ++ bits<5> cond=0x4; ++ } ++ ++ ++def VFCMP_CNE_S : LSX_VFCMP<0b000011000101>, ++ LSX_VFCMP_Reg3<"vfcmp.cne.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, vfsetone_v4f32>{ ++ bits<5> cond=0x10; ++ } ++ ++def VFCMP_CNE_D : LSX_VFCMP<0b000011000110>, ++ LSX_VFCMP_Reg3<"vfcmp.cne.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, vfsetone_v2f64>{ ++ bits<5> cond=0x10; ++ } ++ ++ ++def VFCMP_CLT_S : LSX_VFCMP<0b000011000101>, ++ LSX_VFCMP_Reg3<"vfcmp.clt.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, vfsetolt_v4f32>{ ++ bits<5> cond=0x2; ++ } ++ ++def VFCMP_CLT_D : LSX_VFCMP<0b000011000110>, ++ LSX_VFCMP_Reg3<"vfcmp.clt.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, vfsetolt_v2f64>{ ++ bits<5> cond=0x2; ++ } ++ ++ ++def VFCMP_CULT_S : LSX_VFCMP<0b000011000101>, ++ LSX_VFCMP_Reg3<"vfcmp.cult.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, vfsetult_v4f32>{ ++ bits<5> cond=0xa; ++ } ++ ++def VFCMP_CULT_D : LSX_VFCMP<0b000011000110>, ++ LSX_VFCMP_Reg3<"vfcmp.cult.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, vfsetult_v2f64>{ ++ bits<5> cond=0xa; ++ } ++ ++ ++def VFCMP_CLE_S : LSX_VFCMP<0b000011000101>, ++ LSX_VFCMP_Reg3<"vfcmp.cle.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, vfsetole_v4f32>{ ++ bits<5> cond=0x6; ++ } ++ ++def VFCMP_CLE_D : LSX_VFCMP<0b000011000110>, ++ LSX_VFCMP_Reg3<"vfcmp.cle.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, vfsetole_v2f64>{ ++ bits<5> cond=0x6; ++ } ++ ++ ++def VFCMP_CULE_S : LSX_VFCMP<0b000011000101>, ++ LSX_VFCMP_Reg3<"vfcmp.cule.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, vfsetule_v4f32>{ ++ bits<5> cond=0xe; ++ } ++ ++def VFCMP_CULE_D : LSX_VFCMP<0b000011000110>, ++ LSX_VFCMP_Reg3<"vfcmp.cule.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, vfsetule_v2f64>{ ++ bits<5> cond=0xe; ++ } ++ ++ ++def VFCMP_SAF_S : LSX_VFCMP<0b000011000101>, ++ LSX_VFCMP_Reg3<"vfcmp.saf.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, int_loongarch_lsx_vfcmp_saf_s>{ ++ bits<5> cond=0x1; ++ } ++ ++def VFCMP_SAF_D : LSX_VFCMP<0b000011000110>, ++ LSX_VFCMP_Reg3<"vfcmp.saf.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, int_loongarch_lsx_vfcmp_saf_d>{ ++ bits<5> cond=0x1; ++ } ++ ++def VFCMP_SOR_S : LSX_VFCMP<0b000011000101>, ++ LSX_VFCMP_Reg3<"vfcmp.sor.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, int_loongarch_lsx_vfcmp_sor_s>{ ++ bits<5> cond=0x15; ++ } ++ ++def VFCMP_SOR_D : LSX_VFCMP<0b000011000110>, ++ LSX_VFCMP_Reg3<"vfcmp.sor.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, int_loongarch_lsx_vfcmp_sor_d>{ ++ bits<5> cond=0x15; ++ } ++ ++def VFCMP_SUN_S : LSX_VFCMP<0b000011000101>, ++ LSX_VFCMP_Reg3<"vfcmp.sun.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, int_loongarch_lsx_vfcmp_sun_s>{ ++ bits<5> cond=0x9; ++ } ++ ++def VFCMP_SUN_D : LSX_VFCMP<0b000011000110>, ++ LSX_VFCMP_Reg3<"vfcmp.sun.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, int_loongarch_lsx_vfcmp_sun_d>{ ++ bits<5> cond=0x9; ++ } ++ ++def VFCMP_SUNE_S : LSX_VFCMP<0b000011000101>, ++ LSX_VFCMP_Reg3<"vfcmp.sune.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, int_loongarch_lsx_vfcmp_sune_s>{ ++ bits<5> cond=0x19; ++ } ++ ++def VFCMP_SUNE_D : LSX_VFCMP<0b000011000110>, ++ LSX_VFCMP_Reg3<"vfcmp.sune.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, int_loongarch_lsx_vfcmp_sune_d>{ ++ bits<5> 
cond=0x19; ++ } ++ ++def VFCMP_SUEQ_S : LSX_VFCMP<0b000011000101>, ++ LSX_VFCMP_Reg3<"vfcmp.sueq.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, int_loongarch_lsx_vfcmp_sueq_s>{ ++ bits<5> cond=0xd; ++ } ++ ++def VFCMP_SUEQ_D : LSX_VFCMP<0b000011000110>, ++ LSX_VFCMP_Reg3<"vfcmp.sueq.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, int_loongarch_lsx_vfcmp_sueq_d>{ ++ bits<5> cond=0xd; ++ } ++ ++def VFCMP_SEQ_S : LSX_VFCMP<0b000011000101>, ++ LSX_VFCMP_Reg3<"vfcmp.seq.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, int_loongarch_lsx_vfcmp_seq_s>{ ++ bits<5> cond=0x5; ++ } ++ ++def VFCMP_SEQ_D : LSX_VFCMP<0b000011000110>, ++ LSX_VFCMP_Reg3<"vfcmp.seq.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, int_loongarch_lsx_vfcmp_seq_d>{ ++ bits<5> cond=0x5; ++ } ++ ++def VFCMP_SNE_S : LSX_VFCMP<0b000011000101>, ++ LSX_VFCMP_Reg3<"vfcmp.sne.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, int_loongarch_lsx_vfcmp_sne_s>{ ++ bits<5> cond=0x11; ++ } ++ ++def VFCMP_SNE_D : LSX_VFCMP<0b000011000110>, ++ LSX_VFCMP_Reg3<"vfcmp.sne.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, int_loongarch_lsx_vfcmp_sne_d>{ ++ bits<5> cond=0x11; ++ } ++ ++def VFCMP_SLT_S : LSX_VFCMP<0b000011000101>, ++ LSX_VFCMP_Reg3<"vfcmp.slt.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, int_loongarch_lsx_vfcmp_slt_s>{ ++ bits<5> cond=0x3; ++ } ++ ++def VFCMP_SLT_D : LSX_VFCMP<0b000011000110>, ++ LSX_VFCMP_Reg3<"vfcmp.slt.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, int_loongarch_lsx_vfcmp_slt_d>{ ++ bits<5> cond=0x3; ++ } ++ ++def VFCMP_SULT_S : LSX_VFCMP<0b000011000101>, ++ LSX_VFCMP_Reg3<"vfcmp.sult.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, int_loongarch_lsx_vfcmp_sult_s>{ ++ bits<5> cond=0xb; ++ } ++ ++def VFCMP_SULT_D : LSX_VFCMP<0b000011000110>, ++ LSX_VFCMP_Reg3<"vfcmp.sult.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, int_loongarch_lsx_vfcmp_sult_d>{ ++ bits<5> cond=0xb; ++ } ++ ++def VFCMP_SLE_S : LSX_VFCMP<0b000011000101>, ++ LSX_VFCMP_Reg3<"vfcmp.sle.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, int_loongarch_lsx_vfcmp_sle_s>{ ++ bits<5> cond=0x7; ++ } ++ ++def VFCMP_SLE_D : LSX_VFCMP<0b000011000110>, ++ LSX_VFCMP_Reg3<"vfcmp.sle.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, int_loongarch_lsx_vfcmp_sle_d>{ ++ bits<5> cond=0x7; ++ } ++ ++def VFCMP_SULE_S : LSX_VFCMP<0b000011000101>, ++ LSX_VFCMP_Reg3<"vfcmp.sule.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, int_loongarch_lsx_vfcmp_sule_s>{ ++ bits<5> cond=0xf; ++ } ++ ++def VFCMP_SULE_D : LSX_VFCMP<0b000011000110>, ++ LSX_VFCMP_Reg3<"vfcmp.sule.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, int_loongarch_lsx_vfcmp_sule_d>{ ++ bits<5> cond=0xf; ++ } ++ ++def VBITSEL_V : LSX_VR4MUL<0b000011010001>, ++ LSX_VMul_Reg4<"vbitsel.v", LSX128BOpnd, LSX128BOpnd, LSX128BOpnd, LSX128BOpnd, int_loongarch_lsx_vbitsel_v>; ++ ++def VSHUF_B : LSX_VR4MUL<0b000011010101>, ++ LSX_3R_4R_VSHF_DESC_BASE<"vshuf.b", LSX128BOpnd>; ++ ++ ++class LSX_BSEL_PSEUDO_BASE : ++ LSXPseudo<(outs RO:$vd), (ins RO:$vd_in, RO:$vs, RO:$vt), ++ [(set RO:$vd, (Ty (vselect RO:$vd_in, RO:$vt, RO:$vs)))]>, ++ PseudoInstExpansion<(VBITSEL_V LSX128BOpnd:$vd, LSX128BOpnd:$vs, ++ LSX128BOpnd:$vt, LSX128BOpnd:$vd_in)> { ++ let Constraints = "$vd_in = $vd"; ++} ++ ++def BSEL_B_PSEUDO : LSX_BSEL_PSEUDO_BASE; ++def BSEL_H_PSEUDO : LSX_BSEL_PSEUDO_BASE; ++def BSEL_W_PSEUDO : LSX_BSEL_PSEUDO_BASE; ++def BSEL_D_PSEUDO : LSX_BSEL_PSEUDO_BASE; ++def BSEL_FW_PSEUDO : LSX_BSEL_PSEUDO_BASE; ++def BSEL_FD_PSEUDO : LSX_BSEL_PSEUDO_BASE; ++ ++ ++class LSX_LD_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins MemOpnd:$addr); ++ string AsmString = 
!strconcat(instr_asm, "\t$vd, $addr"); ++ list Pattern = [(set ROVD:$vd, (OpNode (TyNode (load Addr:$addr))))]; ++ string DecoderMethod = "DecodeLSX128memlsl"; ++} ++ ++def VLDREPL_B : LSX_SI12_S<0b0011000010>, ++ LSX_LD_DESC_BASE<"vldrepl.b", vldrepl_v16i8, v16i8, LSX128BOpnd>; ++ ++def VLDREPL_H : LSX_SI11_S<0b00110000010>, ++ LSX_LD_DESC_BASE<"vldrepl.h", vldrepl_v8i16, v8i16, LSX128HOpnd, mem_simm11_lsl1, addrimm11lsl1>; ++ ++def VLDREPL_W : LSX_SI10_S<0b001100000010>, ++ LSX_LD_DESC_BASE<"vldrepl.w", vldrepl_v4i32, v4i32, LSX128WOpnd, mem_simm10_lsl2, addrimm10lsl2>; ++ ++def VLDREPL_D : LSX_SI9_S<0b0011000000010>, ++ LSX_LD_DESC_BASE<"vldrepl.d", vldrepl_v2i64, v2i64, LSX128DOpnd, mem_simm9_lsl3, addrimm9lsl3>; ++ ++ ++def VSTELM_B : LSX_SI8_idx4<0b0011000110>, ++ LSX_I8_U4_DESC_BASE<"vstelm.b", int_loongarch_lsx_vstelm_b, simm8_32, immSExt8, LSX128BOpnd>; ++ ++def VSTELM_H : LSX_SI8_idx3<0b00110001010>, ++ LSX_I8_U3_DESC_BASE<"vstelm.h", int_loongarch_lsx_vstelm_h, immSExt8_1_O, immSExt8, LSX128HOpnd>; ++ ++def VSTELM_W : LSX_SI8_idx2<0b001100010010>, ++ LSX_I8_U2_DESC_BASE<"vstelm.w", int_loongarch_lsx_vstelm_w, immSExt8_2_O, immSExt8, LSX128WOpnd>; ++ ++def VSTELM_D : LSX_SI8_idx1<0b0011000100010>, ++ LSX_I8_U1_DESC_BASE<"vstelm.d", int_loongarch_lsx_vstelm_d, immSExt8_3_O, immSExt8, LSX128DOpnd>; ++ ++ ++let mayLoad = 1, canFoldAsLoad = 1 in { ++ def VLDX : LSX_3R_2GP<0b00111000010000000>, ++ LSX_LDX_LA<"vldx", int_loongarch_lsx_vldx, GPR64Opnd, LSX128BOpnd>; ++} ++ ++let mayStore = 1 in{ ++ def VSTX : LSX_3R_2GP<0b00111000010001000>, ++ LSX_SDX_LA<"vstx", int_loongarch_lsx_vstx, GPR64Opnd, LSX128BOpnd>; ++} ++ ++ ++def VADDWEV_H_B : LSX_3R<0b01110000000111100>, ++ LSX_3R_DESC_BASE<"vaddwev.h.b", int_loongarch_lsx_vaddwev_h_b, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; ++ ++def VADDWEV_W_H : LSX_3R<0b01110000000111101>, ++ LSX_3R_DESC_BASE<"vaddwev.w.h", int_loongarch_lsx_vaddwev_w_h, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VADDWEV_D_W : LSX_3R<0b01110000000111110>, ++ LSX_3R_DESC_BASE<"vaddwev.d.w", int_loongarch_lsx_vaddwev_d_w, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd> ; ++ ++def VADDWEV_Q_D : LSX_3R<0b01110000000111111>, ++ LSX_3R_DESC_BASE<"vaddwev.q.d", int_loongarch_lsx_vaddwev_q_d, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VSUBWEV_H_B : LSX_3R<0b01110000001000000>, ++ LSX_3R_DESC_BASE<"vsubwev.h.b", int_loongarch_lsx_vsubwev_h_b, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; ++ ++def VSUBWEV_W_H : LSX_3R<0b01110000001000001>, ++ LSX_3R_DESC_BASE<"vsubwev.w.h", int_loongarch_lsx_vsubwev_w_h, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VSUBWEV_D_W : LSX_3R<0b01110000001000010>, ++ LSX_3R_DESC_BASE<"vsubwev.d.w", int_loongarch_lsx_vsubwev_d_w, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd> ; ++ ++def VSUBWEV_Q_D : LSX_3R<0b01110000001000011>, ++ LSX_3R_DESC_BASE<"vsubwev.q.d", int_loongarch_lsx_vsubwev_q_d, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VADDWOD_H_B : LSX_3R<0b01110000001000100>, ++ LSX_3R_DESC_BASE<"vaddwod.h.b", int_loongarch_lsx_vaddwod_h_b, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; ++ ++def VADDWOD_W_H : LSX_3R<0b01110000001000101>, ++ LSX_3R_DESC_BASE<"vaddwod.w.h", int_loongarch_lsx_vaddwod_w_h, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VADDWOD_D_W : LSX_3R<0b01110000001000110>, ++ LSX_3R_DESC_BASE<"vaddwod.d.w", int_loongarch_lsx_vaddwod_d_w, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd> ; ++ ++def VADDWOD_Q_D : LSX_3R<0b01110000001000111>, ++ LSX_3R_DESC_BASE<"vaddwod.q.d", int_loongarch_lsx_vaddwod_q_d, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; 
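The vaddwev/vaddwod and vsubwev/vsubwod definitions around this point all follow one even/odd widening pattern: take the even- or odd-indexed elements of $vj and $vk, widen them to the next element size, then add or subtract. A minimal scalar sketch of that semantics for the .h.b forms follows; the element-index reading of the mnemonics is assumed from their naming, and the helper names are illustrative only, not part of the patch.

    /* Scalar sketch of vaddwev.h.b / vaddwod.h.b as wired up above.
       Assumption: "ev"/"od" select even-/odd-indexed byte elements,
       which are sign-extended to 16 bits before the add. */
    #include <stdint.h>
    #include <stdio.h>

    static void vaddwev_h_b_ref(int16_t vd[8], const int8_t vj[16],
                                const int8_t vk[16]) {
      for (int i = 0; i < 8; ++i)          /* even source elements 0,2,4,... */
        vd[i] = (int16_t)vj[2 * i] + (int16_t)vk[2 * i];
    }

    static void vaddwod_h_b_ref(int16_t vd[8], const int8_t vj[16],
                                const int8_t vk[16]) {
      for (int i = 0; i < 8; ++i)          /* odd source elements 1,3,5,... */
        vd[i] = (int16_t)vj[2 * i + 1] + (int16_t)vk[2 * i + 1];
    }

    int main(void) {
      int8_t a[16], b[16];
      int16_t even[8], odd[8];
      for (int i = 0; i < 16; ++i) { a[i] = (int8_t)(i - 8); b[i] = (int8_t)(100 + i); }
      vaddwev_h_b_ref(even, a, b);
      vaddwod_h_b_ref(odd, a, b);
      for (int i = 0; i < 8; ++i)
        printf("%d %d\n", even[i], odd[i]);
      return 0;
    }

The same even/odd split carries over to the unsigned (.bu) and mixed (.bu.b) variants later in this hunk; only the extension kind of each source changes.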
++ ++ ++def VSUBWOD_H_B : LSX_3R<0b01110000001001000>, ++ LSX_3R_DESC_BASE<"vsubwod.h.b", int_loongarch_lsx_vsubwod_h_b, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; ++ ++def VSUBWOD_W_H : LSX_3R<0b01110000001001001>, ++ LSX_3R_DESC_BASE<"vsubwod.w.h", int_loongarch_lsx_vsubwod_w_h, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VSUBWOD_D_W : LSX_3R<0b01110000001001010>, ++ LSX_3R_DESC_BASE<"vsubwod.d.w", int_loongarch_lsx_vsubwod_d_w, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd> ; ++ ++def VSUBWOD_Q_D : LSX_3R<0b01110000001001011>, ++ LSX_3R_DESC_BASE<"vsubwod.q.d", int_loongarch_lsx_vsubwod_q_d, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VADDWEV_H_BU : LSX_3R<0b01110000001011100>, ++ LSX_3R_DESC_BASE<"vaddwev.h.bu", int_loongarch_lsx_vaddwev_h_bu, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; ++ ++def VADDWEV_W_HU : LSX_3R<0b01110000001011101>, ++ LSX_3R_DESC_BASE<"vaddwev.w.hu", int_loongarch_lsx_vaddwev_w_hu, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VADDWEV_D_WU : LSX_3R<0b01110000001011110>, ++ LSX_3R_DESC_BASE<"vaddwev.d.wu", int_loongarch_lsx_vaddwev_d_wu, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd> ; ++ ++def VADDWEV_Q_DU : LSX_3R<0b01110000001011111>, ++ LSX_3R_DESC_BASE<"vaddwev.q.du", int_loongarch_lsx_vaddwev_q_du, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VSUBWEV_H_BU : LSX_3R<0b01110000001100000>, ++ LSX_3R_DESC_BASE<"vsubwev.h.bu", int_loongarch_lsx_vsubwev_h_bu, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; ++ ++def VSUBWEV_W_HU : LSX_3R<0b01110000001100001>, ++ LSX_3R_DESC_BASE<"vsubwev.w.hu", int_loongarch_lsx_vsubwev_w_hu, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VSUBWEV_D_WU : LSX_3R<0b01110000001100010>, ++ LSX_3R_DESC_BASE<"vsubwev.d.wu", int_loongarch_lsx_vsubwev_d_wu, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd> ; ++ ++def VSUBWEV_Q_DU : LSX_3R<0b01110000001100011>, ++ LSX_3R_DESC_BASE<"vsubwev.q.du", int_loongarch_lsx_vsubwev_q_du, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VADDWOD_H_BU : LSX_3R<0b01110000001100100>, ++ LSX_3R_DESC_BASE<"vaddwod.h.bu", int_loongarch_lsx_vaddwod_h_bu, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; ++ ++def VADDWOD_W_HU : LSX_3R<0b01110000001100101>, ++ LSX_3R_DESC_BASE<"vaddwod.w.hu", int_loongarch_lsx_vaddwod_w_hu, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VADDWOD_D_WU : LSX_3R<0b01110000001100110>, ++ LSX_3R_DESC_BASE<"vaddwod.d.wu", int_loongarch_lsx_vaddwod_d_wu, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd> ; ++ ++def VADDWOD_Q_DU : LSX_3R<0b01110000001100111>, ++ LSX_3R_DESC_BASE<"vaddwod.q.du", int_loongarch_lsx_vaddwod_q_du, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VSUBWOD_H_BU : LSX_3R<0b01110000001101000>, ++ LSX_3R_DESC_BASE<"vsubwod.h.bu", int_loongarch_lsx_vsubwod_h_bu, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; ++ ++def VSUBWOD_W_HU : LSX_3R<0b01110000001101001>, ++ LSX_3R_DESC_BASE<"vsubwod.w.hu", int_loongarch_lsx_vsubwod_w_hu, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VSUBWOD_D_WU : LSX_3R<0b01110000001101010>, ++ LSX_3R_DESC_BASE<"vsubwod.d.wu", int_loongarch_lsx_vsubwod_d_wu, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd> ; ++ ++def VSUBWOD_Q_DU : LSX_3R<0b01110000001101011>, ++ LSX_3R_DESC_BASE<"vsubwod.q.du", int_loongarch_lsx_vsubwod_q_du, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VADDWEV_H_BU_B : LSX_3R<0b01110000001111100>, ++ LSX_3R_DESC_BASE<"vaddwev.h.bu.b", int_loongarch_lsx_vaddwev_h_bu_b, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; ++ ++def VADDWEV_W_HU_H : LSX_3R<0b01110000001111101>, ++ LSX_3R_DESC_BASE<"vaddwev.w.hu.h", int_loongarch_lsx_vaddwev_w_hu_h, LSX128WOpnd, LSX128HOpnd, 
LSX128HOpnd>; ++ ++def VADDWEV_D_WU_W : LSX_3R<0b01110000001111110>, ++ LSX_3R_DESC_BASE<"vaddwev.d.wu.w", int_loongarch_lsx_vaddwev_d_wu_w, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd> ; ++ ++def VADDWEV_Q_DU_D : LSX_3R<0b01110000001111111>, ++ LSX_3R_DESC_BASE<"vaddwev.q.du.d", int_loongarch_lsx_vaddwev_q_du_d, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VADDWOD_H_BU_B : LSX_3R<0b01110000010000000>, ++ LSX_3R_DESC_BASE<"vaddwod.h.bu.b", int_loongarch_lsx_vaddwod_h_bu_b, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; ++ ++def VADDWOD_W_HU_H : LSX_3R<0b01110000010000001>, ++ LSX_3R_DESC_BASE<"vaddwod.w.hu.h", int_loongarch_lsx_vaddwod_w_hu_h, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VADDWOD_D_WU_W : LSX_3R<0b01110000010000010>, ++ LSX_3R_DESC_BASE<"vaddwod.d.wu.w", int_loongarch_lsx_vaddwod_d_wu_w, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd> ; ++ ++def VADDWOD_Q_DU_D : LSX_3R<0b01110000010000011>, ++ LSX_3R_DESC_BASE<"vaddwod.q.du.d", int_loongarch_lsx_vaddwod_q_du_d, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VHADDW_Q_D : LSX_3R<0b01110000010101011>, ++ LSX_3R_DESC_BASE<"vhaddw.q.d", int_loongarch_lsx_vhaddw_q_d, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++def VHSUBW_Q_D : LSX_3R<0b01110000010101111>, ++ LSX_3R_DESC_BASE<"vhsubw.q.d", int_loongarch_lsx_vhsubw_q_d, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VHADDW_QU_DU : LSX_3R<0b01110000010110011>, ++ LSX_3R_DESC_BASE<"vhaddw.qu.du", int_loongarch_lsx_vhaddw_qu_du, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++def VHSUBW_QU_DU : LSX_3R<0b01110000010110111>, ++ LSX_3R_DESC_BASE<"vhsubw.qu.du", int_loongarch_lsx_vhsubw_qu_du, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VMUH_B : LSX_3R<0b01110000100001100>, ++ LSX_3R_DESC_BASE<"vmuh.b", int_loongarch_lsx_vmuh_b, LSX128BOpnd, LSX128BOpnd, LSX128BOpnd>; ++ ++def VMUH_H : LSX_3R<0b01110000100001101>, ++ LSX_3R_DESC_BASE<"vmuh.h", int_loongarch_lsx_vmuh_h, LSX128HOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VMUH_W : LSX_3R<0b01110000100001110>, ++ LSX_3R_DESC_BASE<"vmuh.w", int_loongarch_lsx_vmuh_w, LSX128WOpnd, LSX128WOpnd, LSX128WOpnd> ; ++ ++def VMUH_D : LSX_3R<0b01110000100001111>, ++ LSX_3R_DESC_BASE<"vmuh.d", int_loongarch_lsx_vmuh_d, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VMUH_BU : LSX_3R<0b01110000100010000>, ++ LSX_3R_DESC_BASE<"vmuh.bu", int_loongarch_lsx_vmuh_bu, LSX128BOpnd, LSX128BOpnd, LSX128BOpnd>; ++ ++def VMUH_HU : LSX_3R<0b01110000100010001>, ++ LSX_3R_DESC_BASE<"vmuh.hu", int_loongarch_lsx_vmuh_hu, LSX128HOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VMUH_WU : LSX_3R<0b01110000100010010>, ++ LSX_3R_DESC_BASE<"vmuh.wu", int_loongarch_lsx_vmuh_wu, LSX128WOpnd, LSX128WOpnd, LSX128WOpnd> ; ++ ++def VMUH_DU : LSX_3R<0b01110000100010011>, ++ LSX_3R_DESC_BASE<"vmuh.du", int_loongarch_lsx_vmuh_du, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VMULWEV_H_B : LSX_3R<0b01110000100100000>, ++ LSX_3R_DESC_BASE<"vmulwev.h.b", int_loongarch_lsx_vmulwev_h_b, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; ++ ++def VMULWEV_W_H : LSX_3R<0b01110000100100001>, ++ LSX_3R_DESC_BASE<"vmulwev.w.h", int_loongarch_lsx_vmulwev_w_h, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VMULWEV_D_W : LSX_3R<0b01110000100100010>, ++ LSX_3R_DESC_BASE<"vmulwev.d.w", int_loongarch_lsx_vmulwev_d_w, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd> ; ++ ++def VMULWEV_Q_D : LSX_3R<0b01110000100100011>, ++ LSX_3R_DESC_BASE<"vmulwev.q.d", int_loongarch_lsx_vmulwev_q_d, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VMULWOD_H_B : LSX_3R<0b01110000100100100>, ++ LSX_3R_DESC_BASE<"vmulwod.h.b", 
int_loongarch_lsx_vmulwod_h_b, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; ++ ++def VMULWOD_W_H : LSX_3R<0b01110000100100101>, ++ LSX_3R_DESC_BASE<"vmulwod.w.h", int_loongarch_lsx_vmulwod_w_h, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VMULWOD_D_W : LSX_3R<0b01110000100100110>, ++ LSX_3R_DESC_BASE<"vmulwod.d.w", int_loongarch_lsx_vmulwod_d_w, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd> ; ++ ++def VMULWOD_Q_D : LSX_3R<0b01110000100100111>, ++ LSX_3R_DESC_BASE<"vmulwod.q.d", int_loongarch_lsx_vmulwod_q_d, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VMULWEV_H_BU : LSX_3R<0b01110000100110000>, ++ LSX_3R_DESC_BASE<"vmulwev.h.bu", int_loongarch_lsx_vmulwev_h_bu, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; ++ ++def VMULWEV_W_HU : LSX_3R<0b01110000100110001>, ++ LSX_3R_DESC_BASE<"vmulwev.w.hu", int_loongarch_lsx_vmulwev_w_hu, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VMULWEV_D_WU : LSX_3R<0b01110000100110010>, ++ LSX_3R_DESC_BASE<"vmulwev.d.wu", int_loongarch_lsx_vmulwev_d_wu, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd> ; ++ ++def VMULWEV_Q_DU : LSX_3R<0b01110000100110011>, ++ LSX_3R_DESC_BASE<"vmulwev.q.du", int_loongarch_lsx_vmulwev_q_du, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VMULWOD_H_BU : LSX_3R<0b01110000100110100>, ++ LSX_3R_DESC_BASE<"vmulwod.h.bu", int_loongarch_lsx_vmulwod_h_bu, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; ++ ++def VMULWOD_W_HU : LSX_3R<0b01110000100110101>, ++ LSX_3R_DESC_BASE<"vmulwod.w.hu", int_loongarch_lsx_vmulwod_w_hu, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VMULWOD_D_WU : LSX_3R<0b01110000100110110>, ++ LSX_3R_DESC_BASE<"vmulwod.d.wu", int_loongarch_lsx_vmulwod_d_wu, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd> ; ++ ++def VMULWOD_Q_DU : LSX_3R<0b01110000100110111>, ++ LSX_3R_DESC_BASE<"vmulwod.q.du", int_loongarch_lsx_vmulwod_q_du, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VMULWEV_H_BU_B : LSX_3R<0b01110000101000000>, ++ LSX_3R_DESC_BASE<"vmulwev.h.bu.b", int_loongarch_lsx_vmulwev_h_bu_b, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; ++ ++def VMULWEV_W_HU_H : LSX_3R<0b01110000101000001>, ++ LSX_3R_DESC_BASE<"vmulwev.w.hu.h", int_loongarch_lsx_vmulwev_w_hu_h, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VMULWEV_D_WU_W : LSX_3R<0b01110000101000010>, ++ LSX_3R_DESC_BASE<"vmulwev.d.wu.w", int_loongarch_lsx_vmulwev_d_wu_w, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd> ; ++ ++def VMULWEV_Q_DU_D : LSX_3R<0b01110000101000011>, ++ LSX_3R_DESC_BASE<"vmulwev.q.du.d", int_loongarch_lsx_vmulwev_q_du_d, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VMULWOD_H_BU_B : LSX_3R<0b01110000101000100>, ++ LSX_3R_DESC_BASE<"vmulwod.h.bu.b", int_loongarch_lsx_vmulwod_h_bu_b, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; ++ ++def VMULWOD_W_HU_H : LSX_3R<0b01110000101000101>, ++ LSX_3R_DESC_BASE<"vmulwod.w.hu.h", int_loongarch_lsx_vmulwod_w_hu_h, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VMULWOD_D_WU_W : LSX_3R<0b01110000101000110>, ++ LSX_3R_DESC_BASE<"vmulwod.d.wu.w", int_loongarch_lsx_vmulwod_d_wu_w, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd> ; ++ ++def VMULWOD_Q_DU_D : LSX_3R<0b01110000101000111>, ++ LSX_3R_DESC_BASE<"vmulwod.q.du.d", int_loongarch_lsx_vmulwod_q_du_d, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VMADDWEV_H_B : LSX_3R<0b01110000101011000>, ++ LSX_3R_4R_DESC_BASE<"vmaddwev.h.b", int_loongarch_lsx_vmaddwev_h_b, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; ++ ++def VMADDWEV_W_H : LSX_3R<0b01110000101011001>, ++ LSX_3R_4R_DESC_BASE<"vmaddwev.w.h", int_loongarch_lsx_vmaddwev_w_h, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VMADDWEV_D_W : 
LSX_3R<0b01110000101011010>, ++ LSX_3R_4R_DESC_BASE<"vmaddwev.d.w", int_loongarch_lsx_vmaddwev_d_w, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd>; ++ ++def VMADDWEV_Q_D : LSX_3R<0b01110000101011011>, ++ LSX_3R_4R_DESC_BASE<"vmaddwev.q.d", int_loongarch_lsx_vmaddwev_q_d, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VMADDWOD_H_B : LSX_3R<0b01110000101011100>, ++ LSX_3R_4R_DESC_BASE<"vmaddwod.h.b", int_loongarch_lsx_vmaddwod_h_b, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; ++ ++def VMADDWOD_W_H : LSX_3R<0b01110000101011101>, ++ LSX_3R_4R_DESC_BASE<"vmaddwod.w.h", int_loongarch_lsx_vmaddwod_w_h, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VMADDWOD_D_W : LSX_3R<0b01110000101011110>, ++ LSX_3R_4R_DESC_BASE<"vmaddwod.d.w", int_loongarch_lsx_vmaddwod_d_w, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd>; ++ ++def VMADDWOD_Q_D : LSX_3R<0b01110000101011111>, ++ LSX_3R_4R_DESC_BASE<"vmaddwod.q.d", int_loongarch_lsx_vmaddwod_q_d, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VMADDWEV_H_BU : LSX_3R<0b01110000101101000>, ++ LSX_3R_4R_DESC_BASE<"vmaddwev.h.bu", int_loongarch_lsx_vmaddwev_h_bu, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; ++ ++def VMADDWEV_W_HU : LSX_3R<0b01110000101101001>, ++ LSX_3R_4R_DESC_BASE<"vmaddwev.w.hu", int_loongarch_lsx_vmaddwev_w_hu, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VMADDWEV_D_WU : LSX_3R<0b01110000101101010>, ++ LSX_3R_4R_DESC_BASE<"vmaddwev.d.wu", int_loongarch_lsx_vmaddwev_d_wu, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd>; ++ ++def VMADDWEV_Q_DU : LSX_3R<0b01110000101101011>, ++ LSX_3R_4R_DESC_BASE<"vmaddwev.q.du", int_loongarch_lsx_vmaddwev_q_du, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VMADDWOD_H_BU : LSX_3R<0b01110000101101100>, ++ LSX_3R_4R_DESC_BASE<"vmaddwod.h.bu", int_loongarch_lsx_vmaddwod_h_bu, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; ++ ++def VMADDWOD_W_HU : LSX_3R<0b01110000101101101>, ++ LSX_3R_4R_DESC_BASE<"vmaddwod.w.hu", int_loongarch_lsx_vmaddwod_w_hu, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VMADDWOD_D_WU : LSX_3R<0b01110000101101110>, ++ LSX_3R_4R_DESC_BASE<"vmaddwod.d.wu", int_loongarch_lsx_vmaddwod_d_wu, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd>; ++ ++def VMADDWOD_Q_DU : LSX_3R<0b01110000101101111>, ++ LSX_3R_4R_DESC_BASE<"vmaddwod.q.du", int_loongarch_lsx_vmaddwod_q_du, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VMADDWEV_H_BU_B : LSX_3R<0b01110000101111000>, ++ LSX_3R_4R_DESC_BASE<"vmaddwev.h.bu.b", int_loongarch_lsx_vmaddwev_h_bu_b, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; ++ ++def VMADDWEV_W_HU_H : LSX_3R<0b01110000101111001>, ++ LSX_3R_4R_DESC_BASE<"vmaddwev.w.hu.h", int_loongarch_lsx_vmaddwev_w_hu_h, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VMADDWEV_D_WU_W : LSX_3R<0b01110000101111010>, ++ LSX_3R_4R_DESC_BASE<"vmaddwev.d.wu.w", int_loongarch_lsx_vmaddwev_d_wu_w, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd> ; ++ ++def VMADDWEV_Q_DU_D : LSX_3R<0b01110000101111011>, ++ LSX_3R_4R_DESC_BASE<"vmaddwev.q.du.d", int_loongarch_lsx_vmaddwev_q_du_d, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VMADDWOD_H_BU_B : LSX_3R<0b01110000101111100>, ++ LSX_3R_4R_DESC_BASE<"vmaddwod.h.bu.b", int_loongarch_lsx_vmaddwod_h_bu_b, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; ++ ++def VMADDWOD_W_HU_H : LSX_3R<0b01110000101111101>, ++ LSX_3R_4R_DESC_BASE<"vmaddwod.w.hu.h", int_loongarch_lsx_vmaddwod_w_hu_h, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VMADDWOD_D_WU_W : LSX_3R<0b01110000101111110>, ++ LSX_3R_4R_DESC_BASE<"vmaddwod.d.wu.w", int_loongarch_lsx_vmaddwod_d_wu_w, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd> ; ++ ++def VMADDWOD_Q_DU_D : 
LSX_3R<0b01110000101111111>, ++ LSX_3R_4R_DESC_BASE<"vmaddwod.q.du.d", int_loongarch_lsx_vmaddwod_q_du_d, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VSRLN_B_H : LSX_3R<0b01110000111101001>, ++ LSX_3R_DESC_BASE<"vsrln.b.h", int_loongarch_lsx_vsrln_b_h, LSX128BOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VSRLN_H_W : LSX_3R<0b01110000111101010>, ++ LSX_3R_DESC_BASE<"vsrln.h.w", int_loongarch_lsx_vsrln_h_w, LSX128HOpnd, LSX128WOpnd, LSX128WOpnd>; ++ ++def VSRLN_W_D : LSX_3R<0b01110000111101011>, ++ LSX_3R_DESC_BASE<"vsrln.w.d", int_loongarch_lsx_vsrln_w_d, LSX128WOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VSRAN_B_H : LSX_3R<0b01110000111101101>, ++ LSX_3R_DESC_BASE<"vsran.b.h", int_loongarch_lsx_vsran_b_h, LSX128BOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VSRAN_H_W : LSX_3R<0b01110000111101110>, ++ LSX_3R_DESC_BASE<"vsran.h.w", int_loongarch_lsx_vsran_h_w, LSX128HOpnd, LSX128WOpnd, LSX128WOpnd>; ++ ++def VSRAN_W_D : LSX_3R<0b01110000111101111>, ++ LSX_3R_DESC_BASE<"vsran.w.d", int_loongarch_lsx_vsran_w_d, LSX128WOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VSRLRN_B_H : LSX_3R<0b01110000111110001>, ++ LSX_3R_DESC_BASE<"vsrlrn.b.h", int_loongarch_lsx_vsrlrn_b_h, LSX128BOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VSRLRN_H_W : LSX_3R<0b01110000111110010>, ++ LSX_3R_DESC_BASE<"vsrlrn.h.w", int_loongarch_lsx_vsrlrn_h_w, LSX128HOpnd, LSX128WOpnd, LSX128WOpnd>; ++ ++def VSRLRN_W_D : LSX_3R<0b01110000111110011>, ++ LSX_3R_DESC_BASE<"vsrlrn.w.d", int_loongarch_lsx_vsrlrn_w_d, LSX128WOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VSRARN_B_H : LSX_3R<0b01110000111110101>, ++ LSX_3R_DESC_BASE<"vsrarn.b.h", int_loongarch_lsx_vsrarn_b_h, LSX128BOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VSRARN_H_W : LSX_3R<0b01110000111110110>, ++ LSX_3R_DESC_BASE<"vsrarn.h.w", int_loongarch_lsx_vsrarn_h_w, LSX128HOpnd, LSX128WOpnd, LSX128WOpnd>; ++ ++def VSRARN_W_D : LSX_3R<0b01110000111110111>, ++ LSX_3R_DESC_BASE<"vsrarn.w.d", int_loongarch_lsx_vsrarn_w_d, LSX128WOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VSSRLN_B_H : LSX_3R<0b01110000111111001>, ++ LSX_3R_DESC_BASE<"vssrln.b.h", int_loongarch_lsx_vssrln_b_h, LSX128BOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VSSRLN_H_W : LSX_3R<0b01110000111111010>, ++ LSX_3R_DESC_BASE<"vssrln.h.w", int_loongarch_lsx_vssrln_h_w, LSX128HOpnd, LSX128WOpnd, LSX128WOpnd>; ++ ++def VSSRLN_W_D : LSX_3R<0b01110000111111011>, ++ LSX_3R_DESC_BASE<"vssrln.w.d", int_loongarch_lsx_vssrln_w_d, LSX128WOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VSSRAN_B_H : LSX_3R<0b01110000111111101>, ++ LSX_3R_DESC_BASE<"vssran.b.h", int_loongarch_lsx_vssran_b_h, LSX128BOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VSSRAN_H_W : LSX_3R<0b01110000111111110>, ++ LSX_3R_DESC_BASE<"vssran.h.w", int_loongarch_lsx_vssran_h_w, LSX128HOpnd, LSX128WOpnd, LSX128WOpnd>; ++ ++def VSSRAN_W_D : LSX_3R<0b01110000111111111>, ++ LSX_3R_DESC_BASE<"vssran.w.d", int_loongarch_lsx_vssran_w_d, LSX128WOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VSSRLRN_B_H : LSX_3R<0b01110001000000001>, ++ LSX_3R_DESC_BASE<"vssrlrn.b.h", int_loongarch_lsx_vssrlrn_b_h, LSX128BOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VSSRLRN_H_W : LSX_3R<0b01110001000000010>, ++ LSX_3R_DESC_BASE<"vssrlrn.h.w", int_loongarch_lsx_vssrlrn_h_w, LSX128HOpnd, LSX128WOpnd, LSX128WOpnd>; ++ ++def VSSRLRN_W_D : LSX_3R<0b01110001000000011>, ++ LSX_3R_DESC_BASE<"vssrlrn.w.d", int_loongarch_lsx_vssrlrn_w_d, LSX128WOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VSSRARN_B_H : LSX_3R<0b01110001000000101>, ++ LSX_3R_DESC_BASE<"vssrarn.b.h", int_loongarch_lsx_vssrarn_b_h, LSX128BOpnd, 
LSX128HOpnd, LSX128HOpnd>; ++ ++def VSSRARN_H_W : LSX_3R<0b01110001000000110>, ++ LSX_3R_DESC_BASE<"vssrarn.h.w", int_loongarch_lsx_vssrarn_h_w, LSX128HOpnd, LSX128WOpnd, LSX128WOpnd>; ++ ++def VSSRARN_W_D : LSX_3R<0b01110001000000111>, ++ LSX_3R_DESC_BASE<"vssrarn.w.d", int_loongarch_lsx_vssrarn_w_d, LSX128WOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VSSRLN_BU_H : LSX_3R<0b01110001000001001>, ++ LSX_3R_DESC_BASE<"vssrln.bu.h", int_loongarch_lsx_vssrln_bu_h, LSX128BOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VSSRLN_HU_W : LSX_3R<0b01110001000001010>, ++ LSX_3R_DESC_BASE<"vssrln.hu.w", int_loongarch_lsx_vssrln_hu_w, LSX128HOpnd, LSX128WOpnd, LSX128WOpnd>; ++ ++def VSSRLN_WU_D : LSX_3R<0b01110001000001011>, ++ LSX_3R_DESC_BASE<"vssrln.wu.d", int_loongarch_lsx_vssrln_wu_d, LSX128WOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VSSRAN_BU_H : LSX_3R<0b01110001000001101>, ++ LSX_3R_DESC_BASE<"vssran.bu.h", int_loongarch_lsx_vssran_bu_h, LSX128BOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VSSRAN_HU_W : LSX_3R<0b01110001000001110>, ++ LSX_3R_DESC_BASE<"vssran.hu.w", int_loongarch_lsx_vssran_hu_w, LSX128HOpnd, LSX128WOpnd, LSX128WOpnd>; ++ ++def VSSRAN_WU_D : LSX_3R<0b01110001000001111>, ++ LSX_3R_DESC_BASE<"vssran.wu.d", int_loongarch_lsx_vssran_wu_d, LSX128WOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VSSRLRN_BU_H : LSX_3R<0b01110001000010001>, ++ LSX_3R_DESC_BASE<"vssrlrn.bu.h", int_loongarch_lsx_vssrlrn_bu_h, LSX128BOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VSSRLRN_HU_W : LSX_3R<0b01110001000010010>, ++ LSX_3R_DESC_BASE<"vssrlrn.hu.w", int_loongarch_lsx_vssrlrn_hu_w, LSX128HOpnd, LSX128WOpnd, LSX128WOpnd>; ++ ++def VSSRLRN_WU_D : LSX_3R<0b01110001000010011>, ++ LSX_3R_DESC_BASE<"vssrlrn.wu.d", int_loongarch_lsx_vssrlrn_wu_d, LSX128WOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VSSRARN_BU_H : LSX_3R<0b01110001000010101>, ++ LSX_3R_DESC_BASE<"vssrarn.bu.h", int_loongarch_lsx_vssrarn_bu_h, LSX128BOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VSSRARN_HU_W : LSX_3R<0b01110001000010110>, ++ LSX_3R_DESC_BASE<"vssrarn.hu.w", int_loongarch_lsx_vssrarn_hu_w, LSX128HOpnd, LSX128WOpnd, LSX128WOpnd>; ++ ++def VSSRARN_WU_D : LSX_3R<0b01110001000010111>, ++ LSX_3R_DESC_BASE<"vssrarn.wu.d", int_loongarch_lsx_vssrarn_wu_d, LSX128WOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VANDN_V : LSX_3R<0b01110001001010000>, ++ LSX_3R_DESC_BASE<"vandn.v", int_loongarch_lsx_vandn_v, LSX128BOpnd>; ++ ++ ++class LSX_VANDN_PSEUDO_BASE : ++ LSXPseudo<(outs RO:$vd), (ins RO:$vj, RO:$vk), ++ []>, ++ PseudoInstExpansion<(VANDN_V LSX128BOpnd:$vd, ++ LSX128BOpnd:$vj, ++ LSX128BOpnd:$vk)>; ++ ++def VANDN_H_PSEUDO : LSX_VANDN_PSEUDO_BASE; ++def VANDN_W_PSEUDO : LSX_VANDN_PSEUDO_BASE; ++def VANDN_D_PSEUDO : LSX_VANDN_PSEUDO_BASE; ++ ++ ++ ++def VORN_V : LSX_3R<0b01110001001010001>, ++ LSX_3R_DESC_BASE<"vorn.v", int_loongarch_lsx_vorn_v, LSX128BOpnd>; ++ ++ ++class LSX_VORN_PSEUDO_BASE : ++ LSXPseudo<(outs RO:$vd), (ins RO:$vj, RO:$vk), ++ []>, ++ PseudoInstExpansion<(VORN_V LSX128BOpnd:$vd, ++ LSX128BOpnd:$vj, ++ LSX128BOpnd:$vk)>; ++ ++def VORN_H_PSEUDO : LSX_VORN_PSEUDO_BASE; ++def VORN_W_PSEUDO : LSX_VORN_PSEUDO_BASE; ++def VORN_D_PSEUDO : LSX_VORN_PSEUDO_BASE; ++ ++ ++def VFRSTP_B : LSX_3R<0b01110001001010110>, ++ LSX_3R_4R_DESC_BASE<"vfrstp.b", int_loongarch_lsx_vfrstp_b, LSX128BOpnd>; ++ ++def VFRSTP_H : LSX_3R<0b01110001001010111>, ++ LSX_3R_4R_DESC_BASE<"vfrstp.h", int_loongarch_lsx_vfrstp_h, LSX128HOpnd>; ++ ++ ++def VADD_Q : LSX_3R<0b01110001001011010>, IsCommutable, ++ LSX_3R_DESC_BASE<"vadd.q", int_loongarch_lsx_vadd_q, LSX128DOpnd>; ++ 
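vadd.q above and the vsub.q that follows treat the LSX register as a single 128-bit integer, which is why both are described on LSX128DOpnd through dedicated intrinsics rather than an element-wise SDNode. A scalar reference sketch using the GCC/Clang unsigned __int128 extension (helper names illustrative only):

    /* Reference model of vadd.q / vsub.q: one wrapping 128-bit
       integer add/sub across the whole vector register.
       Assumes unsigned __int128 is available (64-bit GCC/Clang). */
    #include <stdio.h>

    typedef unsigned __int128 u128;

    static u128 vadd_q_ref(u128 vj, u128 vk) { return vj + vk; } /* wraps mod 2^128 */
    static u128 vsub_q_ref(u128 vj, u128 vk) { return vj - vk; }

    int main(void) {
      u128 a = ((u128)0x0123456789abcdefULL << 64) | 0xfedcba9876543210ULL;
      u128 b = (u128)1;
      u128 s = vadd_q_ref(a, b), d = vsub_q_ref(a, b);
      printf("%016llx%016llx\n", (unsigned long long)(s >> 64), (unsigned long long)s);
      printf("%016llx%016llx\n", (unsigned long long)(d >> 64), (unsigned long long)d);
      return 0;
    }
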
++def VSUB_Q : LSX_3R<0b01110001001011011>, ++ LSX_3R_DESC_BASE<"vsub.q", int_loongarch_lsx_vsub_q, LSX128DOpnd>; ++ ++ ++def VSIGNCOV_B : LSX_3R<0b01110001001011100>, ++ LSX_3R_DESC_BASE<"vsigncov.b", int_loongarch_lsx_vsigncov_b, LSX128BOpnd>; ++ ++def VSIGNCOV_H : LSX_3R<0b01110001001011101>, ++ LSX_3R_DESC_BASE<"vsigncov.h", int_loongarch_lsx_vsigncov_h, LSX128HOpnd>; ++ ++def VSIGNCOV_W : LSX_3R<0b01110001001011110>, ++ LSX_3R_DESC_BASE<"vsigncov.w", int_loongarch_lsx_vsigncov_w, LSX128WOpnd>; ++ ++def VSIGNCOV_D : LSX_3R<0b01110001001011111>, ++ LSX_3R_DESC_BASE<"vsigncov.d", int_loongarch_lsx_vsigncov_d, LSX128DOpnd>; ++ ++ ++def VFCVT_H_S : LSX_3R<0b01110001010001100>, ++ LSX_3RF_DESC_BASE<"vfcvt.h.s", int_loongarch_lsx_vfcvt_h_s, LSX128HOpnd, LSX128WOpnd, LSX128WOpnd>; ++ ++def VFCVT_S_D : LSX_3R<0b01110001010001101>, ++ LSX_3RF_DESC_BASE1<"vfcvt.s.d", int_loongarch_lsx_vfcvt_s_d, LSX128WOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VFFINT_S_L : LSX_3R<0b01110001010010000>, ++ LSX_3RF_DESC_BASE<"vffint.s.l", int_loongarch_lsx_vffint_s_l, LSX128WOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++def VFTINT_W_D : LSX_3R<0b01110001010010011>, ++ LSX_3RF_DESC_BASE<"vftint.w.d", int_loongarch_lsx_vftint_w_d, LSX128WOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VFTINTRZ_W_D : LSX_3R<0b01110001010010110>, ++ LSX_3RF_DESC_BASE<"vftintrz.w.d", int_loongarch_lsx_vftintrz_w_d, LSX128WOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++def VFTINTRP_W_D : LSX_3R<0b01110001010010101>, ++ LSX_3RF_DESC_BASE<"vftintrp.w.d", int_loongarch_lsx_vftintrp_w_d, LSX128WOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++def VFTINTRM_W_D : LSX_3R<0b01110001010010100>, ++ LSX_3RF_DESC_BASE<"vftintrm.w.d", int_loongarch_lsx_vftintrm_w_d, LSX128WOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++def VFTINTRNE_W_D : LSX_3R<0b01110001010010111>, ++ LSX_3RF_DESC_BASE<"vftintrne.w.d", int_loongarch_lsx_vftintrne_w_d, LSX128WOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VBSRL_V : LSX_I5_U<0b01110010100011101>, ++ LSX_U5_DESC_BASE<"vbsrl.v", int_loongarch_lsx_vbsrl_v, LSX128BOpnd>; ++ ++def VBSLL_V : LSX_I5_U<0b01110010100011100>, ++ LSX_U5_DESC_BASE<"vbsll.v", int_loongarch_lsx_vbsll_v, LSX128BOpnd>; ++ ++ ++def VFRSTPI_B : LSX_I5_U<0b01110010100110100>, ++ LSX_U5_4R_DESC_BASE<"vfrstpi.b", int_loongarch_lsx_vfrstpi_b, LSX128BOpnd>; ++ ++def VFRSTPI_H : LSX_I5_U<0b01110010100110101>, ++ LSX_U5_4R_DESC_BASE<"vfrstpi.h", int_loongarch_lsx_vfrstpi_h, LSX128HOpnd>; ++ ++ ++def VNEG_B : LSX_2R<0b0111001010011100001100>, ++ LSX_2R_DESC_BASE<"vneg.b", int_loongarch_lsx_vneg_b, LSX128BOpnd>; ++ ++def VNEG_H : LSX_2R<0b0111001010011100001101>, ++ LSX_2R_DESC_BASE<"vneg.h", int_loongarch_lsx_vneg_h, LSX128HOpnd>; ++ ++def VNEG_W : LSX_2R<0b0111001010011100001110>, ++ LSX_2R_DESC_BASE<"vneg.w", int_loongarch_lsx_vneg_w, LSX128WOpnd>; ++ ++def VNEG_D : LSX_2R<0b0111001010011100001111>, ++ LSX_2R_DESC_BASE<"vneg.d", int_loongarch_lsx_vneg_d, LSX128DOpnd>; ++ ++ ++def VMSKGEZ_B : LSX_2R<0b0111001010011100010100>, ++ LSX_2R_DESC_BASE<"vmskgez.b", int_loongarch_lsx_vmskgez_b, LSX128BOpnd>; ++ ++def VMSKNZ_B : LSX_2R<0b0111001010011100011000>, ++ LSX_2R_DESC_BASE<"vmsknz.b", int_loongarch_lsx_vmsknz_b, LSX128BOpnd>; ++ ++ ++def VFRINTRM_S : LSX_2R<0b0111001010011101010001>, ++ LSX_2RF_DESC_BASE<"vfrintrm.s", int_loongarch_lsx_vfrintrm_s, LSX128WOpnd>; ++ ++def VFRINTRM_D : LSX_2R<0b0111001010011101010010>, ++ LSX_2RF_DESC_BASE<"vfrintrm.d", int_loongarch_lsx_vfrintrm_d, LSX128DOpnd>; ++ ++ ++def VFRINTRP_S : LSX_2R<0b0111001010011101010101>, ++ LSX_2RF_DESC_BASE<"vfrintrp.s", 
int_loongarch_lsx_vfrintrp_s, LSX128WOpnd>; ++ ++def VFRINTRP_D : LSX_2R<0b0111001010011101010110>, ++ LSX_2RF_DESC_BASE<"vfrintrp.d", int_loongarch_lsx_vfrintrp_d, LSX128DOpnd>; ++ ++ ++def VFRINTRZ_S : LSX_2R<0b0111001010011101011001>, ++ LSX_2RF_DESC_BASE<"vfrintrz.s", int_loongarch_lsx_vfrintrz_s, LSX128WOpnd>; ++ ++def VFRINTRZ_D : LSX_2R<0b0111001010011101011010>, ++ LSX_2RF_DESC_BASE<"vfrintrz.d", int_loongarch_lsx_vfrintrz_d, LSX128DOpnd>; ++ ++ ++def VFRINTRNE_S : LSX_2R<0b0111001010011101011101>, ++ LSX_2RF_DESC_BASE<"vfrintrne.s", int_loongarch_lsx_vfrintrne_s, LSX128WOpnd>; ++ ++def VFRINTRNE_D : LSX_2R<0b0111001010011101011110>, ++ LSX_2RF_DESC_BASE<"vfrintrne.d", int_loongarch_lsx_vfrintrne_d, LSX128DOpnd>; ++ ++ ++def VFFINTL_D_W : LSX_2R<0b0111001010011110000100>, ++ LSX_2RF_DESC_BASE<"vffintl.d.w", int_loongarch_lsx_vffintl_d_w, LSX128DOpnd, LSX128WOpnd>; ++ ++def VFFINTH_D_W : LSX_2R<0b0111001010011110000101>, ++ LSX_2RF_DESC_BASE<"vffinth.d.w", int_loongarch_lsx_vffinth_d_w, LSX128DOpnd, LSX128WOpnd>; ++ ++ ++def VFTINTRM_W_S : LSX_2R<0b0111001010011110001110>, ++ LSX_2RF_DESC_BASE<"vftintrm.w.s", int_loongarch_lsx_vftintrm_w_s, LSX128WOpnd>; ++ ++def VFTINTRM_L_D : LSX_2R<0b0111001010011110001111>, ++ LSX_2RF_DESC_BASE<"vftintrm.l.d", int_loongarch_lsx_vftintrm_l_d, LSX128DOpnd>; ++ ++ ++def VFTINTRP_W_S : LSX_2R<0b0111001010011110010000>, ++ LSX_2RF_DESC_BASE<"vftintrp.w.s", int_loongarch_lsx_vftintrp_w_s, LSX128WOpnd>; ++ ++def VFTINTRP_L_D : LSX_2R<0b0111001010011110010001>, ++ LSX_2RF_DESC_BASE<"vftintrp.l.d", int_loongarch_lsx_vftintrp_l_d, LSX128DOpnd>; ++ ++ ++def VFTINTRZ_W_S : LSX_2R<0b0111001010011110010010>, ++ LSX_2RF_DESC_BASE<"vftintrz.w.s", fp_to_sint, LSX128WOpnd>; ++ ++def VFTINTRZ_L_D : LSX_2R<0b0111001010011110010011>, ++ LSX_2RF_DESC_BASE<"vftintrz.l.d", fp_to_sint, LSX128DOpnd>; ++ ++ ++def VFTINTRNE_W_S : LSX_2R<0b0111001010011110010100>, ++ LSX_2RF_DESC_BASE<"vftintrne.w.s", int_loongarch_lsx_vftintrne_w_s, LSX128WOpnd>; ++ ++def VFTINTRNE_L_D : LSX_2R<0b0111001010011110010101>, ++ LSX_2RF_DESC_BASE<"vftintrne.l.d", int_loongarch_lsx_vftintrne_l_d, LSX128DOpnd>; ++ ++ ++def VFTINTL_L_S : LSX_2R<0b0111001010011110100000>, ++ LSX_2RF_DESC_BASE<"vftintl.l.s", int_loongarch_lsx_vftintl_l_s, LSX128DOpnd, LSX128WOpnd>; ++ ++def VFTINTH_L_S : LSX_2R<0b0111001010011110100001>, ++ LSX_2RF_DESC_BASE<"vftinth.l.s", int_loongarch_lsx_vftinth_l_s, LSX128DOpnd, LSX128WOpnd>; ++ ++ ++def VFTINTRML_L_S : LSX_2R<0b0111001010011110100010>, ++ LSX_2RF_DESC_BASE<"vftintrml.l.s", int_loongarch_lsx_vftintrml_l_s, LSX128DOpnd, LSX128WOpnd>; ++ ++def VFTINTRMH_L_S : LSX_2R<0b0111001010011110100011>, ++ LSX_2RF_DESC_BASE<"vftintrmh.l.s", int_loongarch_lsx_vftintrmh_l_s, LSX128DOpnd, LSX128WOpnd>; ++ ++ ++def VFTINTRPL_L_S : LSX_2R<0b0111001010011110100100>, ++ LSX_2RF_DESC_BASE<"vftintrpl.l.s", int_loongarch_lsx_vftintrpl_l_s, LSX128DOpnd, LSX128WOpnd>; ++ ++def VFTINTRPH_L_S : LSX_2R<0b0111001010011110100101>, ++ LSX_2RF_DESC_BASE<"vftintrph.l.s", int_loongarch_lsx_vftintrph_l_s, LSX128DOpnd, LSX128WOpnd>; ++ ++ ++def VFTINTRZL_L_S : LSX_2R<0b0111001010011110100110>, ++ LSX_2RF_DESC_BASE<"vftintrzl.l.s", int_loongarch_lsx_vftintrzl_l_s, LSX128DOpnd, LSX128WOpnd>; ++ ++def VFTINTRZH_L_S : LSX_2R<0b0111001010011110100111>, ++ LSX_2RF_DESC_BASE<"vftintrzh.l.s", int_loongarch_lsx_vftintrzh_l_s, LSX128DOpnd, LSX128WOpnd>; ++ ++ ++def VFTINTRNEL_L_S : LSX_2R<0b0111001010011110101000>, ++ LSX_2RF_DESC_BASE<"vftintrnel.l.s", int_loongarch_lsx_vftintrnel_l_s, LSX128DOpnd, 
LSX128WOpnd>; ++ ++def VFTINTRNEH_L_S : LSX_2R<0b0111001010011110101001>, ++ LSX_2RF_DESC_BASE<"vftintrneh.l.s", int_loongarch_lsx_vftintrneh_l_s, LSX128DOpnd, LSX128WOpnd>; ++ ++ ++def VEXTH_H_B : LSX_2R<0b0111001010011110111000>, ++ LSX_2R_DESC_BASE<"vexth.h.b", int_loongarch_lsx_vexth_h_b, LSX128HOpnd, LSX128BOpnd>; ++ ++def VEXTH_W_H : LSX_2R<0b0111001010011110111001>, ++ LSX_2R_DESC_BASE<"vexth.w.h", int_loongarch_lsx_vexth_w_h, LSX128WOpnd, LSX128HOpnd>; ++ ++def VEXTH_D_W : LSX_2R<0b0111001010011110111010>, ++ LSX_2R_DESC_BASE<"vexth.d.w", int_loongarch_lsx_vexth_d_w, LSX128DOpnd, LSX128WOpnd> ; ++ ++def VEXTH_Q_D : LSX_2R<0b0111001010011110111011>, ++ LSX_2R_DESC_BASE<"vexth.q.d", int_loongarch_lsx_vexth_q_d, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VEXTH_HU_BU : LSX_2R<0b0111001010011110111100>, ++ LSX_2R_DESC_BASE<"vexth.hu.bu", int_loongarch_lsx_vexth_hu_bu, LSX128HOpnd, LSX128BOpnd>; ++ ++def VEXTH_WU_HU : LSX_2R<0b0111001010011110111101>, ++ LSX_2R_DESC_BASE<"vexth.wu.hu", int_loongarch_lsx_vexth_wu_hu, LSX128WOpnd, LSX128HOpnd>; ++ ++def VEXTH_DU_WU : LSX_2R<0b0111001010011110111110>, ++ LSX_2R_DESC_BASE<"vexth.du.wu", int_loongarch_lsx_vexth_du_wu, LSX128DOpnd, LSX128WOpnd> ; ++ ++def VEXTH_QU_DU : LSX_2R<0b0111001010011110111111>, ++ LSX_2R_DESC_BASE<"vexth.qu.du", int_loongarch_lsx_vexth_qu_du, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VSLLWIL_H_B : LSX_I3_U<0b0111001100001000001>, ++ LSX_2R_U3_DESC_BASE<"vsllwil.h.b", int_loongarch_lsx_vsllwil_h_b, LSX128HOpnd, LSX128BOpnd>; ++ ++def VSLLWIL_W_H : LSX_I4_U<0b011100110000100001>, ++ LSX_2R_U4_DESC_BASE<"vsllwil.w.h", int_loongarch_lsx_vsllwil_w_h, LSX128WOpnd, LSX128HOpnd>; ++ ++def VSLLWIL_D_W : LSX_I5_U<0b01110011000010001>, ++ LSX_2R_U5_DESC_BASE<"vsllwil.d.w", int_loongarch_lsx_vsllwil_d_w, LSX128DOpnd, LSX128WOpnd> ; ++ ++ ++def VEXTL_Q_D : LSX_2R<0b0111001100001001000000>, ++ LSX_2R_DESC_BASE<"vextl.q.d", int_loongarch_lsx_vextl_q_d, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VSLLWIL_HU_BU : LSX_I3_U<0b0111001100001100001>, ++ LSX_2R_U3_DESC_BASE<"vsllwil.hu.bu", int_loongarch_lsx_vsllwil_hu_bu, LSX128HOpnd, LSX128BOpnd>; ++ ++def VSLLWIL_WU_HU : LSX_I4_U<0b011100110000110001>, ++ LSX_2R_U4_DESC_BASE<"vsllwil.wu.hu", int_loongarch_lsx_vsllwil_wu_hu, LSX128WOpnd, LSX128HOpnd>; ++ ++def VSLLWIL_DU_WU : LSX_I5_U<0b01110011000011001>, ++ LSX_2R_U5_DESC_BASE<"vsllwil.du.wu", int_loongarch_lsx_vsllwil_du_wu, LSX128DOpnd, LSX128WOpnd> ; ++ ++ ++def VEXTL_QU_DU : LSX_2R<0b0111001100001101000000>, ++ LSX_2R_DESC_BASE<"vextl.qu.du", int_loongarch_lsx_vextl_qu_du, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VBITCLRI_B : LSX_I3_U<0b0111001100010000001>, ++ LSX_2R_U3_DESC_BASE<"vbitclri.b", int_loongarch_lsx_vbitclri_b, LSX128BOpnd, LSX128BOpnd>; ++ ++def VBITCLRI_H : LSX_I4_U<0b011100110001000001>, ++ LSX_2R_U4_DESC_BASE<"vbitclri.h", int_loongarch_lsx_vbitclri_h, LSX128HOpnd, LSX128HOpnd>; ++ ++def VBITCLRI_W : LSX_I5_U<0b01110011000100001>, ++ LSX_2R_U5_DESC_BASE<"vbitclri.w", int_loongarch_lsx_vbitclri_w, LSX128WOpnd, LSX128WOpnd>; ++ ++def VBITCLRI_D : LSX_I6_U<0b0111001100010001>, ++ LSX_2R_U6_DESC_BASE<"vbitclri.d", int_loongarch_lsx_vbitclri_d, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VBITSETI_B : LSX_I3_U<0b0111001100010100001>, ++ LSX_2R_U3_DESC_BASE<"vbitseti.b", int_loongarch_lsx_vbitseti_b, LSX128BOpnd, LSX128BOpnd>; ++ ++def VBITSETI_H : LSX_I4_U<0b011100110001010001>, ++ LSX_2R_U4_DESC_BASE<"vbitseti.h", int_loongarch_lsx_vbitseti_h, LSX128HOpnd, LSX128HOpnd>; ++ ++def VBITSETI_W : LSX_I5_U<0b01110011000101001>, ++ 
LSX_2R_U5_DESC_BASE<"vbitseti.w", int_loongarch_lsx_vbitseti_w, LSX128WOpnd, LSX128WOpnd>; ++ ++def VBITSETI_D : LSX_I6_U<0b0111001100010101>, ++ LSX_2R_U6_DESC_BASE<"vbitseti.d", int_loongarch_lsx_vbitseti_d, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VBITREVI_B : LSX_I3_U<0b0111001100011000001>, ++ LSX_2R_U3_DESC_BASE<"vbitrevi.b", int_loongarch_lsx_vbitrevi_b, LSX128BOpnd, LSX128BOpnd>; ++ ++def VBITREVI_H : LSX_I4_U<0b011100110001100001>, ++ LSX_2R_U4_DESC_BASE<"vbitrevi.h", int_loongarch_lsx_vbitrevi_h, LSX128HOpnd, LSX128HOpnd>; ++ ++def VBITREVI_W : LSX_I5_U<0b01110011000110001>, ++ LSX_2R_U5_DESC_BASE<"vbitrevi.w", int_loongarch_lsx_vbitrevi_w, LSX128WOpnd, LSX128WOpnd>; ++ ++def VBITREVI_D : LSX_I6_U<0b0111001100011001>, ++ LSX_2R_U6_DESC_BASE<"vbitrevi.d", int_loongarch_lsx_vbitrevi_d, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VSSRLRNI_B_H : LSX_I4_U<0b011100110101000001>, ++ LSX_2R_3R_U4_DESC_BASE<"vssrlrni.b.h", int_loongarch_lsx_vssrlrni_b_h, LSX128BOpnd, LSX128BOpnd>; ++ ++def VSSRLRNI_H_W : LSX_I5_U<0b01110011010100001>, ++ LSX_2R_3R_U5_DESC_BASE<"vssrlrni.h.w", int_loongarch_lsx_vssrlrni_h_w, LSX128HOpnd, LSX128HOpnd>; ++ ++def VSSRLRNI_W_D : LSX_I6_U<0b0111001101010001>, ++ LSX_2R_3R_U6_DESC_BASE<"vssrlrni.w.d", int_loongarch_lsx_vssrlrni_w_d, LSX128WOpnd, LSX128WOpnd>; ++ ++def VSSRLRNI_D_Q : LSX_I7_U<0b011100110101001>, ++ LSX_2R_3R_U7_DESC_BASE<"vssrlrni.d.q", int_loongarch_lsx_vssrlrni_d_q, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VSRANI_B_H : LSX_I4_U<0b011100110101100001>, ++ LSX_2R_3R_U4_DESC_BASE<"vsrani.b.h", int_loongarch_lsx_vsrani_b_h, LSX128BOpnd, LSX128BOpnd>; ++ ++def VSRANI_H_W : LSX_I5_U<0b01110011010110001>, ++ LSX_2R_3R_U5_DESC_BASE<"vsrani.h.w", int_loongarch_lsx_vsrani_h_w, LSX128HOpnd, LSX128HOpnd>; ++ ++def VSRANI_W_D : LSX_I6_U<0b0111001101011001>, ++ LSX_2R_3R_U6_DESC_BASE<"vsrani.w.d", int_loongarch_lsx_vsrani_w_d, LSX128WOpnd, LSX128WOpnd>; ++ ++def VSRANI_D_Q : LSX_I7_U<0b011100110101101>, ++ LSX_2R_3R_U7_DESC_BASE<"vsrani.d.q", int_loongarch_lsx_vsrani_d_q, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VEXTRINS_B : LSX_I8_U<0b01110011100011>, ++ LSX_2R_3R_U8_DESC_BASE<"vextrins.b", int_loongarch_lsx_vextrins_b, LSX128BOpnd, LSX128BOpnd>; ++ ++def VEXTRINS_H : LSX_I8_U<0b01110011100010>, ++ LSX_2R_3R_U8_DESC_BASE<"vextrins.h", int_loongarch_lsx_vextrins_h, LSX128HOpnd, LSX128HOpnd>; ++ ++def VEXTRINS_W : LSX_I8_U<0b01110011100001>, ++ LSX_2R_3R_U8_DESC_BASE<"vextrins.w", int_loongarch_lsx_vextrins_w, LSX128WOpnd, LSX128WOpnd>; ++ ++def VEXTRINS_D : LSX_I8_U<0b01110011100000>, ++ LSX_2R_3R_U8_DESC_BASE<"vextrins.d", int_loongarch_lsx_vextrins_d, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VBITSELI_B : LSX_I8_U<0b01110011110001>, ++ LSX_2R_3R_U8_DESC_BASE<"vbitseli.b", int_loongarch_lsx_vbitseli_b, LSX128BOpnd, LSX128BOpnd>; ++ ++ ++def VANDI_B : LSX_I8_U<0b01110011110100>, ++ LSX_2R_U8_DESC_BASE<"vandi.b", int_loongarch_lsx_vandi_b, LSX128BOpnd, LSX128BOpnd>; ++ ++ ++def VORI_B : LSX_I8_U<0b01110011110101>, ++ LSX_2R_U8_DESC_BASE<"vori.b", int_loongarch_lsx_vori_b, LSX128BOpnd, LSX128BOpnd>; ++ ++ ++def VXORI_B : LSX_I8_U<0b01110011110110>, ++ LSX_2R_U8_DESC_BASE<"vxori.b", int_loongarch_lsx_vxori_b, LSX128BOpnd, LSX128BOpnd>; ++ ++ ++def VNORI_B : LSX_I8_U<0b01110011110111>, ++ LSX_2R_U8_DESC_BASE<"vnori.b", int_loongarch_lsx_vnori_b, LSX128BOpnd, LSX128BOpnd>; ++ ++ ++def VLDI : LSX_1R_I13<0b01110011111000>, ++ LSX_I13_DESC_BASE<"vldi", int_loongarch_lsx_vldi, i32, simm13Op, LSX128DOpnd>; ++ ++def VLDI_B : LSX_1R_I13_I10<0b01110011111000000>, ++ 
LSX_I13_DESC_BASE_10<"vldi", int_loongarch_lsx_vrepli_b, simm10, immZExt10, LSX128BOpnd>; ++ ++def VLDI_H : LSX_1R_I13_I10<0b01110011111000001>, ++ LSX_I13_DESC_BASE_10<"vldi", int_loongarch_lsx_vrepli_h, simm10, immZExt10, LSX128HOpnd>; ++ ++def VLDI_W : LSX_1R_I13_I10<0b01110011111000010>, ++ LSX_I13_DESC_BASE_10<"vldi", int_loongarch_lsx_vrepli_w, simm10, immZExt10, LSX128WOpnd>; ++ ++def VLDI_D : LSX_1R_I13_I10<0b01110011111000011>, ++ LSX_I13_DESC_BASE_10<"vldi", int_loongarch_lsx_vrepli_d, simm10, immZExt10, LSX128DOpnd>; ++ ++def VPERMI_W : LSX_I8_U<0b01110011111001>, ++ LSX_2R_3R_U8_DESC_BASE<"vpermi.w", int_loongarch_lsx_vpermi_w, LSX128WOpnd, LSX128WOpnd>; ++ ++ ++def VSEQ_B : LSX_3R<0b01110000000000000>, IsCommutable, ++ LSX_3R_DESC_BASE<"vseq.b", vseteq_v16i8, LSX128BOpnd>; ++ ++def VSEQ_H : LSX_3R<0b01110000000000001>, IsCommutable, ++ LSX_3R_DESC_BASE<"vseq.h", vseteq_v8i16, LSX128HOpnd>; ++ ++def VSEQ_W : LSX_3R<0b01110000000000010>, IsCommutable, ++ LSX_3R_DESC_BASE<"vseq.w", vseteq_v4i32, LSX128WOpnd> ; ++ ++def VSEQ_D : LSX_3R<0b01110000000000011>, IsCommutable, ++ LSX_3R_DESC_BASE<"vseq.d", vseteq_v2i64, LSX128DOpnd>; ++ ++ ++def VSLE_B : LSX_3R<0b01110000000000100>, ++ LSX_3R_DESC_BASE<"vsle.b", vsetle_v16i8, LSX128BOpnd>; ++ ++def VSLE_H : LSX_3R<0b01110000000000101>, ++ LSX_3R_DESC_BASE<"vsle.h", vsetle_v8i16, LSX128HOpnd>; ++ ++def VSLE_W : LSX_3R<0b01110000000000110>, ++ LSX_3R_DESC_BASE<"vsle.w", vsetle_v4i32, LSX128WOpnd>; ++ ++def VSLE_D : LSX_3R<0b01110000000000111>, ++ LSX_3R_DESC_BASE<"vsle.d", vsetle_v2i64, LSX128DOpnd>; ++ ++ ++def VSLE_BU : LSX_3R<0b01110000000001000>, ++ LSX_3R_DESC_BASE<"vsle.bu", vsetule_v16i8, LSX128BOpnd>; ++ ++def VSLE_HU : LSX_3R<0b01110000000001001>, ++ LSX_3R_DESC_BASE<"vsle.hu", vsetule_v8i16, LSX128HOpnd>; ++ ++def VSLE_WU : LSX_3R<0b01110000000001010>, ++ LSX_3R_DESC_BASE<"vsle.wu", vsetule_v4i32, LSX128WOpnd>; ++ ++def VSLE_DU : LSX_3R<0b01110000000001011>, ++ LSX_3R_DESC_BASE<"vsle.du", vsetule_v2i64, LSX128DOpnd>; ++ ++ ++def VSLT_B : LSX_3R<0b01110000000001100>, ++ LSX_3R_DESC_BASE<"vslt.b", vsetlt_v16i8, LSX128BOpnd>; ++ ++def VSLT_H : LSX_3R<0b01110000000001101>, ++ LSX_3R_DESC_BASE<"vslt.h", vsetlt_v8i16, LSX128HOpnd>; ++ ++def VSLT_W : LSX_3R<0b01110000000001110>, ++ LSX_3R_DESC_BASE<"vslt.w", vsetlt_v4i32, LSX128WOpnd>; ++ ++def VSLT_D : LSX_3R<0b01110000000001111>, ++ LSX_3R_DESC_BASE<"vslt.d", vsetlt_v2i64, LSX128DOpnd>; ++ ++ ++def VSLT_BU : LSX_3R<0b01110000000010000>, ++ LSX_3R_DESC_BASE<"vslt.bu", vsetult_v16i8, LSX128BOpnd>; ++ ++def VSLT_HU : LSX_3R<0b01110000000010001>, ++ LSX_3R_DESC_BASE<"vslt.hu", vsetult_v8i16, LSX128HOpnd>; ++ ++def VSLT_WU : LSX_3R<0b01110000000010010>, ++ LSX_3R_DESC_BASE<"vslt.wu", vsetult_v4i32, LSX128WOpnd>; ++ ++def VSLT_DU : LSX_3R<0b01110000000010011>, ++ LSX_3R_DESC_BASE<"vslt.du", vsetult_v2i64, LSX128DOpnd>; ++ ++ ++def VADD_B : LSX_3R<0b01110000000010100>, IsCommutable, ++ LSX_3R_DESC_BASE<"vadd.b", add, LSX128BOpnd>; ++ ++def VADD_H : LSX_3R<0b01110000000010101>, IsCommutable, ++ LSX_3R_DESC_BASE<"vadd.h", add, LSX128HOpnd>; ++ ++def VADD_W : LSX_3R<0b01110000000010110>, IsCommutable, ++ LSX_3R_DESC_BASE<"vadd.w", add, LSX128WOpnd>; ++ ++def VADD_D : LSX_3R<0b01110000000010111>, IsCommutable, ++ LSX_3R_DESC_BASE<"vadd.d", add, LSX128DOpnd>; ++ ++ ++def VSUB_B : LSX_3R<0b01110000000011000>, ++ LSX_3R_DESC_BASE<"vsub.b", sub, LSX128BOpnd>; ++ ++def VSUB_H : LSX_3R<0b01110000000011001>, ++ LSX_3R_DESC_BASE<"vsub.h", sub, LSX128HOpnd>; ++ ++def VSUB_W : 
LSX_3R<0b01110000000011010>, ++ LSX_3R_DESC_BASE<"vsub.w", sub, LSX128WOpnd>; ++ ++def VSUB_D : LSX_3R<0b01110000000011011>, ++ LSX_3R_DESC_BASE<"vsub.d", sub, LSX128DOpnd>; ++ ++ ++ ++//Pat ++class LSXBitconvertPat preds = [HasLSX]> : ++ LSXPat<(DstVT (bitconvert SrcVT:$src)), ++ (COPY_TO_REGCLASS SrcVT:$src, DstRC), preds>; ++ ++def : LSXBitconvertPat; ++def : LSXBitconvertPat; ++def : LSXBitconvertPat; ++def : LSXBitconvertPat; ++ ++def : LSXBitconvertPat; ++def : LSXBitconvertPat; ++def : LSXBitconvertPat; ++def : LSXBitconvertPat; ++def : LSXBitconvertPat; ++ ++def : LSXBitconvertPat; ++def : LSXBitconvertPat; ++def : LSXBitconvertPat; ++def : LSXBitconvertPat; ++def : LSXBitconvertPat; ++ ++def : LSXBitconvertPat; ++def : LSXBitconvertPat; ++def : LSXBitconvertPat; ++def : LSXBitconvertPat; ++ ++def : LSXBitconvertPat; ++def : LSXBitconvertPat; ++def : LSXBitconvertPat; ++def : LSXBitconvertPat; ++ ++def : LSXBitconvertPat; ++def : LSXBitconvertPat; ++def : LSXBitconvertPat; ++def : LSXBitconvertPat; ++ ++def : LSXBitconvertPat; ++def : LSXBitconvertPat; ++def : LSXBitconvertPat; ++def : LSXBitconvertPat; ++ ++ ++ ++ ++def : LSXPat<(i32 (vextract_sext_i8 v16i8:$vj, i32:$idx)), ++ (SRAI_W (COPY_TO_REGCLASS (i32 (EXTRACT_SUBREG (VREPLVE_B v16i8:$vj, ++ i32:$idx), ++ sub_lo)), ++ GPR32), (i32 24))>; ++def : LSXPat<(i32 (vextract_sext_i16 v8i16:$vj, i32:$idx)), ++ (SRAI_W (COPY_TO_REGCLASS (i32 (EXTRACT_SUBREG (VREPLVE_H v8i16:$vj, ++ i32:$idx), ++ sub_lo)), ++ GPR32), (i32 16))>; ++def : LSXPat<(i32 (vextract_sext_i32 v4i32:$vj, i32:$idx)), ++ (COPY_TO_REGCLASS (i32 (EXTRACT_SUBREG (VREPLVE_W v4i32:$vj, ++ i32:$idx), ++ sub_lo)), ++ GPR32)>; ++def : LSXPat<(i64 (vextract_sext_i64 v2i64:$vj, i32:$idx)), ++ (COPY_TO_REGCLASS (i64 (EXTRACT_SUBREG (VREPLVE_D v2i64:$vj, ++ i32:$idx), ++ sub_64)), ++ GPR64)>; ++ ++def : LSXPat<(i32 (vextract_zext_i8 v16i8:$vj, i32:$idx)), ++ (SRLI_W (COPY_TO_REGCLASS (i32 (EXTRACT_SUBREG (VREPLVE_B v16i8:$vj, ++ i32:$idx), ++ sub_lo)), ++ GPR32), (i32 24))>; ++def : LSXPat<(i32 (vextract_zext_i16 v8i16:$vj, i32:$idx)), ++ (SRLI_W (COPY_TO_REGCLASS (i32 (EXTRACT_SUBREG (VREPLVE_H v8i16:$vj, ++ i32:$idx), ++ sub_lo)), ++ GPR32), (i32 16))>; ++def : LSXPat<(i32 (vextract_zext_i32 v4i32:$vj, i32:$idx)), ++ (COPY_TO_REGCLASS (i32 (EXTRACT_SUBREG (VREPLVE_W v4i32:$vj, ++ i32:$idx), ++ sub_lo)), ++ GPR32)>; ++ ++def : LSXPat<(i64 (vextract_zext_i64 v2i64:$vj, i32:$idx)), ++ (COPY_TO_REGCLASS (i64 (EXTRACT_SUBREG (VREPLVE_D v2i64:$vj, ++ i32:$idx), ++ sub_64)), ++ GPR64)>; ++ ++def : LSXPat<(f32 (vector_extract v4f32:$vj, i32:$idx)), ++ (f32 (EXTRACT_SUBREG (VREPLVE_W v4f32:$vj, ++ i32:$idx), ++ sub_lo))>; ++def : LSXPat<(f64 (vector_extract v2f64:$vj, i32:$idx)), ++ (f64 (EXTRACT_SUBREG (VREPLVE_D v2f64:$vj, ++ i32:$idx), ++ sub_64))>; ++ ++def : LSXPat< ++ (i32 (vextract_sext_i8 v16i8:$vj, i64:$idx)), ++ (SRAI_W (COPY_TO_REGCLASS ++ (i32 (EXTRACT_SUBREG ++ (VREPLVE_B v16i8:$vj, ++ (COPY_TO_REGCLASS ++ (i32 (EXTRACT_SUBREG i64:$idx, sub_32)), GPR32)), ++ sub_lo)), ++ GPR32), ++ (i32 24))>; ++def : LSXPat< ++ (i32 (vextract_sext_i16 v8i16:$vj, i64:$idx)), ++ (SRAI_W (COPY_TO_REGCLASS ++ (i32 (EXTRACT_SUBREG ++ (VREPLVE_H v8i16:$vj, ++ (COPY_TO_REGCLASS ++ (i32 (EXTRACT_SUBREG i64:$idx, sub_32)), GPR32)), ++ sub_lo)), ++ GPR32), ++ (i32 16))>; ++ ++def : LSXPat< ++ (i32 (vextract_sext_i32 v4i32:$vj, i64:$idx)), ++ (COPY_TO_REGCLASS ++ (i32 (EXTRACT_SUBREG ++ (VREPLVE_W v4i32:$vj, ++ (COPY_TO_REGCLASS ++ (i32 (EXTRACT_SUBREG i64:$idx, sub_32)), GPR32)), ++ sub_lo)), ++ 
GPR32)>; ++ ++def : LSXPat< ++ (i64 (vextract_sext_i64 v2i64:$vj, i64:$idx)), ++ (COPY_TO_REGCLASS ++ (i64 (EXTRACT_SUBREG ++ (VREPLVE_D v2i64:$vj, ++ (COPY_TO_REGCLASS (i32 (EXTRACT_SUBREG i64:$idx, sub_32)), GPR32)), ++ sub_64)), ++ GPR64)>; ++ ++def : LSXPat< ++ (i32 (vextract_zext_i8 v16i8:$vj, i64:$idx)), ++ (SRLI_W (COPY_TO_REGCLASS ++ (i32 (EXTRACT_SUBREG ++ (VREPLVE_B v16i8:$vj, ++ (COPY_TO_REGCLASS ++ (i32 (EXTRACT_SUBREG i64:$idx, sub_32)), GPR32)), ++ sub_lo)), ++ GPR32), ++ (i32 24))>; ++def : LSXPat< ++ (i32 (vextract_zext_i16 v8i16:$vj, i64:$idx)), ++ (SRLI_W (COPY_TO_REGCLASS ++ (i32 (EXTRACT_SUBREG ++ (VREPLVE_H v8i16:$vj, ++ (COPY_TO_REGCLASS ++ (i32 (EXTRACT_SUBREG i64:$idx, sub_32)), GPR32)), ++ sub_lo)), ++ GPR32), ++ (i32 16))>; ++def : LSXPat< ++ (i32 (vextract_zext_i32 v4i32:$vj, i64:$idx)), ++ (COPY_TO_REGCLASS ++ (i32 (EXTRACT_SUBREG ++ (VREPLVE_W v4i32:$vj, ++ (COPY_TO_REGCLASS (i32 (EXTRACT_SUBREG i64:$idx, sub_32)), GPR32)), ++ sub_lo)), ++ GPR32)>; ++def : LSXPat< ++ (i64 (vextract_zext_i64 v2i64:$vj, i64:$idx)), ++ (COPY_TO_REGCLASS ++ (i64 (EXTRACT_SUBREG ++ (VREPLVE_D v2i64:$vj, ++ (COPY_TO_REGCLASS (i32 (EXTRACT_SUBREG i64:$idx, sub_32)), GPR32)), ++ sub_64)), ++ GPR64)>; ++ ++ def : LSXPat< ++ (f32 (vector_extract v4f32:$vj, i64:$idx)), ++ (f32 (EXTRACT_SUBREG ++ (VREPLVE_W v4f32:$vj, ++ (COPY_TO_REGCLASS (i32 (EXTRACT_SUBREG i64:$idx, sub_32)), GPR32)), ++ sub_lo))>; ++def : LSXPat< ++ (f64 (vector_extract v2f64:$vj, i64:$idx)), ++ (f64 (EXTRACT_SUBREG ++ (VREPLVE_D v2f64:$vj, ++ (COPY_TO_REGCLASS (i32 (EXTRACT_SUBREG i64:$idx, sub_32)), GPR32)), ++ sub_64))>; ++ ++ ++def : LSXPat<(vfseteq_v4f32 LSX128WOpnd:$a, LSX128WOpnd:$b), ++ (VFCMP_CEQ_S LSX128WOpnd:$a, LSX128WOpnd:$b)>; ++ ++def : LSXPat<(vfseteq_v2f64 LSX128DOpnd:$a, LSX128DOpnd:$b), ++ (VFCMP_CEQ_D LSX128DOpnd:$a, LSX128DOpnd:$b)>; ++ ++def : LSXPat<(vfsetle_v4f32 LSX128WOpnd:$a, LSX128WOpnd:$b), ++ (VFCMP_CLE_S LSX128WOpnd:$a, LSX128WOpnd:$b)>; ++ ++def : LSXPat<(vfsetle_v2f64 LSX128DOpnd:$a, LSX128DOpnd:$b), ++ (VFCMP_CLE_D LSX128DOpnd:$a, LSX128DOpnd:$b)>; ++ ++def : LSXPat<(vfsetlt_v4f32 LSX128WOpnd:$a, LSX128WOpnd:$b), ++ (VFCMP_CLT_S LSX128WOpnd:$a, LSX128WOpnd:$b)>; ++ ++def : LSXPat<(vfsetlt_v2f64 LSX128DOpnd:$a, LSX128DOpnd:$b), ++ (VFCMP_CLT_D LSX128DOpnd:$a, LSX128DOpnd:$b)>; ++ ++def : LSXPat<(vfsetne_v4f32 LSX128WOpnd:$a, LSX128WOpnd:$b), ++ (VFCMP_CNE_S LSX128WOpnd:$a, LSX128WOpnd:$b)>; ++ ++def : LSXPat<(vfsetne_v2f64 LSX128DOpnd:$a, LSX128DOpnd:$b), ++ (VFCMP_CNE_D LSX128DOpnd:$a, LSX128DOpnd:$b)>; ++ ++ ++class LSX_INSERT_PSEUDO_BASE : ++ LSXPseudo<(outs ROVD:$vd), (ins ROVD:$vd_in, ImmOp:$n, ROFS:$fs), ++ [(set ROVD:$vd, (OpNode (Ty ROVD:$vd_in), ROFS:$fs, Imm:$n))]> { ++ bit usesCustomInserter = 1; ++ string Constraints = "$vd = $vd_in"; ++} ++ ++ ++class INSERT_FW_PSEUDO_DESC : LSX_INSERT_PSEUDO_BASE; ++class INSERT_FD_PSEUDO_DESC : LSX_INSERT_PSEUDO_BASE; ++ ++def INSERT_FW_PSEUDO : INSERT_FW_PSEUDO_DESC; ++def INSERT_FD_PSEUDO : INSERT_FD_PSEUDO_DESC; ++ ++ ++class LSX_INSERT_VIDX_PSEUDO_BASE : ++ LSXPseudo<(outs ROVD:$vd), (ins ROVD:$vd_in, ROIdx:$n, ROFS:$fs), ++ [(set ROVD:$vd, (OpNode (Ty ROVD:$vd_in), ROFS:$fs, ++ ROIdx:$n))]> { ++ bit usesCustomInserter = 1; ++ string Constraints = "$vd = $vd_in"; ++} ++ ++class INSERT_H_VIDX64_PSEUDO_DESC : ++ LSX_INSERT_VIDX_PSEUDO_BASE; ++def INSERT_H_VIDX64_PSEUDO : INSERT_H_VIDX64_PSEUDO_DESC; ++ ++class INSERTPostRA : ++ LoongArchPseudo<(outs RC:$xd), (ins RC:$xd_in, RD:$n, RE:$fs), []> { ++ let mayLoad = 1; ++ let mayStore = 1; 
++} ++ ++def INSERT_H_VIDX64_PSEUDO_POSTRA : INSERTPostRA; ++ ++class LSX_COPY_PSEUDO_BASE : ++ LSXPseudo<(outs RCD:$vd), (ins RCVS:$vj, ImmOp:$n), ++ [(set RCD:$vd, (OpNode (VecTy RCVS:$vj), Imm:$n))]> { ++ bit usesCustomInserter = 1; ++} ++ ++ ++class COPY_FW_PSEUDO_DESC : LSX_COPY_PSEUDO_BASE; ++class COPY_FD_PSEUDO_DESC : LSX_COPY_PSEUDO_BASE; ++def COPY_FW_PSEUDO : COPY_FW_PSEUDO_DESC; ++def COPY_FD_PSEUDO : COPY_FD_PSEUDO_DESC; ++ ++ ++let isCodeGenOnly = 1 in { ++ ++def VST_H : LSX_I12_S<0b0010110001>, ++ ST_DESC_BASE<"vst", store, v8i16, LSX128HOpnd, mem_simm12>; ++def VST_W : LSX_I12_S<0b0010110001>, ++ ST_DESC_BASE<"vst", store, v4i32, LSX128WOpnd, mem_simm12>; ++def VST_D : LSX_I12_S<0b0010110001>, ++ ST_DESC_BASE<"vst", store, v2i64, LSX128DOpnd, mem_simm12>; ++ ++ ++def VLD_H : LSX_I12_S<0b0010110000>, ++ LD_DESC_BASE<"vld", load, v8i16, LSX128HOpnd, mem_simm12>; ++def VLD_W : LSX_I12_S<0b0010110000>, ++ LD_DESC_BASE<"vld", load, v4i32, LSX128WOpnd, mem_simm12>; ++def VLD_D : LSX_I12_S<0b0010110000>, ++ LD_DESC_BASE<"vld", load, v2i64, LSX128DOpnd, mem_simm12>; ++ ++ ++ ++def VANDI_B_N : LSX_I8_U<0b01110011110100>, ++ LSX_BIT_U8_VREPLVE_DESC_BASE<"vandi.b", and, vsplati8_uimm8, LSX128BOpnd>; ++ ++ ++def VXORI_B_N : LSX_I8_U<0b01110011110110>, ++ LSX_BIT_U8_VREPLVE_DESC_BASE<"vxori.b", xor, vsplati8_uimm8, LSX128BOpnd>; ++ ++ ++def VSRAI_B_N : LSX_I3_U<0b0111001100110100001>, ++ LSX_BIT_U3_VREPLVE_DESC_BASE<"vsrai.b", sra, vsplati8_uimm3, LSX128BOpnd>; ++ ++def VSRAI_H_N : LSX_I4_U<0b011100110011010001>, ++ LSX_BIT_U4_VREPLVE_DESC_BASE<"vsrai.h", sra, vsplati16_uimm4, LSX128HOpnd>; ++ ++def VSRAI_W_N : LSX_I5_U<0b01110011001101001>, ++ LSX_BIT_U5_VREPLVE_DESC_BASE<"vsrai.w", sra, vsplati32_uimm5, LSX128WOpnd>; ++ ++def VSRAI_D_N : LSX_I6_U<0b0111001100110101>, ++ LSX_BIT_U6_VREPLVE_DESC_BASE<"vsrai.d", sra, vsplati64_uimm6, LSX128DOpnd>; ++ ++ ++def VMAXI_BU_N : LSX_I5_U<0b01110010100101000>, ++ LSX_I5_U_DESC_BASE<"vmaxi.bu", umax, vsplati8_uimm5, LSX128BOpnd>; ++ ++def VMAXI_HU_N : LSX_I5_U<0b01110010100101001>, ++ LSX_I5_U_DESC_BASE<"vmaxi.hu", umax, vsplati16_uimm5, LSX128HOpnd>; ++ ++def VMAXI_WU_N : LSX_I5_U<0b01110010100101010>, ++ LSX_I5_U_DESC_BASE<"vmaxi.wu", umax, vsplati32_uimm5, LSX128WOpnd>; ++ ++def VMAXI_DU_N : LSX_I5_U<0b01110010100101011>, ++ LSX_I5_U_DESC_BASE<"vmaxi.du", umax, vsplati64_uimm5, LSX128DOpnd>; ++ ++ ++def VMINI_B_N : LSX_I5<0b01110010100100100>, ++ LSX_I5_DESC_BASE<"vmini.b", smin, vsplati8_simm5, LSX128BOpnd>; ++ ++def VMINI_H_N : LSX_I5<0b01110010100100101>, ++ LSX_I5_DESC_BASE<"vmini.h", smin, vsplati16_simm5, LSX128HOpnd>; ++ ++def VMINI_W_N : LSX_I5<0b01110010100100110>, ++ LSX_I5_DESC_BASE<"vmini.w", smin, vsplati32_simm5, LSX128WOpnd>; ++ ++def VMINI_D_N : LSX_I5<0b01110010100100111>, ++ LSX_I5_DESC_BASE<"vmini.d", smin, vsplati64_simm5, LSX128DOpnd>; ++ ++ ++def VMAXI_B_N : LSX_I5<0b01110010100100000>, ++ LSX_I5_DESC_BASE<"vmaxi.b", smax, vsplati8_simm5, LSX128BOpnd>; ++ ++def VMAXI_H_N : LSX_I5<0b01110010100100001>, ++ LSX_I5_DESC_BASE<"vmaxi.h", smax, vsplati16_simm5, LSX128HOpnd>; ++ ++def VMAXI_W_N : LSX_I5<0b01110010100100010>, ++ LSX_I5_DESC_BASE<"vmaxi.w", smax, vsplati32_simm5, LSX128WOpnd>; ++ ++def VMAXI_D_N : LSX_I5<0b01110010100100011>, ++ LSX_I5_DESC_BASE<"vmaxi.d", smax, vsplati64_simm5, LSX128DOpnd>; ++ ++ ++def VSEQI_B_N : LSX_I5<0b01110010100000000>, ++ LSX_I5_DESC_BASE<"vseqi.b", vseteq_v16i8, vsplati8_simm5, LSX128BOpnd>; ++ ++def VSEQI_H_N : LSX_I5<0b01110010100000001>, ++ LSX_I5_DESC_BASE<"vseqi.h", vseteq_v8i16, 
vsplati16_simm5, LSX128HOpnd>; ++ ++def VSEQI_W_N : LSX_I5<0b01110010100000010>, ++ LSX_I5_DESC_BASE<"vseqi.w", vseteq_v4i32, vsplati32_simm5, LSX128WOpnd>; ++ ++def VSEQI_D_N : LSX_I5<0b01110010100000011>, ++ LSX_I5_DESC_BASE<"vseqi.d", vseteq_v2i64, vsplati64_simm5, LSX128DOpnd>; ++ ++ ++def VSLEI_B_N : LSX_I5<0b01110010100000100>, ++ LSX_I5_DESC_BASE<"vslei.b", vsetle_v16i8, vsplati8_simm5, LSX128BOpnd>; ++ ++def VSLEI_H_N : LSX_I5<0b01110010100000101>, ++ LSX_I5_DESC_BASE<"vslei.h", vsetle_v8i16, vsplati16_simm5, LSX128HOpnd>; ++ ++def VSLEI_W_N : LSX_I5<0b01110010100000110>, ++ LSX_I5_DESC_BASE<"vslei.w", vsetle_v4i32, vsplati32_simm5, LSX128WOpnd>; ++ ++def VSLEI_D_N : LSX_I5<0b01110010100000111>, ++ LSX_I5_DESC_BASE<"vslei.d", vsetle_v2i64, vsplati64_simm5, LSX128DOpnd>; ++ ++def VSLEI_BU_N : LSX_I5_U<0b01110010100001000>, ++ LSX_I5_U_DESC_BASE<"vslei.bu", vsetule_v16i8, vsplati8_uimm5, LSX128BOpnd>; ++ ++def VSLEI_HU_N : LSX_I5_U<0b01110010100001001>, ++ LSX_I5_U_DESC_BASE<"vslei.hu", vsetule_v8i16, vsplati16_uimm5, LSX128HOpnd>; ++ ++def VSLEI_WU_N : LSX_I5_U<0b01110010100001010>, ++ LSX_I5_U_DESC_BASE<"vslei.wu", vsetule_v4i32, vsplati32_uimm5, LSX128WOpnd>; ++ ++def VSLEI_DU_N : LSX_I5_U<0b01110010100001011>, ++ LSX_I5_U_DESC_BASE<"vslei.du", vsetule_v2i64, vsplati64_uimm5, LSX128DOpnd>; ++ ++ ++def VSLTI_B_N : LSX_I5<0b01110010100001100>, ++ LSX_I5_DESC_BASE<"vslti.b", vsetlt_v16i8, vsplati8_simm5, LSX128BOpnd>; ++ ++def VSLTI_H_N : LSX_I5<0b01110010100001101>, ++ LSX_I5_DESC_BASE<"vslti.h", vsetlt_v8i16, vsplati16_simm5, LSX128HOpnd>; ++ ++def VSLTI_W_N : LSX_I5<0b01110010100001110>, ++ LSX_I5_DESC_BASE<"vslti.w", vsetlt_v4i32, vsplati32_simm5, LSX128WOpnd>; ++ ++def VSLTI_D_N : LSX_I5<0b01110010100001111>, ++ LSX_I5_DESC_BASE<"vslti.d", vsetlt_v2i64, vsplati64_simm5, LSX128DOpnd>; ++ ++ ++def VSLTI_BU_N : LSX_I5_U<0b01110010100010000>, ++ LSX_I5_U_DESC_BASE<"vslti.bu", vsetult_v16i8, vsplati8_uimm5, LSX128BOpnd>; ++ ++def VSLTI_HU_N : LSX_I5_U<0b01110010100010001>, ++ LSX_I5_U_DESC_BASE<"vslti.hu", vsetult_v8i16, vsplati16_uimm5, LSX128HOpnd>; ++ ++def VSLTI_WU_N : LSX_I5_U<0b01110010100010010>, ++ LSX_I5_U_DESC_BASE<"vslti.wu", vsetult_v4i32, vsplati32_uimm5, LSX128WOpnd>; ++ ++def VSLTI_DU_N : LSX_I5_U<0b01110010100010011>, ++ LSX_I5_U_DESC_BASE<"vslti.du", vsetult_v2i64, vsplati64_uimm5, LSX128DOpnd>; ++ ++ ++def VBITSELI_B_N : LSX_I8_U<0b01110011110001>, ++ LSX_2R_3R_SELECT<"vbitseli.b", vselect, LSX128BOpnd, LSX128BOpnd>; ++ ++} ++ ++ ++def : LSXPat<(v4f32 (load addrimm12:$addr)), (VLD_W addrimm12:$addr)>; ++def : LSXPat<(v2f64 (load addrimm12:$addr)), (VLD_D addrimm12:$addr)>; ++ ++def VST_FW : LSXPat<(store (v4f32 LSX128W:$vj), addrimm12:$addr), ++ (VST_W LSX128W:$vj, addrimm12:$addr)>; ++def VST_FD : LSXPat<(store (v2f64 LSX128D:$vj), addrimm12:$addr), ++ (VST_D LSX128D:$vj, addrimm12:$addr)>; ++ ++def VNEG_FW : LSXPat<(fneg (v4f32 LSX128W:$vj)), ++ (VBITREVI_W LSX128W:$vj, 31)>; ++def VNEG_FD : LSXPat<(fneg (v2f64 LSX128D:$vj)), ++ (VBITREVI_D LSX128D:$vj, 63)>; ++ ++ ++def : LSXPat<(v2i64 (LoongArchVABSD v2i64:$vj, v2i64:$vk, (i32 0))), ++ (v2i64 (VABSD_D $vj, $vk))>; ++ ++def : LSXPat<(v4i32 (LoongArchVABSD v4i32:$vj, v4i32:$vk, (i32 0))), ++ (v4i32 (VABSD_W $vj, $vk))>; ++ ++def : LSXPat<(v8i16 (LoongArchVABSD v8i16:$vj, v8i16:$vk, (i32 0))), ++ (v8i16 (VABSD_H $vj, $vk))>; ++ ++def : LSXPat<(v16i8 (LoongArchVABSD v16i8:$vj, v16i8:$vk, (i32 0))), ++ (v16i8 (VABSD_B $vj, $vk))>; ++ ++def : LSXPat<(v2i64 (LoongArchUVABSD v2i64:$vj, v2i64:$vk, (i32 0))), ++ (v2i64 
(VABSD_DU $vj, $vk))>;
++
++def : LSXPat<(v4i32 (LoongArchUVABSD v4i32:$vj, v4i32:$vk, (i32 0))),
++ (v4i32 (VABSD_WU $vj, $vk))>;
++
++def : LSXPat<(v8i16 (LoongArchUVABSD v8i16:$vj, v8i16:$vk, (i32 0))),
++ (v8i16 (VABSD_HU $vj, $vk))>;
++
++def : LSXPat<(v16i8 (LoongArchUVABSD v16i8:$vj, v16i8:$vk, (i32 0))),
++ (v16i8 (VABSD_BU $vj, $vk))>;
++
++
++def : LSXPat<(or v16i8:$vj, (shl vsplat_imm_eq_1, v16i8:$vk)),
++ (VBITSET_B v16i8:$vj, v16i8:$vk)>;
++def : LSXPat<(or v8i16:$vj, (shl vsplat_imm_eq_1, v8i16:$vk)),
++ (VBITSET_H v8i16:$vj, v8i16:$vk)>;
++def : LSXPat<(or v4i32:$vj, (shl vsplat_imm_eq_1, v4i32:$vk)),
++ (VBITSET_W v4i32:$vj, v4i32:$vk)>;
++def : LSXPat<(or v2i64:$vj, (shl vsplat_imm_eq_1, v2i64:$vk)),
++ (VBITSET_D v2i64:$vj, v2i64:$vk)>;
++
++def : LSXPat<(xor v16i8:$vj, (shl vsplat_imm_eq_1, v16i8:$vk)),
++ (VBITREV_B v16i8:$vj, v16i8:$vk)>;
++def : LSXPat<(xor v8i16:$vj, (shl vsplat_imm_eq_1, v8i16:$vk)),
++ (VBITREV_H v8i16:$vj, v8i16:$vk)>;
++def : LSXPat<(xor v4i32:$vj, (shl vsplat_imm_eq_1, v4i32:$vk)),
++ (VBITREV_W v4i32:$vj, v4i32:$vk)>;
++def : LSXPat<(xor v2i64:$vj, (shl (v2i64 vsplati64_imm_eq_1), v2i64:$vk)),
++ (VBITREV_D v2i64:$vj, v2i64:$vk)>;
++
++def : LSXPat<(and v16i8:$vj, (xor (shl vsplat_imm_eq_1, v16i8:$vk), immAllOnesV)),
++ (VBITCLR_B v16i8:$vj, v16i8:$vk)>;
++def : LSXPat<(and v8i16:$vj, (xor (shl vsplat_imm_eq_1, v8i16:$vk), immAllOnesV)),
++ (VBITCLR_H v8i16:$vj, v8i16:$vk)>;
++def : LSXPat<(and v4i32:$vj, (xor (shl vsplat_imm_eq_1, v4i32:$vk), immAllOnesV)),
++ (VBITCLR_W v4i32:$vj, v4i32:$vk)>;
++def : LSXPat<(and v2i64:$vj, (xor (shl (v2i64 vsplati64_imm_eq_1), v2i64:$vk), (bitconvert (v4i32 immAllOnesV)))),
++ (VBITCLR_D v2i64:$vj, v2i64:$vk)>;
++def vsplati64_imm_eq_63 : PatLeaf<(bitconvert (v4i32 (build_vector))), [{
++ APInt Imm;
++ SDNode *BV = N->getOperand(0).getNode();
++ EVT EltTy = N->getValueType(0).getVectorElementType();
++
++ return selectVSplat(BV, Imm, EltTy.getSizeInBits()) &&
++ Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 63;
++}]>;
++
++def immi32Cst7 : ImmLeaf<i32, [{return isInt<32>(Imm) && Imm == 7;}]>;
++def immi32Cst15 : ImmLeaf<i32, [{return isInt<32>(Imm) && Imm == 15;}]>;
++def immi32Cst31 : ImmLeaf<i32, [{return isInt<32>(Imm) && Imm == 31;}]>;
++
++def vsplati8imm7 : PatFrag<(ops node:$vt),
++ (and node:$vt, (vsplati8 immi32Cst7))>;
++def vsplati16imm15 : PatFrag<(ops node:$vt),
++ (and node:$vt, (vsplati16 immi32Cst15))>;
++def vsplati32imm31 : PatFrag<(ops node:$vt),
++ (and node:$vt, (vsplati32 immi32Cst31))>;
++def vsplati64imm63 : PatFrag<(ops node:$vt),
++ (and node:$vt, vsplati64_imm_eq_63)>;
++
++class LSXShiftPat :
++ LSXPat<(VT (Node VT:$vs, (VT (and VT:$vt, Vec)))),
++ (VT (Insn VT:$vs, VT:$vt))>;
++
++class LSXBitPat :
++ LSXPat<(VT (Node VT:$vs, (shl vsplat_imm_eq_1, (Frag VT:$vt)))),
++ (VT (Insn VT:$vs, VT:$vt))>;
++
++multiclass LSXShiftPats {
++ def : LSXShiftPat(Insn#_B),
++ (vsplati8 immi32Cst7)>;
++ def : LSXShiftPat(Insn#_H),
++ (vsplati16 immi32Cst15)>;
++ def : LSXShiftPat(Insn#_W),
++ (vsplati32 immi32Cst31)>;
++ def : LSXPat<(v2i64 (Node v2i64:$vs, (v2i64 (and v2i64:$vt,
++ vsplati64_imm_eq_63)))),
++ (v2i64 (!cast(Insn#_D) v2i64:$vs, v2i64:$vt))>;
++}
++
++multiclass LSXBitPats {
++ def : LSXBitPat(Insn#_B), vsplati8imm7>;
++ def : LSXBitPat(Insn#_H), vsplati16imm15>;
++ def : LSXBitPat(Insn#_W), vsplati32imm31>;
++ def : LSXPat<(Node v2i64:$vs, (shl (v2i64 vsplati64_imm_eq_1),
++ (vsplati64imm63 v2i64:$vt))),
++ (v2i64 (!cast(Insn#_D) v2i64:$vs, v2i64:$vt))>;
++}
++
++defm : LSXShiftPats;
++defm : LSXShiftPats;
++defm : LSXShiftPats;
++defm : 
LSXBitPats; ++defm : LSXBitPats; ++ ++def : LSXPat<(and v16i8:$vs, (xor (shl vsplat_imm_eq_1, ++ (vsplati8imm7 v16i8:$vt)), ++ immAllOnesV)), ++ (v16i8 (VBITCLR_B v16i8:$vs, v16i8:$vt))>; ++def : LSXPat<(and v8i16:$vs, (xor (shl vsplat_imm_eq_1, ++ (vsplati16imm15 v8i16:$vt)), ++ immAllOnesV)), ++ (v8i16 (VBITCLR_H v8i16:$vs, v8i16:$vt))>; ++def : LSXPat<(and v4i32:$vs, (xor (shl vsplat_imm_eq_1, ++ (vsplati32imm31 v4i32:$vt)), ++ immAllOnesV)), ++ (v4i32 (VBITCLR_W v4i32:$vs, v4i32:$vt))>; ++def : LSXPat<(and v2i64:$vs, (xor (shl (v2i64 vsplati64_imm_eq_1), ++ (vsplati64imm63 v2i64:$vt)), ++ (bitconvert (v4i32 immAllOnesV)))), ++ (v2i64 (VBITCLR_D v2i64:$vs, v2i64:$vt))>; ++ ++ ++def : LSXPat<(fdiv (v4f32 (build_vector (f32 fpimm1), (f32 fpimm1), (f32 fpimm1), (f32 fpimm1))), v4f32:$v), ++ (VFRECIP_S v4f32:$v)>; ++ ++def : LSXPat<(fdiv (v2f64 (build_vector (f64 fpimm1), (f64 fpimm1))), v2f64:$v), ++ (VFRECIP_D v2f64:$v)>; ++ ++def : LSXPat<(fdiv (v4f32 fpimm1), v4f32:$v), ++ (VFRECIP_S v4f32:$v)>; ++ ++def : LSXPat<(fdiv (v2f64 fpimm1), v2f64:$v), ++ (VFRECIP_D v2f64:$v)>; ++ ++ ++def : LSXPat<(fdiv (v4f32 (build_vector (f32 fpimm1), (f32 fpimm1), (f32 fpimm1), (f32 fpimm1))), (fsqrt v4f32:$v)), ++ (VFRSQRT_S v4f32:$v)>; ++ ++def : LSXPat<(fdiv (v2f64 (build_vector (f64 fpimm1), (f64 fpimm1))), (fsqrt v2f64:$v)), ++ (VFRSQRT_D v2f64:$v)>; ++ ++def : LSXPat<(fdiv (v4f32 fpimm1), (fsqrt v4f32:$v)), ++ (VFRSQRT_S v4f32:$v)>; ++ ++def : LSXPat<(fdiv (v2f64 fpimm1), (fsqrt v2f64:$v)), ++ (VFRSQRT_D v2f64:$v)>; ++ ++ ++def : LSXPat<(abs v2i64:$v), ++ (VMAX_D v2i64:$v, (VNEG_D v2i64:$v))>; ++ ++def : LSXPat<(abs v4i32:$v), ++ (VMAX_W v4i32:$v, (VNEG_W v4i32:$v))>; ++ ++def : LSXPat<(abs v8i16:$v), ++ (VMAX_H v8i16:$v, (VNEG_H v8i16:$v))>; ++ ++def : LSXPat<(abs v16i8:$v), ++ (VMAX_B v16i8:$v, (VNEG_B v16i8:$v))>; ++ ++ ++def : LSXPat<(sub (v16i8 immAllZerosV), v16i8:$v), ++ (VNEG_B v16i8:$v)>; ++ ++def : LSXPat<(sub (v8i16 immAllZerosV), v8i16:$v), ++ (VNEG_H v8i16:$v)>; ++ ++def : LSXPat<(sub (v4i32 immAllZerosV), v4i32:$v), ++ (VNEG_W v4i32:$v)>; ++ ++def : LSXPat<(sub (v2i64 immAllZerosV), v2i64:$v), ++ (VNEG_D v2i64:$v)>; ++ ++ ++def : LSXPat<(sra ++ (v16i8 (add ++ (v16i8 (add LSX128B:$a, LSX128B:$b)), ++ (v16i8 (srl ++ (v16i8 (add LSX128B:$a, LSX128B:$b)), ++ (v16i8 (build_vector (i32 7),(i32 7),(i32 7),(i32 7), ++ (i32 7),(i32 7),(i32 7),(i32 7), ++ (i32 7),(i32 7),(i32 7),(i32 7), ++ (i32 7),(i32 7),(i32 7),(i32 7)) ++ ) ++ ) ++ ) ++ ) ++ ), ++ (v16i8 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1) ++ ))), ++ (VAVG_B (v16i8 LSX128B:$a), (v16i8 LSX128B:$b))>; ++ ++def : LSXPat<(sra ++ (v8i16 (add ++ (v8i16 (add LSX128H:$a, LSX128H:$b)), ++ (v8i16 (srl ++ (v8i16 (add LSX128H:$a, LSX128H:$b)), ++ (v8i16 (build_vector (i32 15),(i32 15),(i32 15),(i32 15), ++ (i32 15),(i32 15),(i32 15),(i32 15)) ++ ) ++ ) ++ ) ++ ) ++ ), ++ (v8i16 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1) ++ ))), ++ (VAVG_H (v8i16 LSX128H:$a), (v8i16 LSX128H:$b))>; ++ ++def : LSXPat<(sra ++ (v4i32 (add ++ (v4i32 (add LSX128W:$a, LSX128W:$b)), ++ (v4i32 (srl ++ (v4i32 (add LSX128W:$a, LSX128W:$b)), ++ (v4i32 (build_vector (i32 31),(i32 31),(i32 31),(i32 31)) ++ ) ++ ) ++ ) ++ ) ++ ), ++ (v4i32 (build_vector (i32 1),(i32 1),(i32 1),(i32 1)))), ++ (VAVG_W (v4i32 LSX128W:$a), (v4i32 LSX128W:$b))>; ++ ++def : LSXPat<(sra ++ (v2i64 (add ++ (v2i64 (add LSX128D:$a, LSX128D:$b)), ++ (v2i64 
(srl ++ (v2i64 (add LSX128D:$a, LSX128D:$b)), ++ (v2i64 (build_vector (i64 63),(i64 63))) ++ ) ++ ) ++ ) ++ ), ++ (v2i64 (build_vector (i64 1),(i64 1)))), ++ (VAVG_D (v2i64 LSX128D:$a), (v2i64 LSX128D:$b))>; ++ ++ ++ ++def : LSXPat<(srl ++ (v16i8 (add LSX128B:$a, LSX128B:$b)), ++ (v16i8 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1)) ++ ) ++ ), ++ (VAVG_BU (v16i8 LSX128B:$a), (v16i8 LSX128B:$b))>; ++ ++def : LSXPat<(srl ++ (v8i16 (add LSX128H:$a, LSX128H:$b)), ++ (v8i16 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1)) ++ ) ++ ), ++ (VAVG_HU (v8i16 LSX128H:$a), (v8i16 LSX128H:$b))>; ++ ++def : LSXPat<(srl ++ (v4i32 (add LSX128W:$a, LSX128W:$b)), ++ (v4i32 (build_vector (i32 1),(i32 1),(i32 1),(i32 1)) ++ ) ++ ), ++ (VAVG_WU (v4i32 LSX128W:$a), (v4i32 LSX128W:$b))>; ++ ++def : LSXPat<(srl ++ (v2i64 (add LSX128D:$a, LSX128D:$b)), ++ (v2i64 (build_vector (i64 1),(i64 1)) ++ ) ++ ), ++ (VAVG_DU (v2i64 LSX128D:$a), (v2i64 LSX128D:$b))>; ++ ++ ++ ++ ++def : LSXPat<(sra ++ (v16i8 (add ++ (v16i8 (add (v16i8 ( ++ build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1) ++ )), ++ (v16i8 (add LSX128B:$a, LSX128B:$b)) ++ )), ++ (v16i8 (srl ++ (v16i8 ( add (v16i8( ++ build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1) ++ )), ++ (v16i8 (add LSX128B:$a, LSX128B:$b)) ++ )), ++ (v16i8 (build_vector (i32 7),(i32 7),(i32 7),(i32 7), ++ (i32 7),(i32 7),(i32 7),(i32 7), ++ (i32 7),(i32 7),(i32 7),(i32 7), ++ (i32 7),(i32 7),(i32 7),(i32 7)) ++ ) ++ ) ++ ) ++ ) ++ ), ++ (v16i8 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1) ++ ))), ++ (VAVGR_B (v16i8 LSX128B:$a), (v16i8 LSX128B:$b))>; ++ ++def : LSXPat<(sra ++ (v8i16 (add ++ (v8i16 (add (v8i16 ( ++ build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1) ++ )), ++ (v8i16 (add LSX128H:$a, LSX128H:$b)) ++ )), ++ (v8i16 (srl ++ (v8i16 (add (v8i16 ( ++ build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1) ++ )), ++ (v8i16 (add LSX128H:$a, LSX128H:$b)) ++ )), ++ (v8i16 (build_vector (i32 15),(i32 15),(i32 15),(i32 15), ++ (i32 15),(i32 15),(i32 15),(i32 15)) ++ ) ++ ) ++ ) ++ ) ++ ), ++ (v8i16 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1) ++ ))), ++ (VAVGR_H (v8i16 LSX128H:$a), (v8i16 LSX128H:$b))>; ++ ++def : LSXPat<(sra ++ (v4i32 (add ++ (v4i32 (add (v4i32 ( ++ build_vector (i32 1),(i32 1),(i32 1),(i32 1) ++ )), ++ (v4i32 (add LSX128W:$a, LSX128W:$b)) ++ )), ++ (v4i32 (srl ++ (v4i32 (add (v4i32 ( ++ build_vector (i32 1),(i32 1),(i32 1),(i32 1) ++ )), ++ (v4i32 (add LSX128W:$a, LSX128W:$b)) ++ )), ++ (v4i32 (build_vector (i32 31),(i32 31),(i32 31),(i32 31)) ++ ) ++ ) ++ ) ++ ) ++ ), ++ (v4i32 (build_vector (i32 1),(i32 1),(i32 1),(i32 1)))), ++ (VAVGR_W (v4i32 LSX128W:$a), (v4i32 LSX128W:$b))>; ++ ++def : LSXPat<(sra ++ (v2i64 (add ++ (v2i64 (add (v2i64 ( ++ build_vector (i64 1),(i64 1) ++ )), ++ (v2i64 (add LSX128D:$a, LSX128D:$b)) ++ )), ++ (v2i64 (srl ++ (v2i64 (add (v2i64 ( ++ build_vector (i64 1),(i64 1) ++ )), ++ (v2i64 (add LSX128D:$a, LSX128D:$b)) ++ )), ++ (v2i64 (build_vector (i64 63),(i64 63))) ++ ) ++ ) ++ ) ++ ), ++ (v2i64 
(build_vector (i64 1),(i64 1)))), ++ (VAVGR_D (v2i64 LSX128D:$a), (v2i64 LSX128D:$b))>; ++ ++ ++ ++ ++def : LSXPat<(srl ++ (v16i8 (add (v16i8 ( ++ build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1) ++ )), ++ (v16i8 (add LSX128B:$a, LSX128B:$b)) ++ )), ++ (v16i8 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1)) ++ ) ++ ), ++ (VAVGR_BU (v16i8 LSX128B:$a), (v16i8 LSX128B:$b))>; ++ ++def : LSXPat<(srl ++ (v8i16 (add (v8i16 ( ++ build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1) ++ )), ++ (v8i16 (add LSX128H:$a, LSX128H:$b)) ++ )), ++ (v8i16 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1)) ++ ) ++ ), ++ (VAVGR_HU (v8i16 LSX128H:$a), (v8i16 LSX128H:$b))>; ++ ++def : LSXPat<(srl ++ (v4i32 (add (v4i32 ( ++ build_vector (i32 1),(i32 1),(i32 1),(i32 1) ++ )), ++ (v4i32 (add LSX128W:$a, LSX128W:$b)) ++ )), ++ (v4i32 (build_vector (i32 1),(i32 1),(i32 1),(i32 1)) ++ ) ++ ), ++ (VAVGR_WU (v4i32 LSX128W:$a), (v4i32 LSX128W:$b))>; ++ ++def : LSXPat<(srl ++ (v2i64 (add (v2i64 ( ++ build_vector (i64 1),(i64 1) ++ )), ++ (v2i64 (add LSX128D:$a, LSX128D:$b)) ++ )), ++ (v2i64 (build_vector (i64 1),(i64 1)) ++ ) ++ ), ++ (VAVGR_DU (v2i64 LSX128D:$a), (v2i64 LSX128D:$b))>; ++ ++ ++def : LSXPat<(mulhs LSX128D:$a, LSX128D:$b), ++ (VMUH_D LSX128D:$a, LSX128D:$b)>; ++ ++def : LSXPat<(mulhs LSX128W:$a, LSX128W:$b), ++ (VMUH_W LSX128W:$a, LSX128W:$b)>; ++ ++def : LSXPat<(mulhs LSX128H:$a, LSX128H:$b), ++ (VMUH_H LSX128H:$a, LSX128H:$b)>; ++ ++def : LSXPat<(mulhs LSX128B:$a, LSX128B:$b), ++ (VMUH_B LSX128B:$a, LSX128B:$b)>; ++ ++ ++def : LSXPat<(mulhu LSX128D:$a, LSX128D:$b), ++ (VMUH_DU LSX128D:$a, LSX128D:$b)>; ++ ++def : LSXPat<(mulhu LSX128W:$a, LSX128W:$b), ++ (VMUH_WU LSX128W:$a, LSX128W:$b)>; ++ ++def : LSXPat<(mulhu LSX128H:$a, LSX128H:$b), ++ (VMUH_HU LSX128H:$a, LSX128H:$b)>; ++ ++def : LSXPat<(mulhu LSX128B:$a, LSX128B:$b), ++ (VMUH_BU LSX128B:$a, LSX128B:$b)>; ++ ++ ++ ++//===----------------------------------------------------------------------===// ++// Intrinsics ++//===----------------------------------------------------------------------===// ++ ++def : LSXPat<(int_loongarch_lsx_vseq_b (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), ++ (VSEQ_B LSX128B:$vj, LSX128B:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vseq_h (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), ++ (VSEQ_H LSX128H:$vj, LSX128H:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vseq_w (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), ++ (VSEQ_W LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vseq_d (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), ++ (VSEQ_D LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vsle_b (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), ++ (VSLE_B LSX128B:$vj, LSX128B:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vsle_h (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), ++ (VSLE_H LSX128H:$vj, LSX128H:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vsle_w (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), ++ (VSLE_W LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vsle_d (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), ++ (VSLE_D LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vsle_bu (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), ++ (VSLE_BU LSX128B:$vj, LSX128B:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vsle_hu (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), ++ (VSLE_HU 
LSX128H:$vj, LSX128H:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vsle_wu (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), ++ (VSLE_WU LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vsle_du (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), ++ (VSLE_DU LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vslt_b (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), ++ (VSLT_B LSX128B:$vj, LSX128B:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vslt_h (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), ++ (VSLT_H LSX128H:$vj, LSX128H:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vslt_w (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), ++ (VSLT_W LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vslt_d (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), ++ (VSLT_D LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vslt_bu (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), ++ (VSLT_BU LSX128B:$vj, LSX128B:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vslt_hu (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), ++ (VSLT_HU LSX128H:$vj, LSX128H:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vslt_wu (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), ++ (VSLT_WU LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vslt_du (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), ++ (VSLT_DU LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vadd_b (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), ++ (VADD_B LSX128B:$vj, LSX128B:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vadd_h (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), ++ (VADD_H LSX128H:$vj, LSX128H:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vadd_w (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), ++ (VADD_W LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vadd_d (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), ++ (VADD_D LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vsub_b (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), ++ (VSUB_B LSX128B:$vj, LSX128B:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vsub_h (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), ++ (VSUB_H LSX128H:$vj, LSX128H:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vsub_w (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), ++ (VSUB_W LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vsub_d (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), ++ (VSUB_D LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vsadd_b (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), ++ (VSADD_B LSX128B:$vj, LSX128B:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vsadd_h (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), ++ (VSADD_H LSX128H:$vj, LSX128H:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vsadd_w (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), ++ (VSADD_W LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vsadd_d (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), ++ (VSADD_D LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vssub_b (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), ++ (VSSUB_B LSX128B:$vj, LSX128B:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vssub_h (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), ++ (VSSUB_H LSX128H:$vj, LSX128H:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vssub_w (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), ++ (VSSUB_W LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vssub_d (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), ++ (VSSUB_D LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vsadd_bu (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), ++ (VSADD_BU LSX128B:$vj, LSX128B:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vsadd_hu (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), ++ (VSADD_HU LSX128H:$vj, LSX128H:$vk)>; ++def : 
LSXPat<(int_loongarch_lsx_vsadd_wu (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), ++ (VSADD_WU LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vsadd_du (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), ++ (VSADD_DU LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vssub_bu (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), ++ (VSSUB_BU LSX128B:$vj, LSX128B:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vssub_hu (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), ++ (VSSUB_HU LSX128H:$vj, LSX128H:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vssub_wu (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), ++ (VSSUB_WU LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vssub_du (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), ++ (VSSUB_DU LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vhaddw_h_b (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), ++ (VHADDW_H_B LSX128B:$vj, LSX128B:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vhaddw_w_h (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), ++ (VHADDW_W_H LSX128H:$vj, LSX128H:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vhaddw_d_w (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), ++ (VHADDW_D_W LSX128W:$vj, LSX128W:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vhsubw_h_b (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), ++ (VHSUBW_H_B LSX128B:$vj, LSX128B:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vhsubw_w_h (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), ++ (VHSUBW_W_H LSX128H:$vj, LSX128H:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vhsubw_d_w (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), ++ (VHSUBW_D_W LSX128W:$vj, LSX128W:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vhaddw_hu_bu (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), ++ (VHADDW_HU_BU LSX128B:$vj, LSX128B:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vhaddw_wu_hu (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), ++ (VHADDW_WU_HU LSX128H:$vj, LSX128H:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vhaddw_du_wu (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), ++ (VHADDW_DU_WU LSX128W:$vj, LSX128W:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vhsubw_hu_bu (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), ++ (VHSUBW_HU_BU LSX128B:$vj, LSX128B:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vhsubw_wu_hu (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), ++ (VHSUBW_WU_HU LSX128H:$vj, LSX128H:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vhsubw_du_wu (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), ++ (VHSUBW_DU_WU LSX128W:$vj, LSX128W:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vadda_b (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), ++ (VADDA_B LSX128B:$vj, LSX128B:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vadda_h (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), ++ (VADDA_H LSX128H:$vj, LSX128H:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vadda_w (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), ++ (VADDA_W LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vadda_d (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), ++ (VADDA_D LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vabsd_b (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), ++ (VABSD_B LSX128B:$vj, LSX128B:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vabsd_h (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), ++ (VABSD_H LSX128H:$vj, LSX128H:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vabsd_w (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), ++ (VABSD_W LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vabsd_d (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), ++ (VABSD_D LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vabsd_bu (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), ++ (VABSD_BU LSX128B:$vj, LSX128B:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vabsd_hu (v8i16 
LSX128H:$vj), (v8i16 LSX128H:$vk)), ++ (VABSD_HU LSX128H:$vj, LSX128H:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vabsd_wu (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), ++ (VABSD_WU LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vabsd_du (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), ++ (VABSD_DU LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vavg_b (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), ++ (VAVG_B LSX128B:$vj, LSX128B:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vavg_h (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), ++ (VAVG_H LSX128H:$vj, LSX128H:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vavg_w (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), ++ (VAVG_W LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vavg_d (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), ++ (VAVG_D LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vavg_bu (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), ++ (VAVG_BU LSX128B:$vj, LSX128B:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vavg_hu (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), ++ (VAVG_HU LSX128H:$vj, LSX128H:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vavg_wu (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), ++ (VAVG_WU LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vavg_du (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), ++ (VAVG_DU LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vavgr_b (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), ++ (VAVGR_B LSX128B:$vj, LSX128B:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vavgr_h (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), ++ (VAVGR_H LSX128H:$vj, LSX128H:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vavgr_w (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), ++ (VAVGR_W LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vavgr_d (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), ++ (VAVGR_D LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vavgr_bu (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), ++ (VAVGR_BU LSX128B:$vj, LSX128B:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vavgr_hu (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), ++ (VAVGR_HU LSX128H:$vj, LSX128H:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vavgr_wu (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), ++ (VAVGR_WU LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vavgr_du (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), ++ (VAVGR_DU LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vsrlr_b (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), ++ (VSRLR_B LSX128B:$vj, LSX128B:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vsrlr_h (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), ++ (VSRLR_H LSX128H:$vj, LSX128H:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vsrlr_w (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), ++ (VSRLR_W LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vsrlr_d (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), ++ (VSRLR_D LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vsrar_b (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), ++ (VSRAR_B LSX128B:$vj, LSX128B:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vsrar_h (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), ++ (VSRAR_H LSX128H:$vj, LSX128H:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vsrar_w (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), ++ (VSRAR_W LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vsrar_d (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), ++ (VSRAR_D LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vbitset_b (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), ++ (VBITSET_B LSX128B:$vj, LSX128B:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vbitset_h (v8i16 LSX128H:$vj), (v8i16 
LSX128H:$vk)), ++ (VBITSET_H LSX128H:$vj, LSX128H:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vbitset_w (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), ++ (VBITSET_W LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vbitset_d (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), ++ (VBITSET_D LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vbitrev_b (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), ++ (VBITREV_B LSX128B:$vj, LSX128B:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vbitrev_h (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), ++ (VBITREV_H LSX128H:$vj, LSX128H:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vbitrev_w (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), ++ (VBITREV_W LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vbitrev_d (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), ++ (VBITREV_D LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vfadd_s (v4f32 LSX128W:$vj), (v4f32 LSX128W:$vk)), ++ (VFADD_S LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vfadd_d (v2f64 LSX128D:$vj), (v2f64 LSX128D:$vk)), ++ (VFADD_D LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vfsub_s (v4f32 LSX128W:$vj), (v4f32 LSX128W:$vk)), ++ (VFSUB_S LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vfsub_d (v2f64 LSX128D:$vj), (v2f64 LSX128D:$vk)), ++ (VFSUB_D LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vfmax_s (v4f32 LSX128W:$vj), (v4f32 LSX128W:$vk)), ++ (VFMAX_S LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vfmax_d (v2f64 LSX128D:$vj), (v2f64 LSX128D:$vk)), ++ (VFMAX_D LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vfmin_s (v4f32 LSX128W:$vj), (v4f32 LSX128W:$vk)), ++ (VFMIN_S LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vfmin_d (v2f64 LSX128D:$vj), (v2f64 LSX128D:$vk)), ++ (VFMIN_D LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vfmaxa_s (v4f32 LSX128W:$vj), (v4f32 LSX128W:$vk)), ++ (VFMAXA_S LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vfmaxa_d (v2f64 LSX128D:$vj), (v2f64 LSX128D:$vk)), ++ (VFMAXA_D LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vfmina_s (v4f32 LSX128W:$vj), (v4f32 LSX128W:$vk)), ++ (VFMINA_S LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vfmina_d (v2f64 LSX128D:$vj), (v2f64 LSX128D:$vk)), ++ (VFMINA_D LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vclo_b (v16i8 LSX128B:$vj)), ++ (VCLO_B LSX128B:$vj)>; ++def : LSXPat<(int_loongarch_lsx_vclo_h (v8i16 LSX128H:$vj)), ++ (VCLO_H LSX128H:$vj)>; ++def : LSXPat<(int_loongarch_lsx_vclo_w (v4i32 LSX128W:$vj)), ++ (VCLO_W LSX128W:$vj)>; ++def : LSXPat<(int_loongarch_lsx_vclo_d (v2i64 LSX128D:$vj)), ++ (VCLO_D LSX128D:$vj)>; ++ ++def : LSXPat<(int_loongarch_lsx_vflogb_s (v4f32 LSX128W:$vj)), ++ (VFLOGB_S LSX128W:$vj)>; ++def : LSXPat<(int_loongarch_lsx_vflogb_d (v2f64 LSX128D:$vj)), ++ (VFLOGB_D LSX128D:$vj)>; ++ ++def : LSXPat<(int_loongarch_lsx_vfclass_s (v4f32 LSX128W:$vj)), ++ (VFCLASS_S LSX128W:$vj)>; ++def : LSXPat<(int_loongarch_lsx_vfclass_d (v2f64 LSX128D:$vj)), ++ (VFCLASS_D LSX128D:$vj)>; ++ ++def : LSXPat<(int_loongarch_lsx_vfrecip_s (v4f32 LSX128W:$vj)), ++ (VFRECIP_S LSX128W:$vj)>; ++def : LSXPat<(int_loongarch_lsx_vfrecip_d (v2f64 LSX128D:$vj)), ++ (VFRECIP_D LSX128D:$vj)>; ++ ++def : LSXPat<(int_loongarch_lsx_vfrsqrt_s (v4f32 LSX128W:$vj)), ++ (VFRSQRT_S LSX128W:$vj)>; ++def : LSXPat<(int_loongarch_lsx_vfrsqrt_d (v2f64 LSX128D:$vj)), ++ (VFRSQRT_D LSX128D:$vj)>; ++ ++def : LSXPat<(int_loongarch_lsx_vfcvtl_s_h (v8i16 
LSX128H:$vk)), ++ (VFCVTL_S_H LSX128H:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vfcvth_s_h (v8i16 LSX128H:$vk)), ++ (VFCVTH_S_H LSX128H:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vfcvtl_d_s (v4f32 LSX128W:$vj)), ++ (VFCVTL_D_S LSX128W:$vj)>; ++def : LSXPat<(int_loongarch_lsx_vfcvth_d_s (v4f32 LSX128W:$vj)), ++ (VFCVTH_D_S LSX128W:$vj)>; ++ ++def : LSXPat<(int_loongarch_lsx_vftint_w_s (v4f32 LSX128W:$vj)), ++ (VFTINT_W_S LSX128W:$vj)>; ++def : LSXPat<(int_loongarch_lsx_vftint_l_d (v2f64 LSX128D:$vj)), ++ (VFTINT_L_D LSX128D:$vj)>; ++ ++def : LSXPat<(int_loongarch_lsx_vftint_wu_s (v4f32 LSX128W:$vj)), ++ (VFTINT_WU_S LSX128W:$vj)>; ++def : LSXPat<(int_loongarch_lsx_vftint_lu_d (v2f64 LSX128D:$vj)), ++ (VFTINT_LU_D LSX128D:$vj)>; ++ ++def : LSXPat<(int_loongarch_lsx_vreplgr2vr_b GPR32Opnd:$rj), ++ (VREPLGR2VR_B GPR32Opnd:$rj)>; ++def : LSXPat<(int_loongarch_lsx_vreplgr2vr_h GPR32Opnd:$rj), ++ (VREPLGR2VR_H GPR32Opnd:$rj)>; ++def : LSXPat<(int_loongarch_lsx_vreplgr2vr_w GPR32Opnd:$rj), ++ (VREPLGR2VR_W GPR32Opnd:$rj)>; ++def : LSXPat<(int_loongarch_lsx_vreplgr2vr_d GPR64Opnd:$rj), ++ (VREPLGR2VR_D GPR64Opnd:$rj)>; ++ ++def : LSXPat<(int_loongarch_lsx_vsrlri_b (v16i8 LSX128B:$vj), (immZExt3:$ui3)), ++ (VSRLRI_B LSX128B:$vj, uimm3:$ui3)>; ++def : LSXPat<(int_loongarch_lsx_vsrlri_h (v8i16 LSX128H:$vj), (immZExt4:$ui4)), ++ (VSRLRI_H LSX128H:$vj, uimm4i:$ui4)>; ++def : LSXPat<(int_loongarch_lsx_vsrlri_w (v4i32 LSX128W:$vj), (immZExt5:$ui5)), ++ (VSRLRI_W LSX128W:$vj, uimm5:$ui5)>; ++def : LSXPat<(int_loongarch_lsx_vsrlri_d (v2i64 LSX128D:$vj), (immZExt6:$ui6)), ++ (VSRLRI_D LSX128D:$vj, uimm6:$ui6)>; ++ ++def : LSXPat<(int_loongarch_lsx_vsrari_b (v16i8 LSX128B:$vj), (immZExt3:$ui3)), ++ (VSRARI_B LSX128B:$vj, uimm3:$ui3)>; ++def : LSXPat<(int_loongarch_lsx_vsrari_h (v8i16 LSX128H:$vj), (immZExt4:$ui4)), ++ (VSRARI_H LSX128H:$vj, uimm4i:$ui4)>; ++def : LSXPat<(int_loongarch_lsx_vsrari_w (v4i32 LSX128W:$vj), (immZExt5:$ui5)), ++ (VSRARI_W LSX128W:$vj, uimm5:$ui5)>; ++def : LSXPat<(int_loongarch_lsx_vsrari_d (v2i64 LSX128D:$vj), (immZExt6:$ui6)), ++ (VSRARI_D LSX128D:$vj, uimm6:$ui6)>; ++ ++def : LSXPat<(int_loongarch_lsx_vinsgr2vr_b (v16i8 LSX128B:$vj), GPR32Opnd:$rj, (immZExt4:$ui4)), ++ (VINSGR2VR_B LSX128B:$vj, GPR32Opnd:$rj, (uimm4i:$ui4))>; ++def : LSXPat<(int_loongarch_lsx_vinsgr2vr_h (v8i16 LSX128H:$vj), GPR32Opnd:$rj, (immZExt3:$ui3)), ++ (VINSGR2VR_H LSX128H:$vj, GPR32Opnd:$rj, uimm3:$ui3)>; ++def : LSXPat<(int_loongarch_lsx_vinsgr2vr_w (v4i32 LSX128W:$vj), GPR32Opnd:$rj, (immZExt2:$ui2)), ++ (VINSGR2VR_W LSX128W:$vj, GPR32Opnd:$rj, uimm2:$ui2)>; ++def : LSXPat<(int_loongarch_lsx_vinsgr2vr_d (v2i64 LSX128D:$vj), GPR64Opnd:$rj, (immZExt1:$ui1)), ++ (VINSGR2VR_D LSX128D:$vj, GPR64Opnd:$rj, uimm1i:$ui1)>; ++ ++def : LSXPat<(int_loongarch_lsx_vpickve2gr_b (v16i8 LSX128B:$vj), (immZExt4:$ui4)), ++ (VPICKVE2GR_B LSX128B:$vj, (uimm4i:$ui4))>; ++def : LSXPat<(int_loongarch_lsx_vpickve2gr_h (v8i16 LSX128H:$vj), (immZExt3:$ui3)), ++ (VPICKVE2GR_H LSX128H:$vj, uimm3:$ui3)>; ++def : LSXPat<(int_loongarch_lsx_vpickve2gr_w (v4i32 LSX128W:$vj), (immZExt2:$ui2)), ++ (VPICKVE2GR_W LSX128W:$vj, uimm2:$ui2)>; ++def : LSXPat<(int_loongarch_lsx_vpickve2gr_d (v2i64 LSX128D:$vj), (immZExt1:$ui1)), ++ (VPICKVE2GR_D LSX128D:$vj, uimm1i:$ui1)>; ++ ++def : LSXPat<(int_loongarch_lsx_vpickve2gr_bu (v16i8 LSX128B:$vj), (immZExt4:$ui4)), ++ (VPICKVE2GR_BU LSX128B:$vj, (uimm4i:$ui4))>; ++def : LSXPat<(int_loongarch_lsx_vpickve2gr_hu (v8i16 LSX128H:$vj), (immZExt3:$ui3)), ++ (VPICKVE2GR_HU LSX128H:$vj, uimm3:$ui3)>; ++def 
: LSXPat<(int_loongarch_lsx_vpickve2gr_wu (v4i32 LSX128W:$vj), (immZExt2:$ui2)), ++ (VPICKVE2GR_WU LSX128W:$vj, uimm2:$ui2)>; ++ ++def : LSXPat<(int_loongarch_lsx_vsat_b (v16i8 LSX128B:$vj), (immZExt3:$ui3)), ++ (VSAT_B LSX128B:$vj, uimm3:$ui3)>; ++def : LSXPat<(int_loongarch_lsx_vsat_h (v8i16 LSX128H:$vj), (immZExt4:$ui4)), ++ (VSAT_H LSX128H:$vj, uimm4i:$ui4)>; ++def : LSXPat<(int_loongarch_lsx_vsat_w (v4i32 LSX128W:$vj), (immZExt5:$ui5)), ++ (VSAT_W LSX128W:$vj, uimm5:$ui5)>; ++def : LSXPat<(int_loongarch_lsx_vsat_d (v2i64 LSX128D:$vj), (immZExt6:$ui6)), ++ (VSAT_D LSX128D:$vj, uimm6:$ui6)>; ++ ++def : LSXPat<(int_loongarch_lsx_vsat_bu (v16i8 LSX128B:$vj), (immZExt3:$ui3)), ++ (VSAT_BU LSX128B:$vj, uimm3:$ui3)>; ++def : LSXPat<(int_loongarch_lsx_vsat_hu (v8i16 LSX128H:$vj), (immZExt4:$ui4)), ++ (VSAT_HU LSX128H:$vj, uimm4i:$ui4)>; ++def : LSXPat<(int_loongarch_lsx_vsat_wu (v4i32 LSX128W:$vj), (immZExt5:$ui5)), ++ (VSAT_WU LSX128W:$vj, uimm5:$ui5)>; ++def : LSXPat<(int_loongarch_lsx_vsat_du (v2i64 LSX128D:$vj), (immZExt6:$ui6)), ++ (VSAT_DU LSX128D:$vj, uimm6:$ui6)>; ++ ++def : LSXPat<(int_loongarch_lsx_vmskltz_b (v16i8 LSX128B:$vj)), ++ (VMSKLTZ_B LSX128B:$vj)>; ++def : LSXPat<(int_loongarch_lsx_vmskltz_h (v8i16 LSX128H:$vj)), ++ (VMSKLTZ_H LSX128H:$vj)>; ++def : LSXPat<(int_loongarch_lsx_vmskltz_w (v4i32 LSX128W:$vj)), ++ (VMSKLTZ_W LSX128W:$vj)>; ++def : LSXPat<(int_loongarch_lsx_vmskltz_d (v2i64 LSX128D:$vj)), ++ (VMSKLTZ_D LSX128D:$vj)>; ++ ++def : LSXPat<(int_loongarch_lsx_vsrlni_b_h (v16i8 LSX128B:$vd_in), (v16i8 LSX128B:$vj), (immZExt4:$ui4)), ++ (VSRLNI_B_H LSX128B:$vd_in, LSX128B:$vj, uimm4i:$ui4)>; ++def : LSXPat<(int_loongarch_lsx_vsrlni_h_w (v8i16 LSX128H:$vd_in), (v8i16 LSX128H:$vj), (immZExt5:$ui5)), ++ (VSRLNI_H_W LSX128H:$vd_in, LSX128H:$vj, uimm5:$ui5)>; ++def : LSXPat<(int_loongarch_lsx_vsrlni_w_d (v4i32 LSX128W:$vd_in), (v4i32 LSX128W:$vj), (immZExt6:$ui6)), ++ (VSRLNI_W_D LSX128W:$vd_in, LSX128W:$vj, uimm6:$ui6)>; ++def : LSXPat<(int_loongarch_lsx_vsrlni_d_q (v2i64 LSX128D:$vd_in), (v2i64 LSX128D:$vj), (immZExt7:$ui7)), ++ (VSRLNI_D_Q LSX128D:$vd_in, LSX128D:$vj, uimm7i:$ui7)>; ++ ++def : LSXPat<(int_loongarch_lsx_vssrlni_b_h (v16i8 LSX128B:$vd_in), (v16i8 LSX128B:$vj), (immZExt4:$ui4)), ++ (VSSRLNI_B_H LSX128B:$vd_in, LSX128B:$vj, uimm4i:$ui4)>; ++def : LSXPat<(int_loongarch_lsx_vssrlni_h_w (v8i16 LSX128H:$vd_in), (v8i16 LSX128H:$vj), (immZExt5:$ui5)), ++ (VSSRLNI_H_W LSX128H:$vd_in, LSX128H:$vj, uimm5:$ui5)>; ++def : LSXPat<(int_loongarch_lsx_vssrlni_w_d (v4i32 LSX128W:$vd_in), (v4i32 LSX128W:$vj), (immZExt6:$ui6)), ++ (VSSRLNI_W_D LSX128W:$vd_in, LSX128W:$vj, uimm6:$ui6)>; ++def : LSXPat<(int_loongarch_lsx_vssrlni_d_q (v2i64 LSX128D:$vd_in), (v2i64 LSX128D:$vj), (immZExt7:$ui7)), ++ (VSSRLNI_D_Q LSX128D:$vd_in, LSX128D:$vj, uimm7i:$ui7)>; ++ ++def : LSXPat<(int_loongarch_lsx_vssrlni_bu_h (v16i8 LSX128B:$vd_in), (v16i8 LSX128B:$vj), (immZExt4:$ui4)), ++ (VSSRLNI_BU_H LSX128B:$vd_in, LSX128B:$vj, uimm4i:$ui4)>; ++def : LSXPat<(int_loongarch_lsx_vssrlni_hu_w (v8i16 LSX128H:$vd_in), (v8i16 LSX128H:$vj), (immZExt5:$ui5)), ++ (VSSRLNI_HU_W LSX128H:$vd_in, LSX128H:$vj, uimm5:$ui5)>; ++def : LSXPat<(int_loongarch_lsx_vssrlni_wu_d (v4i32 LSX128W:$vd_in), (v4i32 LSX128W:$vj), (immZExt6:$ui6)), ++ (VSSRLNI_WU_D LSX128W:$vd_in, LSX128W:$vj, uimm6:$ui6)>; ++def : LSXPat<(int_loongarch_lsx_vssrlni_du_q (v2i64 LSX128D:$vd_in), (v2i64 LSX128D:$vj), (immZExt7:$ui7)), ++ (VSSRLNI_DU_Q LSX128D:$vd_in, LSX128D:$vj, uimm7i:$ui7)>; ++ ++def : LSXPat<(int_loongarch_lsx_vssrlrni_bu_h 
(v16i8 LSX128B:$vd_in), (v16i8 LSX128B:$vj), (immZExt4:$ui4)), ++ (VSSRLRNI_BU_H LSX128B:$vd_in, LSX128B:$vj, uimm4i:$ui4)>; ++def : LSXPat<(int_loongarch_lsx_vssrlrni_hu_w (v8i16 LSX128H:$vd_in), (v8i16 LSX128H:$vj), (immZExt5:$ui5)), ++ (VSSRLRNI_HU_W LSX128H:$vd_in, LSX128H:$vj, uimm5:$ui5)>; ++def : LSXPat<(int_loongarch_lsx_vssrlrni_wu_d (v4i32 LSX128W:$vd_in), (v4i32 LSX128W:$vj), (immZExt6:$ui6)), ++ (VSSRLRNI_WU_D LSX128W:$vd_in, LSX128W:$vj, uimm6:$ui6)>; ++def : LSXPat<(int_loongarch_lsx_vssrlrni_du_q (v2i64 LSX128D:$vd_in), (v2i64 LSX128D:$vj), (immZExt7:$ui7)), ++ (VSSRLRNI_DU_Q LSX128D:$vd_in, LSX128D:$vj, uimm7i:$ui7)>; ++ ++def : LSXPat<(int_loongarch_lsx_vsrarni_b_h (v16i8 LSX128B:$vd_in), (v16i8 LSX128B:$vj), (immZExt4:$ui4)), ++ (VSRARNI_B_H LSX128B:$vd_in, LSX128B:$vj, uimm4i:$ui4)>; ++def : LSXPat<(int_loongarch_lsx_vsrarni_h_w (v8i16 LSX128H:$vd_in), (v8i16 LSX128H:$vj), (immZExt5:$ui5)), ++ (VSRARNI_H_W LSX128H:$vd_in, LSX128H:$vj, uimm5:$ui5)>; ++def : LSXPat<(int_loongarch_lsx_vsrarni_w_d (v4i32 LSX128W:$vd_in), (v4i32 LSX128W:$vj), (immZExt6:$ui6)), ++ (VSRARNI_W_D LSX128W:$vd_in, LSX128W:$vj, uimm6:$ui6)>; ++def : LSXPat<(int_loongarch_lsx_vsrarni_d_q (v2i64 LSX128D:$vd_in), (v2i64 LSX128D:$vj), (immZExt7:$ui7)), ++ (VSRARNI_D_Q LSX128D:$vd_in, LSX128D:$vj, uimm7i:$ui7)>; ++ ++def : LSXPat<(int_loongarch_lsx_vssrani_b_h (v16i8 LSX128B:$vd_in), (v16i8 LSX128B:$vj), (immZExt4:$ui4)), ++ (VSSRANI_B_H LSX128B:$vd_in, LSX128B:$vj, uimm4i:$ui4)>; ++def : LSXPat<(int_loongarch_lsx_vssrani_h_w (v8i16 LSX128H:$vd_in), (v8i16 LSX128H:$vj), (immZExt5:$ui5)), ++ (VSSRANI_H_W LSX128H:$vd_in, LSX128H:$vj, uimm5:$ui5)>; ++def : LSXPat<(int_loongarch_lsx_vssrani_w_d (v4i32 LSX128W:$vd_in), (v4i32 LSX128W:$vj), (immZExt6:$ui6)), ++ (VSSRANI_W_D LSX128W:$vd_in, LSX128W:$vj, uimm6:$ui6)>; ++def : LSXPat<(int_loongarch_lsx_vssrani_d_q (v2i64 LSX128D:$vd_in), (v2i64 LSX128D:$vj), (immZExt7:$ui7)), ++ (VSSRANI_D_Q LSX128D:$vd_in, LSX128D:$vj, uimm7i:$ui7)>; ++ ++def : LSXPat<(int_loongarch_lsx_vssrani_bu_h (v16i8 LSX128B:$vd_in), (v16i8 LSX128B:$vj), (immZExt4:$ui4)), ++ (VSSRANI_BU_H LSX128B:$vd_in, LSX128B:$vj, uimm4i:$ui4)>; ++def : LSXPat<(int_loongarch_lsx_vssrani_hu_w (v8i16 LSX128H:$vd_in), (v8i16 LSX128H:$vj), (immZExt5:$ui5)), ++ (VSSRANI_HU_W LSX128H:$vd_in, LSX128H:$vj, uimm5:$ui5)>; ++def : LSXPat<(int_loongarch_lsx_vssrani_wu_d (v4i32 LSX128W:$vd_in), (v4i32 LSX128W:$vj), (immZExt6:$ui6)), ++ (VSSRANI_WU_D LSX128W:$vd_in, LSX128W:$vj, uimm6:$ui6)>; ++def : LSXPat<(int_loongarch_lsx_vssrani_du_q (v2i64 LSX128D:$vd_in), (v2i64 LSX128D:$vj), (immZExt7:$ui7)), ++ (VSSRANI_DU_Q LSX128D:$vd_in, LSX128D:$vj, uimm7i:$ui7)>; ++ ++def : LSXPat<(int_loongarch_lsx_vssrarni_b_h (v16i8 LSX128B:$vd_in), (v16i8 LSX128B:$vj), (immZExt4:$ui4)), ++ (VSSRARNI_B_H LSX128B:$vd_in, LSX128B:$vj, uimm4i:$ui4)>; ++def : LSXPat<(int_loongarch_lsx_vssrarni_h_w (v8i16 LSX128H:$vd_in), (v8i16 LSX128H:$vj), (immZExt5:$ui5)), ++ (VSSRARNI_H_W LSX128H:$vd_in, LSX128H:$vj, uimm5:$ui5)>; ++def : LSXPat<(int_loongarch_lsx_vssrarni_w_d (v4i32 LSX128W:$vd_in), (v4i32 LSX128W:$vj), (immZExt6:$ui6)), ++ (VSSRARNI_W_D LSX128W:$vd_in, LSX128W:$vj, uimm6:$ui6)>; ++def : LSXPat<(int_loongarch_lsx_vssrarni_d_q (v2i64 LSX128D:$vd_in), (v2i64 LSX128D:$vj), (immZExt7:$ui7)), ++ (VSSRARNI_D_Q LSX128D:$vd_in, LSX128D:$vj, uimm7i:$ui7)>; ++ ++def : LSXPat<(int_loongarch_lsx_vssrarni_bu_h (v16i8 LSX128B:$vd_in), (v16i8 LSX128B:$vj), (immZExt4:$ui4)), ++ (VSSRARNI_BU_H LSX128B:$vd_in, LSX128B:$vj, uimm4i:$ui4)>; ++def : 
LSXPat<(int_loongarch_lsx_vssrarni_hu_w (v8i16 LSX128H:$vd_in), (v8i16 LSX128H:$vj), (immZExt5:$ui5)), ++ (VSSRARNI_HU_W LSX128H:$vd_in, LSX128H:$vj, uimm5:$ui5)>; ++def : LSXPat<(int_loongarch_lsx_vssrarni_wu_d (v4i32 LSX128W:$vd_in), (v4i32 LSX128W:$vj), (immZExt6:$ui6)), ++ (VSSRARNI_WU_D LSX128W:$vd_in, LSX128W:$vj, uimm6:$ui6)>; ++def : LSXPat<(int_loongarch_lsx_vssrarni_du_q (v2i64 LSX128D:$vd_in), (v2i64 LSX128D:$vj), (immZExt7:$ui7)), ++ (VSSRARNI_DU_Q LSX128D:$vd_in, LSX128D:$vj, uimm7i:$ui7)>; ++ ++def : LSXPat<(load (add iPTR:$vj, GPR64Opnd:$vk)), ++ (VLDX PtrRC:$vj, GPR64Opnd:$vk)>; ++ ++def : LSXPat<(store (v16i8 LSX128B:$vd), (add iPTR:$vj, GPR64Opnd:$vk)), ++ (VSTX LSX128B:$vd, PtrRC:$vj, GPR64Opnd:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vshuf_b (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk), (v16i8 LSX128B:$va)), ++ (VSHUF_B LSX128B:$vj, LSX128B:$vk, LSX128B:$va)>; ++ ++def : LSXPat<(int_loongarch_lsx_vfcmp_ceq_s (v4f32 LSX128W:$vj), (v4f32 LSX128W:$vk)), ++ (VFCMP_CEQ_S LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vfcmp_ceq_d (v2f64 LSX128D:$vj), (v2f64 LSX128D:$vk)), ++ (VFCMP_CEQ_D LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vfcmp_cor_s (v4f32 LSX128W:$vj), (v4f32 LSX128W:$vk)), ++ (VFCMP_COR_S LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vfcmp_cor_d (v2f64 LSX128D:$vj), (v2f64 LSX128D:$vk)), ++ (VFCMP_COR_D LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vfcmp_cun_s (v4f32 LSX128W:$vj), (v4f32 LSX128W:$vk)), ++ (VFCMP_CUN_S LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vfcmp_cun_d (v2f64 LSX128D:$vj), (v2f64 LSX128D:$vk)), ++ (VFCMP_CUN_D LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vfcmp_cune_s (v4f32 LSX128W:$vj), (v4f32 LSX128W:$vk)), ++ (VFCMP_CUNE_S LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vfcmp_cune_d (v2f64 LSX128D:$vj), (v2f64 LSX128D:$vk)), ++ (VFCMP_CUNE_D LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vfcmp_cueq_s (v4f32 LSX128W:$vj), (v4f32 LSX128W:$vk)), ++ (VFCMP_CUEQ_S LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vfcmp_cueq_d (v2f64 LSX128D:$vj), (v2f64 LSX128D:$vk)), ++ (VFCMP_CUEQ_D LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vfcmp_cne_s (v4f32 LSX128W:$vj), (v4f32 LSX128W:$vk)), ++ (VFCMP_CNE_S LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vfcmp_cne_d (v2f64 LSX128D:$vj), (v2f64 LSX128D:$vk)), ++ (VFCMP_CNE_D LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vfcmp_clt_s (v4f32 LSX128W:$vj), (v4f32 LSX128W:$vk)), ++ (VFCMP_CLT_S LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vfcmp_clt_d (v2f64 LSX128D:$vj), (v2f64 LSX128D:$vk)), ++ (VFCMP_CLT_D LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vfcmp_cult_s (v4f32 LSX128W:$vj), (v4f32 LSX128W:$vk)), ++ (VFCMP_CULT_S LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vfcmp_cult_d (v2f64 LSX128D:$vj), (v2f64 LSX128D:$vk)), ++ (VFCMP_CULT_D LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vfcmp_cle_s (v4f32 LSX128W:$vj), (v4f32 LSX128W:$vk)), ++ (VFCMP_CLE_S LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vfcmp_cle_d (v2f64 LSX128D:$vj), (v2f64 LSX128D:$vk)), ++ (VFCMP_CLE_D LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vfcmp_cule_s (v4f32 LSX128W:$vj), (v4f32 LSX128W:$vk)), ++ (VFCMP_CULE_S LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vfcmp_cule_d (v2f64 LSX128D:$vj), (v2f64 LSX128D:$vk)), 
++ (VFCMP_CULE_D LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vftintrz_w_s (v4f32 LSX128W:$vj)), ++ (VFTINTRZ_W_S LSX128W:$vj)>; ++def : LSXPat<(int_loongarch_lsx_vftintrz_l_d (v2f64 LSX128D:$vj)), ++ (VFTINTRZ_L_D LSX128D:$vj)>; ++ ++ ++def imm_mask : ImmLeaf(Imm) && Imm == -1;}]>; ++def imm_mask_64 : ImmLeaf(Imm) && Imm == -1;}]>; ++ ++ ++def : LSXPat<(xor (v8i16 LSX128H:$vj), (vsplati16 imm_mask)), ++ (NOR_V_H_PSEUDO (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vj))>; ++ ++def : LSXPat<(xor (v4i32 LSX128W:$vj), (vsplati32 imm_mask)), ++ (NOR_V_W_PSEUDO (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vj))>; ++ ++def : LSXPat<(xor (v2i64 LSX128D:$vj), (vsplati64 imm_mask_64)), ++ (NOR_V_D_PSEUDO (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vj))>; ++ ++ ++def : LSXPat<(and ++ (v16i8 (xor (v16i8 LSX128B:$vj),(vsplati8 imm_mask))), ++ (v16i8 LSX128B:$vk) ++ ), ++ (VANDN_V (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk))>; ++ ++def : LSXPat<(and ++ (v8i16 (xor (v8i16 LSX128H:$vj), (vsplati16 imm_mask))), ++ (v8i16 LSX128H:$vk) ++ ), ++ (VANDN_H_PSEUDO (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk))>; ++ ++def : LSXPat<(and ++ (v4i32 (xor (v4i32 LSX128W:$vj), (vsplati32 imm_mask))), ++ (v4i32 LSX128W:$vk) ++ ), ++ (VANDN_W_PSEUDO (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk))>; ++ ++def : LSXPat<(and ++ (v2i64 (xor (v2i64 LSX128D:$vj), (vsplati64 imm_mask_64))), ++ (v2i64 LSX128D:$vk) ++ ), ++ (VANDN_D_PSEUDO (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk))>; ++ ++ ++def : LSXPat<(or ++ (v16i8 LSX128B:$vj), ++ (v16i8 (xor (v16i8 LSX128B:$vk), (vsplati8 imm_mask))) ++ ), ++ (VORN_V (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk))>; ++ ++def : LSXPat<(or ++ (v8i16 LSX128H:$vj), ++ (v8i16 (xor (v8i16 LSX128H:$vk), (vsplati16 imm_mask))) ++ ), ++ (VORN_H_PSEUDO (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk))>; ++ ++def : LSXPat<(or ++ (v4i32 LSX128W:$vj), ++ (v4i32 (xor (v4i32 LSX128W:$vk), (vsplati32 imm_mask))) ++ ), ++ (VORN_W_PSEUDO (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk))>; ++ ++def : LSXPat<(or ++ (v2i64 LSX128D:$vj), ++ (v2i64 (xor (v2i64 LSX128D:$vk), (vsplati64 imm_mask_64))) ++ ), ++ (VORN_D_PSEUDO (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk))>; ++ ++ ++def : LSXPat<(add (v2i64 (abs LSX128D:$a)), (v2i64 (abs LSX128D:$b))), ++ (VADDA_D (v2i64 LSX128D:$a),(v2i64 LSX128D:$b))>; ++ ++def : LSXPat<(add (v4i32 (abs LSX128W:$a)), (v4i32 (abs LSX128W:$b))), ++ (VADDA_W (v4i32 LSX128W:$a),(v4i32 LSX128W:$b))>; ++ ++def : LSXPat<(add (v8i16 (abs LSX128H:$a)), (v8i16 (abs LSX128H:$b))), ++ (VADDA_H (v8i16 LSX128H:$a),(v8i16 LSX128H:$b))>; ++ ++def : LSXPat<(add (v16i8 (abs LSX128B:$a)), (v16i8 (abs LSX128B:$b))), ++ (VADDA_B (v16i8 LSX128B:$a),(v16i8 LSX128B:$b))>; ++ ++ ++def : LSXPat<(and v16i8:$vj, (xor (shl vsplat_imm_eq_1, v16i8:$vk), ++ (vsplati8 imm_mask))), ++ (VBITCLR_B v16i8:$vj, v16i8:$vk)>; ++ ++def : LSXPat<(and v8i16:$vj, (xor (shl vsplat_imm_eq_1, v8i16:$vk), ++ (vsplati16 imm_mask))), ++ (VBITCLR_H v8i16:$vj, v8i16:$vk)>; ++ ++def : LSXPat<(and v4i32:$vj, (xor (shl vsplat_imm_eq_1, v4i32:$vk), ++ (vsplati32 imm_mask))), ++ (VBITCLR_W v4i32:$vj, v4i32:$vk)>; ++ ++def : LSXPat<(and v2i64:$vj, (xor (shl vsplat_imm_eq_1, v2i64:$vk), ++ (vsplati64 imm_mask_64))), ++ (VBITCLR_D v2i64:$vj, v2i64:$vk)>; +diff --git a/llvm/lib/Target/LoongArch/LoongArchMCInstLower.cpp b/llvm/lib/Target/LoongArch/LoongArchMCInstLower.cpp +index 488c66f47..bf70b09d4 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchMCInstLower.cpp ++++ b/llvm/lib/Target/LoongArch/LoongArchMCInstLower.cpp +@@ -1,4 +1,4 @@ +-//=- LoongArchMCInstLower.cpp - Convert LoongArch 
MachineInstr to an MCInst -=// ++//===- LoongArchMCInstLower.cpp - Convert LoongArch MachineInstr to MCInst ----------===// + // + // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + // See https://llvm.org/LICENSE.txt for license information. +@@ -6,86 +6,337 @@ + // + //===----------------------------------------------------------------------===// + // +-// This file contains code to lower LoongArch MachineInstrs to their +-// corresponding MCInst records. ++// This file contains code to lower LoongArch MachineInstrs to their corresponding ++// MCInst records. + // + //===----------------------------------------------------------------------===// + +-#include "LoongArch.h" +-#include "LoongArchSubtarget.h" +-#include "llvm/CodeGen/AsmPrinter.h" ++#include "LoongArchMCInstLower.h" ++#include "MCTargetDesc/LoongArchBaseInfo.h" ++#include "MCTargetDesc/LoongArchMCExpr.h" ++#include "LoongArchAsmPrinter.h" + #include "llvm/CodeGen/MachineBasicBlock.h" + #include "llvm/CodeGen/MachineInstr.h" +-#include "llvm/MC/MCAsmInfo.h" +-#include "llvm/MC/MCContext.h" +-#include "llvm/Support/raw_ostream.h" ++#include "llvm/CodeGen/MachineOperand.h" ++#include "llvm/MC/MCExpr.h" ++#include "llvm/MC/MCInst.h" ++#include "llvm/Support/ErrorHandling.h" ++#include + + using namespace llvm; + +-static MCOperand lowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym, +- const AsmPrinter &AP) { +- MCContext &Ctx = AP.OutContext; ++LoongArchMCInstLower::LoongArchMCInstLower(LoongArchAsmPrinter &asmprinter) ++ : AsmPrinter(asmprinter) {} + +- // TODO: Processing target flags. ++void LoongArchMCInstLower::Initialize(MCContext *C) { ++ Ctx = C; ++} + +- const MCExpr *ME = +- MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, Ctx); ++MCOperand LoongArchMCInstLower::LowerSymbolOperand(const MachineOperand &MO, ++ MachineOperandType MOTy, ++ unsigned Offset) const { ++ MCSymbolRefExpr::VariantKind Kind = MCSymbolRefExpr::VK_None; ++ LoongArchMCExpr::LoongArchExprKind TargetKind = LoongArchMCExpr::MEK_None; ++ const MCSymbol *Symbol; + +- if (!MO.isJTI() && !MO.isMBB() && MO.getOffset()) +- ME = MCBinaryExpr::createAdd( +- ME, MCConstantExpr::create(MO.getOffset(), Ctx), Ctx); ++ switch(MO.getTargetFlags()) { ++ default: ++ llvm_unreachable("Invalid target flag!"); ++ case LoongArchII::MO_NO_FLAG: ++ break; ++ case LoongArchII::MO_GOT_HI: ++ TargetKind = LoongArchMCExpr::MEK_GOT_HI; ++ break; ++ case LoongArchII::MO_GOT_LO: ++ TargetKind = LoongArchMCExpr::MEK_GOT_LO; ++ break; ++ case LoongArchII::MO_GOT_RRHI: ++ TargetKind = LoongArchMCExpr::MEK_GOT_RRHI; ++ break; ++ case LoongArchII::MO_GOT_RRHIGHER: ++ TargetKind = LoongArchMCExpr::MEK_GOT_RRHIGHER; ++ break; ++ case LoongArchII::MO_GOT_RRHIGHEST: ++ TargetKind = LoongArchMCExpr::MEK_GOT_RRHIGHEST; ++ break; ++ case LoongArchII::MO_GOT_RRLO: ++ TargetKind = LoongArchMCExpr::MEK_GOT_RRLO; ++ break; ++ case LoongArchII::MO_PCREL_HI: ++ TargetKind = LoongArchMCExpr::MEK_PCREL_HI; ++ break; ++ case LoongArchII::MO_PCREL_LO: ++ TargetKind = LoongArchMCExpr::MEK_PCREL_LO; ++ break; ++ case LoongArchII::MO_PCREL_RRHI: ++ TargetKind = LoongArchMCExpr::MEK_PCREL_RRHI; ++ break; ++ case LoongArchII::MO_PCREL_RRHIGHER: ++ TargetKind = LoongArchMCExpr::MEK_PCREL_RRHIGHER; ++ break; ++ case LoongArchII::MO_PCREL_RRHIGHEST: ++ TargetKind = LoongArchMCExpr::MEK_PCREL_RRHIGHEST; ++ break; ++ case LoongArchII::MO_PCREL_RRLO: ++ TargetKind = LoongArchMCExpr::MEK_PCREL_RRLO; ++ break; ++ case LoongArchII::MO_TLSIE_HI: ++ TargetKind = 
LoongArchMCExpr::MEK_TLSIE_HI; ++ break; ++ case LoongArchII::MO_TLSIE_LO: ++ TargetKind = LoongArchMCExpr::MEK_TLSIE_LO; ++ break; ++ case LoongArchII::MO_TLSIE_RRHI: ++ TargetKind = LoongArchMCExpr::MEK_TLSIE_RRHI; ++ break; ++ case LoongArchII::MO_TLSIE_RRHIGHER: ++ TargetKind = LoongArchMCExpr::MEK_TLSIE_RRHIGHER; ++ break; ++ case LoongArchII::MO_TLSIE_RRHIGHEST: ++ TargetKind = LoongArchMCExpr::MEK_TLSIE_RRHIGHEST; ++ break; ++ case LoongArchII::MO_TLSIE_RRLO: ++ TargetKind = LoongArchMCExpr::MEK_TLSIE_RRLO; ++ break; ++ case LoongArchII::MO_TLSLE_HI: ++ TargetKind = LoongArchMCExpr::MEK_TLSLE_HI; ++ break; ++ case LoongArchII::MO_TLSLE_HIGHER: ++ TargetKind = LoongArchMCExpr::MEK_TLSLE_HIGHER; ++ break; ++ case LoongArchII::MO_TLSLE_HIGHEST: ++ TargetKind = LoongArchMCExpr::MEK_TLSLE_HIGHEST; ++ break; ++ case LoongArchII::MO_TLSLE_LO: ++ TargetKind = LoongArchMCExpr::MEK_TLSLE_LO; ++ break; ++ case LoongArchII::MO_TLSGD_HI: ++ TargetKind = LoongArchMCExpr::MEK_TLSGD_HI; ++ break; ++ case LoongArchII::MO_TLSGD_LO: ++ TargetKind = LoongArchMCExpr::MEK_TLSGD_LO; ++ break; ++ case LoongArchII::MO_TLSGD_RRHI: ++ TargetKind = LoongArchMCExpr::MEK_TLSGD_RRHI; ++ break; ++ case LoongArchII::MO_TLSGD_RRHIGHER: ++ TargetKind = LoongArchMCExpr::MEK_TLSGD_RRHIGHER; ++ break; ++ case LoongArchII::MO_TLSGD_RRHIGHEST: ++ TargetKind = LoongArchMCExpr::MEK_TLSGD_RRHIGHEST; ++ break; ++ case LoongArchII::MO_TLSGD_RRLO: ++ TargetKind = LoongArchMCExpr::MEK_TLSGD_RRLO; ++ break; ++ case LoongArchII::MO_ABS_HI: ++ TargetKind = LoongArchMCExpr::MEK_ABS_HI; ++ break; ++ case LoongArchII::MO_ABS_HIGHER: ++ TargetKind = LoongArchMCExpr::MEK_ABS_HIGHER; ++ break; ++ case LoongArchII::MO_ABS_HIGHEST: ++ TargetKind = LoongArchMCExpr::MEK_ABS_HIGHEST; ++ break; ++ case LoongArchII::MO_ABS_LO: ++ TargetKind = LoongArchMCExpr::MEK_ABS_LO; ++ break; ++ case LoongArchII::MO_CALL_HI: ++ TargetKind = LoongArchMCExpr::MEK_CALL_HI; ++ break; ++ case LoongArchII::MO_CALL_LO: ++ TargetKind = LoongArchMCExpr::MEK_CALL_LO; ++ break; ++ } + +- return MCOperand::createExpr(ME); +-} ++ switch (MOTy) { ++ case MachineOperand::MO_MachineBasicBlock: ++ Symbol = MO.getMBB()->getSymbol(); ++ break; ++ ++ case MachineOperand::MO_GlobalAddress: ++ Symbol = AsmPrinter.getSymbol(MO.getGlobal()); ++ Offset += MO.getOffset(); ++ break; ++ ++ case MachineOperand::MO_BlockAddress: ++ Symbol = AsmPrinter.GetBlockAddressSymbol(MO.getBlockAddress()); ++ Offset += MO.getOffset(); ++ break; ++ ++ case MachineOperand::MO_ExternalSymbol: ++ Symbol = AsmPrinter.GetExternalSymbolSymbol(MO.getSymbolName()); ++ Offset += MO.getOffset(); ++ break; ++ ++ case MachineOperand::MO_MCSymbol: ++ Symbol = MO.getMCSymbol(); ++ Offset += MO.getOffset(); ++ break; ++ ++ case MachineOperand::MO_JumpTableIndex: ++ Symbol = AsmPrinter.GetJTISymbol(MO.getIndex()); ++ break; ++ ++ case MachineOperand::MO_ConstantPoolIndex: ++ Symbol = AsmPrinter.GetCPISymbol(MO.getIndex()); ++ Offset += MO.getOffset(); ++ break; + +-bool llvm::lowerLoongArchMachineOperandToMCOperand(const MachineOperand &MO, +- MCOperand &MCOp, +- const AsmPrinter &AP) { +- switch (MO.getType()) { + default: +- report_fatal_error( +- "lowerLoongArchMachineOperandToMCOperand: unknown operand type"); ++ llvm_unreachable(""); ++ } ++ ++ const MCExpr *Expr = MCSymbolRefExpr::create(Symbol, Kind, *Ctx); ++ ++ if (Offset) { ++ // Assume offset is never negative. 
++ assert(Offset > 0); ++ ++ Expr = MCBinaryExpr::createAdd(Expr, MCConstantExpr::create(Offset, *Ctx), ++ *Ctx); ++ } ++ ++ if (TargetKind != LoongArchMCExpr::MEK_None) ++ Expr = LoongArchMCExpr::create(TargetKind, Expr, *Ctx); ++ ++ return MCOperand::createExpr(Expr); ++} ++ ++MCOperand LoongArchMCInstLower::LowerOperand(const MachineOperand &MO, ++ unsigned offset) const { ++ MachineOperandType MOTy = MO.getType(); ++ ++ switch (MOTy) { ++ default: llvm_unreachable("unknown operand type"); + case MachineOperand::MO_Register: + // Ignore all implicit register operands. +- if (MO.isImplicit()) +- return false; +- MCOp = MCOperand::createReg(MO.getReg()); +- break; +- case MachineOperand::MO_RegisterMask: +- // Regmasks are like implicit defs. +- return false; ++ if (MO.isImplicit()) break; ++ return MCOperand::createReg(MO.getReg()); + case MachineOperand::MO_Immediate: +- MCOp = MCOperand::createImm(MO.getImm()); +- break; ++ return MCOperand::createImm(MO.getImm() + offset); ++ case MachineOperand::MO_MachineBasicBlock: ++ case MachineOperand::MO_GlobalAddress: ++ case MachineOperand::MO_ExternalSymbol: ++ case MachineOperand::MO_MCSymbol: ++ case MachineOperand::MO_JumpTableIndex: + case MachineOperand::MO_ConstantPoolIndex: +- MCOp = lowerSymbolOperand(MO, AP.GetCPISymbol(MO.getIndex()), AP); ++ case MachineOperand::MO_BlockAddress: ++ return LowerSymbolOperand(MO, MOTy, offset); ++ case MachineOperand::MO_RegisterMask: + break; +- case MachineOperand::MO_GlobalAddress: +- MCOp = lowerSymbolOperand(MO, AP.getSymbolPreferLocal(*MO.getGlobal()), AP); ++ } ++ ++ return MCOperand(); ++} ++ ++MCOperand LoongArchMCInstLower::createSub(MachineBasicBlock *BB1, ++ MachineBasicBlock *BB2, ++ LoongArchMCExpr::LoongArchExprKind Kind) const { ++ const MCSymbolRefExpr *Sym1 = MCSymbolRefExpr::create(BB1->getSymbol(), *Ctx); ++ const MCSymbolRefExpr *Sym2 = MCSymbolRefExpr::create(BB2->getSymbol(), *Ctx); ++ const MCBinaryExpr *Sub = MCBinaryExpr::createSub(Sym1, Sym2, *Ctx); ++ ++ return MCOperand::createExpr(LoongArchMCExpr::create(Kind, Sub, *Ctx)); ++} ++ ++void LoongArchMCInstLower::lowerLongBranchADDI(const MachineInstr *MI, ++ MCInst &OutMI, int Opcode) const { ++ OutMI.setOpcode(Opcode); ++ ++ LoongArchMCExpr::LoongArchExprKind Kind; ++ unsigned TargetFlags = MI->getOperand(2).getTargetFlags(); ++ switch (TargetFlags) { ++ case LoongArchII::MO_ABS_HIGHEST: ++ Kind = LoongArchMCExpr::MEK_ABS_HIGHEST; + break; +- case MachineOperand::MO_MachineBasicBlock: +- MCOp = lowerSymbolOperand(MO, MO.getMBB()->getSymbol(), AP); ++ case LoongArchII::MO_ABS_HIGHER: ++ Kind = LoongArchMCExpr::MEK_ABS_HIGHER; + break; +- case MachineOperand::MO_ExternalSymbol: +- MCOp = lowerSymbolOperand( +- MO, AP.GetExternalSymbolSymbol(MO.getSymbolName()), AP); ++ case LoongArchII::MO_ABS_HI: ++ Kind = LoongArchMCExpr::MEK_ABS_HI; + break; +- // TODO: lower special operands +- case MachineOperand::MO_BlockAddress: +- case MachineOperand::MO_JumpTableIndex: ++ case LoongArchII::MO_ABS_LO: ++ Kind = LoongArchMCExpr::MEK_ABS_LO; + break; ++ default: ++ report_fatal_error("Unexpected flags for lowerLongBranchADDI"); ++ } ++ ++ // Lower two register operands. ++ for (unsigned I = 0, E = 2; I != E; ++I) { ++ const MachineOperand &MO = MI->getOperand(I); ++ OutMI.addOperand(LowerOperand(MO)); ++ } ++ ++ if (MI->getNumOperands() == 3) { ++ // Lower register operand. 
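// Illustrative sketch (editorial, not part of the patch): despite the
// "register operand" comment above, the remaining operand of the 3-operand
// LONG_BRANCH_ADDI form is the target MachineBasicBlock, and the code below
// builds an expression equivalent to
//   LoongArchMCExpr::create(Kind, MCSymbolRefExpr::create(MBBSym, *Ctx), *Ctx)
// i.e. %kind($BB); the 4-operand form handled further down instead builds
// %kind($tgt - $baltgt) through createSub().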
++ const MCExpr *Expr = ++ MCSymbolRefExpr::create(MI->getOperand(2).getMBB()->getSymbol(), *Ctx); ++ const LoongArchMCExpr *LoongArchExpr = LoongArchMCExpr::create(Kind, Expr, *Ctx); ++ OutMI.addOperand(MCOperand::createExpr(LoongArchExpr)); ++ } else if (MI->getNumOperands() == 4) { ++ // Create %lo($tgt-$baltgt) or %hi($tgt-$baltgt). ++ OutMI.addOperand(createSub(MI->getOperand(2).getMBB(), ++ MI->getOperand(3).getMBB(), Kind)); + } +- return true; + } + +-bool llvm::lowerLoongArchMachineInstrToMCInst(const MachineInstr *MI, +- MCInst &OutMI, AsmPrinter &AP) { ++void LoongArchMCInstLower::lowerLongBranchPCADDU12I(const MachineInstr *MI, ++ MCInst &OutMI, int Opcode) const { ++ OutMI.setOpcode(Opcode); ++ ++ LoongArchMCExpr::LoongArchExprKind Kind; ++ unsigned TargetFlags = MI->getOperand(1).getTargetFlags(); ++ switch (TargetFlags) { ++ case LoongArchII::MO_PCREL_HI: ++ Kind = LoongArchMCExpr::MEK_PCREL_HI; ++ break; ++ case LoongArchII::MO_PCREL_LO: ++ Kind = LoongArchMCExpr::MEK_PCREL_LO; ++ break; ++ default: ++ report_fatal_error("Unexpected flags for lowerLongBranchADDI"); ++ } ++ ++ // Lower one register operands. ++ const MachineOperand &MO = MI->getOperand(0); ++ OutMI.addOperand(LowerOperand(MO)); ++ ++ const MCExpr *Expr = ++ MCSymbolRefExpr::create(MI->getOperand(1).getMBB()->getSymbol(), *Ctx); ++ const LoongArchMCExpr *LoongArchExpr = LoongArchMCExpr::create(Kind, Expr, *Ctx); ++ OutMI.addOperand(MCOperand::createExpr(LoongArchExpr)); ++} ++bool LoongArchMCInstLower::lowerLongBranch(const MachineInstr *MI, ++ MCInst &OutMI) const { ++ switch (MI->getOpcode()) { ++ default: ++ return false; ++ case LoongArch::LONG_BRANCH_ADDIW: ++ case LoongArch::LONG_BRANCH_ADDIW2Op: ++ lowerLongBranchADDI(MI, OutMI, LoongArch::ADDI_W); ++ return true; ++ case LoongArch::LONG_BRANCH_ADDID: ++ case LoongArch::LONG_BRANCH_ADDID2Op: ++ lowerLongBranchADDI(MI, OutMI, LoongArch::ADDI_D); ++ return true; ++ case LoongArch::LONG_BRANCH_PCADDU12I: ++ lowerLongBranchPCADDU12I(MI, OutMI, LoongArch::PCADDU12I); ++ return true; ++ } ++} ++ ++void LoongArchMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { ++ if (lowerLongBranch(MI, OutMI)) ++ return; ++ + OutMI.setOpcode(MI->getOpcode()); + +- for (const MachineOperand &MO : MI->operands()) { +- MCOperand MCOp; +- if (lowerLoongArchMachineOperandToMCOperand(MO, MCOp, AP)) ++ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { ++ const MachineOperand &MO = MI->getOperand(i); ++ MCOperand MCOp = LowerOperand(MO); ++ ++ if (MCOp.isValid()) + OutMI.addOperand(MCOp); + } +- return false; + } +diff --git a/llvm/lib/Target/LoongArch/LoongArchMCInstLower.h b/llvm/lib/Target/LoongArch/LoongArchMCInstLower.h +new file mode 100644 +index 000000000..6463a7b64 +--- /dev/null ++++ b/llvm/lib/Target/LoongArch/LoongArchMCInstLower.h +@@ -0,0 +1,55 @@ ++//===- LoongArchMCInstLower.h - Lower MachineInstr to MCInst --------*- C++ -*--===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. 
++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCHMCINSTLOWER_H ++#define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHMCINSTLOWER_H ++ ++#include "MCTargetDesc/LoongArchMCExpr.h" ++#include "llvm/CodeGen/MachineOperand.h" ++#include "llvm/Support/Compiler.h" ++ ++namespace llvm { ++ ++class MachineBasicBlock; ++class MachineInstr; ++class MCContext; ++class MCInst; ++class MCOperand; ++class LoongArchAsmPrinter; ++ ++/// LoongArchMCInstLower - This class is used to lower an MachineInstr into an ++/// MCInst. ++class LLVM_LIBRARY_VISIBILITY LoongArchMCInstLower { ++ using MachineOperandType = MachineOperand::MachineOperandType; ++ ++ MCContext *Ctx; ++ LoongArchAsmPrinter &AsmPrinter; ++ ++public: ++ LoongArchMCInstLower(LoongArchAsmPrinter &asmprinter); ++ ++ void Initialize(MCContext *C); ++ void Lower(const MachineInstr *MI, MCInst &OutMI) const; ++ MCOperand LowerOperand(const MachineOperand& MO, unsigned offset = 0) const; ++ ++private: ++ MCOperand LowerSymbolOperand(const MachineOperand &MO, ++ MachineOperandType MOTy, unsigned Offset) const; ++ MCOperand createSub(MachineBasicBlock *BB1, MachineBasicBlock *BB2, ++ LoongArchMCExpr::LoongArchExprKind Kind) const; ++ void lowerLongBranchLUi(const MachineInstr *MI, MCInst &OutMI) const; ++ void lowerLongBranchADDI(const MachineInstr *MI, MCInst &OutMI, ++ int Opcode) const; ++ void lowerLongBranchPCADDU12I(const MachineInstr *MI, MCInst &OutMI, ++ int Opcode) const; ++ bool lowerLongBranch(const MachineInstr *MI, MCInst &OutMI) const; ++}; ++ ++} // end namespace llvm ++ ++#endif // LLVM_LIB_TARGET_LOONGARCH_LOONGARCHMCINSTLOWER_H +diff --git a/llvm/lib/Target/LoongArch/LoongArchMachineFunction.cpp b/llvm/lib/Target/LoongArch/LoongArchMachineFunction.cpp +new file mode 100644 +index 000000000..a9c52cbb1 +--- /dev/null ++++ b/llvm/lib/Target/LoongArch/LoongArchMachineFunction.cpp +@@ -0,0 +1,58 @@ ++//===-- LoongArchMachineFunctionInfo.cpp - Private data used for LoongArch ----------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++ ++#include "LoongArchMachineFunction.h" ++#include "MCTargetDesc/LoongArchABIInfo.h" ++#include "LoongArchSubtarget.h" ++#include "LoongArchTargetMachine.h" ++#include "llvm/CodeGen/MachineFrameInfo.h" ++#include "llvm/CodeGen/MachineRegisterInfo.h" ++#include "llvm/CodeGen/PseudoSourceValue.h" ++#include "llvm/CodeGen/TargetRegisterInfo.h" ++#include "llvm/Support/CommandLine.h" ++ ++using namespace llvm; ++ ++LoongArchFunctionInfo::~LoongArchFunctionInfo() = default; ++ ++void LoongArchFunctionInfo::createEhDataRegsFI() { ++ const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); ++ for (int I = 0; I < 4; ++I) { ++ const TargetRegisterClass &RC = ++ static_cast(MF.getTarget()).getABI().IsLP64() ++ ? 
LoongArch::GPR64RegClass ++ : LoongArch::GPR32RegClass; ++ ++ EhDataRegFI[I] = MF.getFrameInfo().CreateStackObject(TRI.getSpillSize(RC), ++ TRI.getSpillAlign(RC), false); ++ } ++} ++ ++bool LoongArchFunctionInfo::isEhDataRegFI(int FI) const { ++ return CallsEhReturn && (FI == EhDataRegFI[0] || FI == EhDataRegFI[1] ++ || FI == EhDataRegFI[2] || FI == EhDataRegFI[3]); ++} ++ ++MachinePointerInfo LoongArchFunctionInfo::callPtrInfo(const char *ES) { ++ return MachinePointerInfo(MF.getPSVManager().getExternalSymbolCallEntry(ES)); ++} ++ ++MachinePointerInfo LoongArchFunctionInfo::callPtrInfo(const GlobalValue *GV) { ++ return MachinePointerInfo(MF.getPSVManager().getGlobalValueCallEntry(GV)); ++} ++ ++int LoongArchFunctionInfo::getMoveF64ViaSpillFI(const TargetRegisterClass *RC) { ++ const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); ++ if (MoveF64ViaSpillFI == -1) { ++ MoveF64ViaSpillFI = MF.getFrameInfo().CreateStackObject( ++ TRI.getSpillSize(*RC), TRI.getSpillAlign(*RC), false); ++ } ++ return MoveF64ViaSpillFI; ++} ++ ++void LoongArchFunctionInfo::anchor() {} +diff --git a/llvm/lib/Target/LoongArch/LoongArchMachineFunction.h b/llvm/lib/Target/LoongArch/LoongArchMachineFunction.h +new file mode 100644 +index 000000000..1765013ea +--- /dev/null ++++ b/llvm/lib/Target/LoongArch/LoongArchMachineFunction.h +@@ -0,0 +1,103 @@ ++//===- LoongArchMachineFunctionInfo.h - Private data used for LoongArch ---*- C++ -*-===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file declares the LoongArch specific subclass of MachineFunctionInfo. ++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCHMACHINEFUNCTION_H ++#define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHMACHINEFUNCTION_H ++ ++#include "llvm/CodeGen/MachineFunction.h" ++#include "llvm/CodeGen/MachineMemOperand.h" ++#include ++ ++namespace llvm { ++ ++/// LoongArchFunctionInfo - This class is derived from MachineFunction private ++/// LoongArch target-specific information for each MachineFunction. ++class LoongArchFunctionInfo : public MachineFunctionInfo { ++public: ++ LoongArchFunctionInfo(MachineFunction &MF) : MF(MF) {} ++ ++ ~LoongArchFunctionInfo() override; ++ ++ unsigned getSRetReturnReg() const { return SRetReturnReg; } ++ void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; } ++ ++ int getVarArgsFrameIndex() const { return VarArgsFrameIndex; } ++ void setVarArgsFrameIndex(int Index) { VarArgsFrameIndex = Index; } ++ ++ unsigned getVarArgsSaveSize() const { return VarArgsSaveSize; } ++ void setVarArgsSaveSize(int Size) { VarArgsSaveSize = Size; } ++ ++ bool hasByvalArg() const { return HasByvalArg; } ++ void setFormalArgInfo(unsigned Size, bool HasByval) { ++ IncomingArgSize = Size; ++ HasByvalArg = HasByval; ++ } ++ ++ unsigned getIncomingArgSize() const { return IncomingArgSize; } ++ ++ bool callsEhReturn() const { return CallsEhReturn; } ++ void setCallsEhReturn() { CallsEhReturn = true; } ++ ++ void createEhDataRegsFI(); ++ int getEhDataRegFI(unsigned Reg) const { return EhDataRegFI[Reg]; } ++ bool isEhDataRegFI(int FI) const; ++ ++ /// Create a MachinePointerInfo that has an ExternalSymbolPseudoSourceValue ++ /// object representing a GOT entry for an external function. 
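// Usage sketch (illustrative only, not taken from this patch): call lowering
// can tag the load of a GOT entry for an external libcall roughly as
//   MachinePointerInfo MPI = FuncInfo->callPtrInfo("memcpy");
//   SDValue Entry = DAG.getLoad(PtrVT, DL, Chain, GOTAddr, MPI);
// so the GOT slot is modelled as a distinct pseudo source value for alias
// analysis; "memcpy", GOTAddr, and FuncInfo are placeholders here.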
++ MachinePointerInfo callPtrInfo(const char *ES); ++ ++ /// Create a MachinePointerInfo that has a GlobalValuePseudoSourceValue object ++ /// representing a GOT entry for a global function. ++ MachinePointerInfo callPtrInfo(const GlobalValue *GV); ++ ++ void setSaveS2() { SaveS2 = true; } ++ bool hasSaveS2() const { return SaveS2; } ++ ++ int getMoveF64ViaSpillFI(const TargetRegisterClass *RC); ++ ++private: ++ virtual void anchor(); ++ ++ MachineFunction& MF; ++ ++ /// SRetReturnReg - Some subtargets require that sret lowering includes ++ /// returning the value of the returned struct in a register. This field ++ /// holds the virtual register into which the sret argument is passed. ++ unsigned SRetReturnReg = 0; ++ ++ /// VarArgsFrameIndex - FrameIndex for start of varargs area. ++ int VarArgsFrameIndex = 0; ++ int VarArgsSaveSize = 0; ++ ++ /// True if function has a byval argument. ++ bool HasByvalArg; ++ ++ /// Size of incoming argument area. ++ unsigned IncomingArgSize; ++ ++ /// CallsEhReturn - Whether the function calls llvm.eh.return. ++ bool CallsEhReturn = false; ++ ++ /// Frame objects for spilling eh data registers. ++ int EhDataRegFI[4]; ++ ++ // saveS2 ++ bool SaveS2 = false; ++ ++ /// FrameIndex for expanding BuildPairF64 nodes to spill and reload when the ++ /// LP32 FPXX ABI is enabled. -1 is used to denote invalid index. ++ int MoveF64ViaSpillFI = -1; ++}; ++ ++} // end namespace llvm ++ ++#endif // LLVM_LIB_TARGET_LOONGARCH_LOONGARCHMACHINEFUNCTION_H +diff --git a/llvm/lib/Target/LoongArch/LoongArchMachineFunctionInfo.h b/llvm/lib/Target/LoongArch/LoongArchMachineFunctionInfo.h +deleted file mode 100644 +index d4a6c884b..000000000 +--- a/llvm/lib/Target/LoongArch/LoongArchMachineFunctionInfo.h ++++ /dev/null +@@ -1,57 +0,0 @@ +-//=- LoongArchMachineFunctionInfo.h - LoongArch machine function info -----===// +-// +-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +-// See https://llvm.org/LICENSE.txt for license information. +-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +-// +-//===----------------------------------------------------------------------===// +-// +-// This file declares LoongArch-specific per-machine-function information. +-// +-//===----------------------------------------------------------------------===// +- +-#ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCHMACHINEFUNCTIONINFO_H +-#define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHMACHINEFUNCTIONINFO_H +- +-#include "LoongArchSubtarget.h" +-#include "llvm/CodeGen/MachineFrameInfo.h" +-#include "llvm/CodeGen/MachineFunction.h" +- +-namespace llvm { +- +-/// LoongArchMachineFunctionInfo - This class is derived from +-/// MachineFunctionInfo and contains private LoongArch-specific information for +-/// each MachineFunction. 
+-class LoongArchMachineFunctionInfo : public MachineFunctionInfo { +-private: +- /// FrameIndex for start of varargs area +- int VarArgsFrameIndex = 0; +- /// Size of the save area used for varargs +- int VarArgsSaveSize = 0; +- +- /// Size of stack frame to save callee saved registers +- unsigned CalleeSavedStackSize = 0; +- +-public: +- LoongArchMachineFunctionInfo(const MachineFunction &MF) {} +- +- MachineFunctionInfo * +- clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF, +- const DenseMap &Src2DstMBB) +- const override { +- return DestMF.cloneInfo(*this); +- } +- +- int getVarArgsFrameIndex() const { return VarArgsFrameIndex; } +- void setVarArgsFrameIndex(int Index) { VarArgsFrameIndex = Index; } +- +- unsigned getVarArgsSaveSize() const { return VarArgsSaveSize; } +- void setVarArgsSaveSize(int Size) { VarArgsSaveSize = Size; } +- +- unsigned getCalleeSavedStackSize() const { return CalleeSavedStackSize; } +- void setCalleeSavedStackSize(unsigned Size) { CalleeSavedStackSize = Size; } +-}; +- +-} // end namespace llvm +- +-#endif // LLVM_LIB_TARGET_LOONGARCH_LOONGARCHMACHINEFUNCTIONINFO_H +diff --git a/llvm/lib/Target/LoongArch/LoongArchModuleISelDAGToDAG.cpp b/llvm/lib/Target/LoongArch/LoongArchModuleISelDAGToDAG.cpp +new file mode 100644 +index 000000000..8dbf30f21 +--- /dev/null ++++ b/llvm/lib/Target/LoongArch/LoongArchModuleISelDAGToDAG.cpp +@@ -0,0 +1,53 @@ ++//===----------------------------------------------------------------------===// ++// Instruction Selector Subtarget Control ++//===----------------------------------------------------------------------===// ++ ++//===----------------------------------------------------------------------===// ++// This file defines a pass used to change the subtarget for the ++// LoongArch Instruction selector. 
++// ++//===----------------------------------------------------------------------===// ++ ++#include "LoongArch.h" ++#include "LoongArchTargetMachine.h" ++#include "llvm/CodeGen/TargetPassConfig.h" ++#include "llvm/CodeGen/StackProtector.h" ++#include "llvm/Support/Debug.h" ++#include "llvm/Support/raw_ostream.h" ++ ++using namespace llvm; ++ ++#define DEBUG_TYPE "loongarch-isel" ++ ++namespace { ++ class LoongArchModuleDAGToDAGISel : public MachineFunctionPass { ++ public: ++ static char ID; ++ ++ LoongArchModuleDAGToDAGISel() : MachineFunctionPass(ID) {} ++ ++ // Pass Name ++ StringRef getPassName() const override { ++ return "LoongArch DAG->DAG Pattern Instruction Selection"; ++ } ++ ++ void getAnalysisUsage(AnalysisUsage &AU) const override { ++ AU.addRequired(); ++ AU.addPreserved(); ++ MachineFunctionPass::getAnalysisUsage(AU); ++ } ++ ++ bool runOnMachineFunction(MachineFunction &MF) override; ++ }; ++ ++ char LoongArchModuleDAGToDAGISel::ID = 0; ++} ++ ++bool LoongArchModuleDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) { ++ LLVM_DEBUG(errs() << "In LoongArchModuleDAGToDAGISel::runMachineFunction\n"); ++ return false; ++} ++ ++llvm::FunctionPass *llvm::createLoongArchModuleISelDagPass() { ++ return new LoongArchModuleDAGToDAGISel(); ++} +diff --git a/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp +index 05902ebb7..4d1a3cf22 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp ++++ b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp +@@ -1,4 +1,4 @@ +-//===- LoongArchRegisterInfo.cpp - LoongArch Register Information -*- C++ -*-=// ++//===- LoongArchRegisterInfo.cpp - LoongArch Register Information -------------------===// + // + // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + // See https://llvm.org/LICENSE.txt for license information. +@@ -6,132 +6,352 @@ + // + //===----------------------------------------------------------------------===// + // +-// This file contains the LoongArch implementation of the TargetRegisterInfo +-// class. ++// This file contains the LoongArch implementation of the TargetRegisterInfo class. 
+ // + //===----------------------------------------------------------------------===// + + #include "LoongArchRegisterInfo.h" ++#include "MCTargetDesc/LoongArchABIInfo.h" + #include "LoongArch.h" ++#include "LoongArchMachineFunction.h" + #include "LoongArchSubtarget.h" ++#include "LoongArchTargetMachine.h" ++#include "llvm/ADT/BitVector.h" ++#include "llvm/ADT/STLExtras.h" + #include "llvm/CodeGen/MachineFrameInfo.h" + #include "llvm/CodeGen/MachineFunction.h" +-#include "llvm/CodeGen/MachineInstrBuilder.h" +-#include "llvm/CodeGen/RegisterScavenging.h" ++#include "llvm/CodeGen/MachineInstr.h" ++#include "llvm/CodeGen/MachineRegisterInfo.h" + #include "llvm/CodeGen/TargetFrameLowering.h" +-#include "llvm/CodeGen/TargetInstrInfo.h" ++#include "llvm/CodeGen/TargetRegisterInfo.h" ++#include "llvm/CodeGen/TargetSubtargetInfo.h" ++#include "llvm/IR/Function.h" ++#include "llvm/MC/MCRegisterInfo.h" ++#include "llvm/Support/Debug.h" + #include "llvm/Support/ErrorHandling.h" ++#include "llvm/Support/raw_ostream.h" ++#include + + using namespace llvm; + ++#define DEBUG_TYPE "loongarch-reg-info" ++ + #define GET_REGINFO_TARGET_DESC + #include "LoongArchGenRegisterInfo.inc" + +-LoongArchRegisterInfo::LoongArchRegisterInfo(unsigned HwMode) +- : LoongArchGenRegisterInfo(LoongArch::R1, /*DwarfFlavour*/ 0, +- /*EHFlavor*/ 0, +- /*PC*/ 0, HwMode) {} ++LoongArchRegisterInfo::LoongArchRegisterInfo() : LoongArchGenRegisterInfo(LoongArch::RA) {} + +-const MCPhysReg * +-LoongArchRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { +- auto &Subtarget = MF->getSubtarget(); ++unsigned LoongArchRegisterInfo::getPICCallReg() { return LoongArch::T8; } ++ ++const TargetRegisterClass * ++LoongArchRegisterInfo::getPointerRegClass(const MachineFunction &MF, ++ unsigned Kind) const { ++ LoongArchABIInfo ABI = MF.getSubtarget().getABI(); ++ LoongArchPtrClass PtrClassKind = static_cast(Kind); ++ ++ switch (PtrClassKind) { ++ case LoongArchPtrClass::Default: ++ return ABI.ArePtrs64bit() ? &LoongArch::GPR64RegClass : &LoongArch::GPR32RegClass; ++ case LoongArchPtrClass::StackPointer: ++ return ABI.ArePtrs64bit() ? 
&LoongArch::SP64RegClass : &LoongArch::SP32RegClass; ++ } ++ ++ llvm_unreachable("Unknown pointer kind"); ++} + +- switch (Subtarget.getTargetABI()) { ++unsigned ++LoongArchRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, ++ MachineFunction &MF) const { ++ switch (RC->getID()) { + default: +- llvm_unreachable("Unrecognized ABI"); +- case LoongArchABI::ABI_ILP32S: +- case LoongArchABI::ABI_LP64S: +- return CSR_ILP32S_LP64S_SaveList; +- case LoongArchABI::ABI_ILP32F: +- case LoongArchABI::ABI_LP64F: +- return CSR_ILP32F_LP64F_SaveList; +- case LoongArchABI::ABI_ILP32D: +- case LoongArchABI::ABI_LP64D: +- return CSR_ILP32D_LP64D_SaveList; ++ return 0; ++ case LoongArch::GPR32RegClassID: ++ case LoongArch::GPR64RegClassID: ++ { ++ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); ++ return 28 - TFI->hasFP(MF); + } ++ case LoongArch::FGR32RegClassID: ++ return 32; ++ case LoongArch::FGR64RegClassID: ++ return 32; ++ } ++} ++ ++//===----------------------------------------------------------------------===// ++// Callee Saved Registers methods ++//===----------------------------------------------------------------------===// ++ ++/// LoongArch Callee Saved Registers ++const MCPhysReg * ++LoongArchRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { ++ const LoongArchSubtarget &Subtarget = MF->getSubtarget(); ++ ++ if (Subtarget.isSingleFloat()) ++ return CSR_SingleFloatOnly_SaveList; ++ ++ if (Subtarget.isABI_LP64()) ++ return CSR_LP64_SaveList; ++ ++ if (Subtarget.isABI_LPX32()) ++ return CSR_LPX32_SaveList; ++ ++ return CSR_LP32_SaveList; + } + + const uint32_t * + LoongArchRegisterInfo::getCallPreservedMask(const MachineFunction &MF, +- CallingConv::ID CC) const { +- auto &Subtarget = MF.getSubtarget(); ++ CallingConv::ID) const { ++ const LoongArchSubtarget &Subtarget = MF.getSubtarget(); + +- switch (Subtarget.getTargetABI()) { +- default: +- llvm_unreachable("Unrecognized ABI"); +- case LoongArchABI::ABI_ILP32S: +- case LoongArchABI::ABI_LP64S: +- return CSR_ILP32S_LP64S_RegMask; +- case LoongArchABI::ABI_ILP32F: +- case LoongArchABI::ABI_LP64F: +- return CSR_ILP32F_LP64F_RegMask; +- case LoongArchABI::ABI_ILP32D: +- case LoongArchABI::ABI_LP64D: +- return CSR_ILP32D_LP64D_RegMask; +- } +-} ++ if (Subtarget.isSingleFloat()) ++ return CSR_SingleFloatOnly_RegMask; ++ ++ if (Subtarget.isABI_LP64()) ++ return CSR_LP64_RegMask; + +-const uint32_t *LoongArchRegisterInfo::getNoPreservedMask() const { +- return CSR_NoRegs_RegMask; ++ return CSR_LP32_RegMask; + } + +-BitVector +-LoongArchRegisterInfo::getReservedRegs(const MachineFunction &MF) const { +- const LoongArchFrameLowering *TFI = getFrameLowering(MF); ++BitVector LoongArchRegisterInfo:: ++getReservedRegs(const MachineFunction &MF) const { ++ static const MCPhysReg ReservedGPR32[] = { ++ LoongArch::ZERO, LoongArch::SP, LoongArch::TP, LoongArch::T9 ++ }; ++ ++ static const MCPhysReg ReservedGPR64[] = { ++ LoongArch::ZERO_64, LoongArch::SP_64, LoongArch::TP_64, LoongArch::T9_64 ++ }; ++ + BitVector Reserved(getNumRegs()); ++ const LoongArchSubtarget &Subtarget = MF.getSubtarget(); ++ ++ for (unsigned I = 0; I < array_lengthof(ReservedGPR32); ++I) ++ Reserved.set(ReservedGPR32[I]); ++ ++ for (unsigned I = 0; I < array_lengthof(ReservedGPR64); ++I) ++ Reserved.set(ReservedGPR64[I]); ++ ++ // Reserve FP if this function should have a dedicated frame pointer register. 
++ if (Subtarget.getFrameLowering()->hasFP(MF)) { ++ Reserved.set(LoongArch::FP); ++ Reserved.set(LoongArch::FP_64); ++ ++ // Reserve the base register if we need to both realign the stack and ++ // allocate variable-sized objects at runtime. This should test the ++ // same conditions as LoongArchFrameLowering::hasBP(). ++ if (hasStackRealignment(MF) && MF.getFrameInfo().hasVarSizedObjects()) { ++ Reserved.set(LoongArch::S7); ++ Reserved.set(LoongArch::S7_64); ++ } ++ } + +- // Use markSuperRegs to ensure any register aliases are also reserved +- markSuperRegs(Reserved, LoongArch::R0); // zero +- markSuperRegs(Reserved, LoongArch::R2); // tp +- markSuperRegs(Reserved, LoongArch::R3); // sp +- markSuperRegs(Reserved, LoongArch::R21); // non-allocatable +- if (TFI->hasFP(MF)) +- markSuperRegs(Reserved, LoongArch::R22); // fp +- // Reserve the base register if we need to realign the stack and allocate +- // variable-sized objects at runtime. +- if (TFI->hasBP(MF)) +- markSuperRegs(Reserved, LoongArchABI::getBPReg()); // bp +- +- assert(checkAllSuperRegsMarked(Reserved)); + return Reserved; + } + +-bool LoongArchRegisterInfo::isConstantPhysReg(MCRegister PhysReg) const { +- return PhysReg == LoongArch::R0; ++bool ++LoongArchRegisterInfo::requiresRegisterScavenging(const MachineFunction &MF) const { ++ return true; + } + +-Register +-LoongArchRegisterInfo::getFrameRegister(const MachineFunction &MF) const { +- const TargetFrameLowering *TFI = getFrameLowering(MF); +- return TFI->hasFP(MF) ? LoongArch::R22 : LoongArch::R3; ++bool LoongArchRegisterInfo:: ++requiresFrameIndexScavenging(const MachineFunction &MF) const { ++ return true; + } + +-void LoongArchRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, +- int SPAdj, +- unsigned FIOperandNum, +- RegScavenger *RS) const { +- // TODO: this implementation is a temporary placeholder which does just +- // enough to allow other aspects of code generation to be tested. ++bool ++LoongArchRegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const { ++ return true; ++} ++ ++/// Get the size of the offset supported by the given load/store/inline asm. ++/// The result includes the effects of any scale factors applied to the ++/// instruction immediate. ++static inline unsigned getLoadStoreOffsetSizeInBits(const unsigned Opcode, ++ MachineOperand MO) { ++ switch (Opcode) { ++ case LoongArch::LDPTR_W: ++ case LoongArch::LDPTR_W32: ++ case LoongArch::LDPTR_D: ++ case LoongArch::STPTR_W: ++ case LoongArch::STPTR_W32: ++ case LoongArch::STPTR_D: ++ case LoongArch::LL_W: ++ case LoongArch::LL_D: ++ case LoongArch::SC_W: ++ case LoongArch::SC_D: ++ return 14 + 2 /* scale factor */; ++ case LoongArch::INLINEASM: { ++ unsigned ConstraintID = InlineAsm::getMemoryConstraintID(MO.getImm()); ++ switch (ConstraintID) { ++ case InlineAsm::Constraint_ZC: { ++ return 14 + 2 /* scale factor */; ++ } ++ default: ++ return 12; ++ } ++ } ++ default: ++ return 12; ++ } ++} + +- assert(SPAdj == 0 && "Unexpected non-zero SPAdj value"); ++/// Get the scale factor applied to the immediate in the given load/store. 
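// Sketch (not part of the patch): taken together, the "14 + 2" offset width
// above and the alignment of 4 returned below mean an ldptr/stptr/ll/sc
// offset must pass a check equivalent to
//   #include "llvm/Support/MathExtras.h"
//   bool OK = llvm::isShiftedInt<14, 2>(Offset); // signed 16-bit, low 2 bits 0
// whereas the plain 12-bit loads/stores only need isInt<12>(Offset).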
++static inline unsigned getLoadStoreOffsetAlign(const unsigned Opcode) { ++ switch (Opcode) { ++ case LoongArch::LDPTR_W: ++ case LoongArch::LDPTR_W32: ++ case LoongArch::LDPTR_D: ++ case LoongArch::STPTR_W: ++ case LoongArch::STPTR_W32: ++ case LoongArch::STPTR_D: ++ case LoongArch::LL_W: ++ case LoongArch::LL_D: ++ case LoongArch::SC_W: ++ case LoongArch::SC_D: ++ return 4; ++ default: ++ return 1; ++ } ++} + ++// FrameIndex represent objects inside a abstract stack. ++// We must replace FrameIndex with an stack/frame pointer ++// direct reference. ++void LoongArchRegisterInfo:: ++eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, ++ unsigned FIOperandNum, RegScavenger *RS) const { + MachineInstr &MI = *II; + MachineFunction &MF = *MI.getParent()->getParent(); +- const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); +- DebugLoc DL = MI.getDebugLoc(); ++ const LoongArchFrameLowering *TFI = getFrameLowering(MF); ++ ++ LLVM_DEBUG(errs() << "\nFunction : " << MF.getName() << "\n"; ++ errs() << "<--------->\n" ++ << MI); + + int FrameIndex = MI.getOperand(FIOperandNum).getIndex(); ++ uint64_t stackSize = MF.getFrameInfo().getStackSize(); ++ int64_t spOffset = MF.getFrameInfo().getObjectOffset(FrameIndex); ++ ++ LLVM_DEBUG(errs() << "FrameIndex : " << FrameIndex << "\n" ++ << "spOffset : " << spOffset << "\n" ++ << "stackSize : " << stackSize << "\n" ++ << "SPAdj : " << SPAdj << "\n" ++ << "alignment : " ++ << DebugStr(MF.getFrameInfo().getObjectAlign(FrameIndex)) ++ << "\n"); ++ ++ LoongArchABIInfo ABI = ++ static_cast(MF.getTarget()).getABI(); ++ ++ // Everything else is referenced relative to whatever register ++ // getFrameIndexReference() returns. + Register FrameReg; + StackOffset Offset = + TFI->getFrameIndexReference(MF, FrameIndex, FrameReg) + + StackOffset::getFixed(MI.getOperand(FIOperandNum + 1).getImm()); + +- // Offsets must be encodable with a 12-bit immediate field. +- if (!isInt<12>(Offset.getFixed())) { +- report_fatal_error("Frame offsets outside of the signed 12-bit range is " +- "not supported currently"); ++ LLVM_DEBUG(errs() << "Location : " ++ << "FrameReg<" << FrameReg << "> + " << Offset.getFixed() ++ << "\n<--------->\n"); ++ ++ MachineBasicBlock &MBB = *MI.getParent(); ++ DebugLoc DL = II->getDebugLoc(); ++ bool IsKill = false; ++ ++ if (!MI.isDebugValue()) { ++ // Make sure Offset fits within the field available. ++ // For ldptr/stptr/ll/sc instructions, this is a 14-bit signed immediate ++ // (scaled by 2), otherwise it is a 12-bit signed immediate. ++ unsigned OffsetBitSize = getLoadStoreOffsetSizeInBits( ++ MI.getOpcode(), MI.getOperand(FIOperandNum - 1)); ++ const Align OffsetAlign(getLoadStoreOffsetAlign(MI.getOpcode())); ++ ++ if (OffsetBitSize == 16 && isInt<12>(Offset.getFixed()) && ++ !isAligned(OffsetAlign, Offset.getFixed())) { ++ // If we have an offset that needs to fit into a signed n-bit immediate ++ // (where n == 16) and doesn't aligned and does fit into 12-bits ++ // then use an ADDI ++ const TargetRegisterClass *PtrRC = ABI.ArePtrs64bit() ++ ? 
&LoongArch::GPR64RegClass ++ : &LoongArch::GPR32RegClass; ++ MachineRegisterInfo &RegInfo = MBB.getParent()->getRegInfo(); ++ unsigned Reg = RegInfo.createVirtualRegister(PtrRC); ++ const LoongArchInstrInfo &TII = *static_cast( ++ MBB.getParent()->getSubtarget().getInstrInfo()); ++ BuildMI(MBB, II, DL, TII.get(ABI.GetPtrAddiOp()), Reg) ++ .addReg(FrameReg) ++ .addImm(Offset.getFixed()); ++ ++ FrameReg = Reg; ++ Offset = StackOffset::getFixed(0); ++ IsKill = true; ++ } else if (!isInt<12>(Offset.getFixed())) { ++ // Otherwise split the offset into several pieces and add it in multiple ++ // instructions. ++ const LoongArchInstrInfo &TII = *static_cast( ++ MBB.getParent()->getSubtarget().getInstrInfo()); ++ unsigned Reg = TII.loadImmediate(Offset.getFixed(), MBB, II, DL); ++ BuildMI(MBB, II, DL, TII.get(ABI.GetPtrAddOp()), Reg) ++ .addReg(FrameReg) ++ .addReg(Reg, RegState::Kill); ++ ++ FrameReg = Reg; ++ Offset = StackOffset::getFixed(0); ++ IsKill = true; ++ } + } + +- MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false); ++ MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false, false, IsKill); + MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset.getFixed()); + } ++ ++Register LoongArchRegisterInfo:: ++getFrameRegister(const MachineFunction &MF) const { ++ const LoongArchSubtarget &Subtarget = MF.getSubtarget(); ++ const TargetFrameLowering *TFI = Subtarget.getFrameLowering(); ++ bool IsLP64 = ++ static_cast(MF.getTarget()).getABI().IsLP64(); ++ ++ return TFI->hasFP(MF) ? (IsLP64 ? LoongArch::FP_64 : LoongArch::FP) : ++ (IsLP64 ? LoongArch::SP_64 : LoongArch::SP); ++} ++ ++const TargetRegisterClass * ++LoongArchRegisterInfo::intRegClass(unsigned Size) const { ++ if (Size == 4) ++ return &LoongArch::GPR32RegClass; ++ ++ assert(Size == 8); ++ return &LoongArch::GPR64RegClass; ++} ++ ++bool LoongArchRegisterInfo::canRealignStack(const MachineFunction &MF) const { ++ // Avoid realigning functions that explicitly do not want to be realigned. ++ // Normally, we should report an error when a function should be dynamically ++ // realigned but also has the attribute no-realign-stack. Unfortunately, ++ // with this attribute, MachineFrameInfo clamps each new object's alignment ++ // to that of the stack's alignment as specified by the ABI. As a result, ++ // the information of whether we have objects with larger alignment ++ // requirement than the stack's alignment is already lost at this point. ++ if (!TargetRegisterInfo::canRealignStack(MF)) ++ return false; ++ ++ const LoongArchSubtarget &Subtarget = MF.getSubtarget(); ++ unsigned FP = Subtarget.is64Bit() ? LoongArch::FP_64 : LoongArch::FP; ++ unsigned BP = Subtarget.is64Bit() ? LoongArch::S7_64 : LoongArch::S7; ++ ++ // We can't perform dynamic stack realignment if we can't reserve the ++ // frame pointer register. ++ if (!MF.getRegInfo().canReserveReg(FP)) ++ return false; ++ ++ // We can realign the stack if we know the maximum call frame size and we ++ // don't have variable sized objects. ++ if (Subtarget.getFrameLowering()->hasReservedCallFrame(MF)) ++ return true; ++ ++ // We have to reserve the base pointer register in the presence of variable ++ // sized objects. 
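// Illustrative trigger for this case (hypothetical source, not from the test
// suite): an over-aligned local forces dynamic realignment while the alloca
// makes the frame variable-sized, so $s7 must stay reservable as base pointer:
//   void f(unsigned n) {
//     alignas(64) char big[64];               // needs stack realignment
//     char *p = (char *)__builtin_alloca(n);  // variable-sized object
//     p[0] = big[0];
//   }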
++ return MF.getRegInfo().canReserveReg(BP); ++} +diff --git a/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.h b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.h +index cca130c3b..dd3be916a 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.h ++++ b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.h +@@ -1,4 +1,4 @@ +-//= LoongArchRegisterInfo.h - LoongArch Register Information Impl -*- C++ -*-=// ++//===- LoongArchRegisterInfo.h - LoongArch Register Information Impl ------*- C++ -*-===// + // + // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + // See https://llvm.org/LICENSE.txt for license information. +@@ -6,45 +6,75 @@ + // + //===----------------------------------------------------------------------===// + // +-// This file contains the LoongArch implementation of the TargetRegisterInfo +-// class. ++// This file contains the LoongArch implementation of the TargetRegisterInfo class. + // + //===----------------------------------------------------------------------===// + + #ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCHREGISTERINFO_H + #define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHREGISTERINFO_H + +-#include "llvm/CodeGen/TargetRegisterInfo.h" ++#include "LoongArch.h" ++#include "llvm/CodeGen/MachineBasicBlock.h" ++#include + + #define GET_REGINFO_HEADER + #include "LoongArchGenRegisterInfo.inc" + + namespace llvm { + +-struct LoongArchRegisterInfo : public LoongArchGenRegisterInfo { ++class TargetRegisterClass; + +- LoongArchRegisterInfo(unsigned HwMode); ++class LoongArchRegisterInfo : public LoongArchGenRegisterInfo { ++public: ++ enum class LoongArchPtrClass { ++ /// The default register class for integer values. ++ Default = 0, ++ /// The stack pointer only. ++ StackPointer = 1, ++ }; + ++ LoongArchRegisterInfo(); ++ ++ /// Get PIC indirect call register ++ static unsigned getPICCallReg(); ++ ++ /// Code Generation virtual methods... ++ const TargetRegisterClass *getPointerRegClass(const MachineFunction &MF, ++ unsigned Kind) const override; ++ ++ unsigned getRegPressureLimit(const TargetRegisterClass *RC, ++ MachineFunction &MF) const override; + const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override; + const uint32_t *getCallPreservedMask(const MachineFunction &MF, + CallingConv::ID) const override; +- const uint32_t *getNoPreservedMask() const override; +- + BitVector getReservedRegs(const MachineFunction &MF) const override; +- bool isConstantPhysReg(MCRegister PhysReg) const override; + +- const TargetRegisterClass * +- getPointerRegClass(const MachineFunction &MF, +- unsigned Kind = 0) const override { +- return &LoongArch::GPRRegClass; +- } ++ bool requiresRegisterScavenging(const MachineFunction &MF) const override; ++ ++ bool requiresFrameIndexScavenging(const MachineFunction &MF) const override; + +- void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj, +- unsigned FIOperandNum, ++ bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const override; ++ ++ /// Stack Frame Processing Methods ++ void eliminateFrameIndex(MachineBasicBlock::iterator II, ++ int SPAdj, unsigned FIOperandNum, + RegScavenger *RS = nullptr) const override; + ++ // Stack realignment queries. ++ bool canRealignStack(const MachineFunction &MF) const override; ++ ++ /// Debug information queries. + Register getFrameRegister(const MachineFunction &MF) const override; ++ ++ /// Return GPR register class. 
++ const TargetRegisterClass *intRegClass(unsigned Size) const; ++ ++private: ++ void eliminateFI(MachineBasicBlock::iterator II, unsigned OpNo, ++ int FrameIndex, uint64_t StackSize, ++ int SPAdj, int64_t SPOffset) const; + }; ++ + } // end namespace llvm + + #endif // LLVM_LIB_TARGET_LOONGARCH_LOONGARCHREGISTERINFO_H +diff --git a/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.td b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.td +index 2d5ad99f6..96569e075 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.td +@@ -1,4 +1,4 @@ +-//===-- LoongArchRegisterInfo.td - LoongArch Register defs -*- tablegen -*-===// ++//===-- LoongArchRegisterInfo.td - LoongArch Register defs -----------*- tablegen -*-===// + // + // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + // See https://llvm.org/LICENSE.txt for license information. +@@ -7,155 +7,367 @@ + //===----------------------------------------------------------------------===// + + //===----------------------------------------------------------------------===// +-// Declarations that describe the LoongArch register files ++// Declarations that describe the LoongArch register file + //===----------------------------------------------------------------------===// +- + let Namespace = "LoongArch" in { +-class LoongArchReg Enc, string n, list alt = []> +- : Register { ++def sub_32 : SubRegIndex<32>; ++def sub_64 : SubRegIndex<64>; ++def sub_128 : SubRegIndex<128>; ++def sub_fcsr1 : SubRegIndex<5>; ++def sub_fcsr2 : SubRegIndex<13, 16>; ++def sub_fcsr3 : SubRegIndex<2, 8>; ++def sub_lo : SubRegIndex<32>; ++def sub_hi : SubRegIndex<32, 32>; ++def PC : Register<"pc">; ++} ++ ++class Unallocatable { ++ bit isAllocatable = 0; ++} ++ ++/// We have banks of registers each. ++class LoongArchReg Enc, string n> : Register { + let HWEncoding = Enc; +- let AltNames = alt; ++ let Namespace = "LoongArch"; + } + +-class LoongArchReg32 Enc, string n, list alt = []> +- : Register { ++class LoongArchRegWithSubRegs Enc, string n, list subregs> ++ : RegisterWithSubRegs { + let HWEncoding = Enc; +- let AltNames = alt; ++ let Namespace = "LoongArch"; + } + +-def sub_32 : SubRegIndex<32>; +-class LoongArchReg64 +- : Register<""> { +- let HWEncoding = subreg.HWEncoding; +- let SubRegs = [subreg]; ++/// LoongArch 32-bit CPU Registers. ++class LoongArch32GPR Enc, string n> : LoongArchReg; ++ ++/// LoongArch 64-bit CPU Registers. 
++class LoongArch64GPR Enc, string n, list subregs> ++ : LoongArchRegWithSubRegs { + let SubRegIndices = [sub_32]; +- let AsmName = subreg.AsmName; +- let AltNames = subreg.AltNames; +-} +- +-let FallbackRegAltNameIndex = NoRegAltName in +-def RegAliasName : RegAltNameIndex; +-} // Namespace = "LoongArch" +- +-// Integer registers +- +-let RegAltNameIndices = [RegAliasName] in { +- def R0 : LoongArchReg<0, "r0", ["zero"]>, DwarfRegNum<[0]>; +- def R1 : LoongArchReg<1, "r1", ["ra"]>, DwarfRegNum<[1]>; +- def R2 : LoongArchReg<2, "r2", ["tp"]>, DwarfRegNum<[2]>; +- def R3 : LoongArchReg<3, "r3", ["sp"]>, DwarfRegNum<[3]>; +- def R4 : LoongArchReg<4, "r4", ["a0"]>, DwarfRegNum<[4]>; +- def R5 : LoongArchReg<5, "r5", ["a1"]>, DwarfRegNum<[5]>; +- def R6 : LoongArchReg<6, "r6", ["a2"]>, DwarfRegNum<[6]>; +- def R7 : LoongArchReg<7, "r7", ["a3"]>, DwarfRegNum<[7]>; +- def R8 : LoongArchReg<8, "r8", ["a4"]>, DwarfRegNum<[8]>; +- def R9 : LoongArchReg<9, "r9", ["a5"]>, DwarfRegNum<[9]>; +- def R10 : LoongArchReg<10, "r10", ["a6"]>, DwarfRegNum<[10]>; +- def R11 : LoongArchReg<11, "r11", ["a7"]>, DwarfRegNum<[11]>; +- def R12 : LoongArchReg<12, "r12", ["t0"]>, DwarfRegNum<[12]>; +- def R13 : LoongArchReg<13, "r13", ["t1"]>, DwarfRegNum<[13]>; +- def R14 : LoongArchReg<14, "r14", ["t2"]>, DwarfRegNum<[14]>; +- def R15 : LoongArchReg<15, "r15", ["t3"]>, DwarfRegNum<[15]>; +- def R16 : LoongArchReg<16, "r16", ["t4"]>, DwarfRegNum<[16]>; +- def R17 : LoongArchReg<17, "r17", ["t5"]>, DwarfRegNum<[17]>; +- def R18 : LoongArchReg<18, "r18", ["t6"]>, DwarfRegNum<[18]>; +- def R19 : LoongArchReg<19, "r19", ["t7"]>, DwarfRegNum<[19]>; +- def R20 : LoongArchReg<20, "r20", ["t8"]>, DwarfRegNum<[20]>; +- def R21 : LoongArchReg<21, "r21", [""]>, DwarfRegNum<[21]>; +- def R22 : LoongArchReg<22, "r22", ["fp", "s9"]>, DwarfRegNum<[22]>; +- def R23 : LoongArchReg<23, "r23", ["s0"]>, DwarfRegNum<[23]>; +- def R24 : LoongArchReg<24, "r24", ["s1"]>, DwarfRegNum<[24]>; +- def R25 : LoongArchReg<25, "r25", ["s2"]>, DwarfRegNum<[25]>; +- def R26 : LoongArchReg<26, "r26", ["s3"]>, DwarfRegNum<[26]>; +- def R27 : LoongArchReg<27, "r27", ["s4"]>, DwarfRegNum<[27]>; +- def R28 : LoongArchReg<28, "r28", ["s5"]>, DwarfRegNum<[28]>; +- def R29 : LoongArchReg<29, "r29", ["s6"]>, DwarfRegNum<[29]>; +- def R30 : LoongArchReg<30, "r30", ["s7"]>, DwarfRegNum<[30]>; +- def R31 : LoongArchReg<31, "r31", ["s8"]>, DwarfRegNum<[31]>; +-} // RegAltNameIndices = [RegAliasName] +- +-def GRLenVT : ValueTypeByHwMode<[LA32, LA64], +- [i32, i64]>; +-def GRLenRI : RegInfoByHwMode< +- [LA32, LA64], +- [RegInfo<32,32,32>, RegInfo<64,64,64>]>; +- +-// The order of registers represents the preferred allocation sequence. +-// Registers are listed in the order caller-save, callee-save, specials. 
+-def GPR : RegisterClass<"LoongArch", [GRLenVT], 32, (add +- // Argument registers (a0...a7) +- (sequence "R%u", 4, 11), +- // Temporary registers (t0...t8) +- (sequence "R%u", 12, 20), +- // Static register (s9/fp, s0...s8) +- (sequence "R%u", 22, 31), +- // Specials (r0, ra, tp, sp) +- (sequence "R%u", 0, 3), +- // Reserved (Non-allocatable) +- R21 +- )> { +- let RegInfos = GRLenRI; +-} +- +-// Floating point registers +- +-let RegAltNameIndices = [RegAliasName] in { +- def F0 : LoongArchReg32<0, "f0", ["fa0"]>, DwarfRegNum<[32]>; +- def F1 : LoongArchReg32<1, "f1", ["fa1"]>, DwarfRegNum<[33]>; +- def F2 : LoongArchReg32<2, "f2", ["fa2"]>, DwarfRegNum<[34]>; +- def F3 : LoongArchReg32<3, "f3", ["fa3"]>, DwarfRegNum<[35]>; +- def F4 : LoongArchReg32<4, "f4", ["fa4"]>, DwarfRegNum<[36]>; +- def F5 : LoongArchReg32<5, "f5", ["fa5"]>, DwarfRegNum<[37]>; +- def F6 : LoongArchReg32<6, "f6", ["fa6"]>, DwarfRegNum<[38]>; +- def F7 : LoongArchReg32<7, "f7", ["fa7"]>, DwarfRegNum<[39]>; +- def F8 : LoongArchReg32<8, "f8", ["ft0"]>, DwarfRegNum<[40]>; +- def F9 : LoongArchReg32<9, "f9", ["ft1"]>, DwarfRegNum<[41]>; +- def F10 : LoongArchReg32<10,"f10", ["ft2"]>, DwarfRegNum<[42]>; +- def F11 : LoongArchReg32<11,"f11", ["ft3"]>, DwarfRegNum<[43]>; +- def F12 : LoongArchReg32<12,"f12", ["ft4"]>, DwarfRegNum<[44]>; +- def F13 : LoongArchReg32<13,"f13", ["ft5"]>, DwarfRegNum<[45]>; +- def F14 : LoongArchReg32<14,"f14", ["ft6"]>, DwarfRegNum<[46]>; +- def F15 : LoongArchReg32<15,"f15", ["ft7"]>, DwarfRegNum<[47]>; +- def F16 : LoongArchReg32<16,"f16", ["ft8"]>, DwarfRegNum<[48]>; +- def F17 : LoongArchReg32<17,"f17", ["ft9"]>, DwarfRegNum<[49]>; +- def F18 : LoongArchReg32<18,"f18", ["ft10"]>, DwarfRegNum<[50]>; +- def F19 : LoongArchReg32<19,"f19", ["ft11"]>, DwarfRegNum<[51]>; +- def F20 : LoongArchReg32<20,"f20", ["ft12"]>, DwarfRegNum<[52]>; +- def F21 : LoongArchReg32<21,"f21", ["ft13"]>, DwarfRegNum<[53]>; +- def F22 : LoongArchReg32<22,"f22", ["ft14"]>, DwarfRegNum<[54]>; +- def F23 : LoongArchReg32<23,"f23", ["ft15"]>, DwarfRegNum<[55]>; +- def F24 : LoongArchReg32<24,"f24", ["fs0"]>, DwarfRegNum<[56]>; +- def F25 : LoongArchReg32<25,"f25", ["fs1"]>, DwarfRegNum<[57]>; +- def F26 : LoongArchReg32<26,"f26", ["fs2"]>, DwarfRegNum<[58]>; +- def F27 : LoongArchReg32<27,"f27", ["fs3"]>, DwarfRegNum<[59]>; +- def F28 : LoongArchReg32<28,"f28", ["fs4"]>, DwarfRegNum<[60]>; +- def F29 : LoongArchReg32<29,"f29", ["fs5"]>, DwarfRegNum<[61]>; +- def F30 : LoongArchReg32<30,"f30", ["fs6"]>, DwarfRegNum<[62]>; +- def F31 : LoongArchReg32<31,"f31", ["fs7"]>, DwarfRegNum<[63]>; +- +- foreach I = 0-31 in { +- def F#I#_64 : LoongArchReg64("F"#I)>, +- DwarfRegNum<[!add(I, 32)]>; +- } +-} +- +-// The order of registers represents the preferred allocation sequence. 
+-def FPR32 : RegisterClass<"LoongArch", [f32], 32, (sequence "F%u", 0, 31)>; +-def FPR64 : RegisterClass<"LoongArch", [f64], 64, (sequence "F%u_64", 0, 31)>; +- +-// Condition flag registers ++} ++ ++/// LoongArch 64-bit Floating-point Registers ++class FGR32 Enc, string n> : LoongArchReg; ++class FGR64 Enc, string n, list subregs> ++ : LoongArchRegWithSubRegs { ++ let SubRegIndices = [sub_lo]; ++} ++ ++// LoongArch 128-bit (aliased) LSX Registers ++class LSX128 Enc, string n, list subregs> ++ : LoongArchRegWithSubRegs { ++ let SubRegIndices = [sub_64]; ++} ++ ++// LoongArch 256-bit (aliased) LASX Registers ++class LASX256 Enc, string n, list subregs> ++ : LoongArchRegWithSubRegs { ++ let SubRegIndices = [sub_128]; ++} ++ ++//===----------------------------------------------------------------------===// ++// Registers ++//===----------------------------------------------------------------------===// ++ ++/// General Purpose 32-bit Registers ++def ZERO : LoongArch32GPR<0, "zero">, ++ DwarfRegNum<[0]>; ++def RA : LoongArch32GPR<1, "ra">, DwarfRegNum<[1]>; ++def TP : LoongArch32GPR<2, "tp">, DwarfRegNum<[2]>; ++def SP : LoongArch32GPR<3, "sp">, DwarfRegNum<[3]>; ++def A0 : LoongArch32GPR<4, "r4">, DwarfRegNum<[4]>; ++def A1 : LoongArch32GPR<5, "r5">, DwarfRegNum<[5]>; ++def A2 : LoongArch32GPR<6, "r6">, DwarfRegNum<[6]>; ++def A3 : LoongArch32GPR<7, "r7">, DwarfRegNum<[7]>; ++def A4 : LoongArch32GPR<8, "r8">, DwarfRegNum<[8]>; ++def A5 : LoongArch32GPR<9, "r9">, DwarfRegNum<[9]>; ++def A6 : LoongArch32GPR<10, "r10">, DwarfRegNum<[10]>; ++def A7 : LoongArch32GPR<11, "r11">, DwarfRegNum<[11]>; ++def T0 : LoongArch32GPR<12, "r12">, DwarfRegNum<[12]>; ++def T1 : LoongArch32GPR<13, "r13">, DwarfRegNum<[13]>; ++def T2 : LoongArch32GPR<14, "r14">, DwarfRegNum<[14]>; ++def T3 : LoongArch32GPR<15, "r15">, DwarfRegNum<[15]>; ++def T4 : LoongArch32GPR<16, "r16">, DwarfRegNum<[16]>; ++def T5 : LoongArch32GPR<17, "r17">, DwarfRegNum<[17]>; ++def T6 : LoongArch32GPR<18, "r18">, DwarfRegNum<[18]>; ++def T7 : LoongArch32GPR<19, "r19">, DwarfRegNum<[19]>; ++def T8 : LoongArch32GPR<20, "r20">, DwarfRegNum<[20]>; ++def T9 : LoongArch32GPR<21, "r21">, DwarfRegNum<[21]>; ++def FP : LoongArch32GPR<22, "r22">, DwarfRegNum<[22]>; ++def S0 : LoongArch32GPR<23, "r23">, DwarfRegNum<[23]>; ++def S1 : LoongArch32GPR<24, "r24">, DwarfRegNum<[24]>; ++def S2 : LoongArch32GPR<25, "r25">, DwarfRegNum<[25]>; ++def S3 : LoongArch32GPR<26, "r26">, DwarfRegNum<[26]>; ++def S4 : LoongArch32GPR<27, "r27">, DwarfRegNum<[27]>; ++def S5 : LoongArch32GPR<28, "r28">, DwarfRegNum<[28]>; ++def S6 : LoongArch32GPR<29, "r29">, DwarfRegNum<[29]>; ++def S7 : LoongArch32GPR<30, "r30">, DwarfRegNum<[30]>; ++def S8 : LoongArch32GPR<31, "r31">, DwarfRegNum<[31]>; ++ ++let SubRegIndices = [sub_32] in { ++def V0 : LoongArchRegWithSubRegs<4, "r4", [A0]>, DwarfRegNum<[4]>; ++def V1 : LoongArchRegWithSubRegs<5, "r5", [A1]>, DwarfRegNum<[5]>; ++} ++ ++/// General Purpose 64-bit Registers ++def ZERO_64 : LoongArch64GPR<0, "zero", [ZERO]>, DwarfRegNum<[0]>; ++def RA_64 : LoongArch64GPR<1, "ra", [RA]>, DwarfRegNum<[1]>; ++def TP_64 : LoongArch64GPR<2, "tp", [TP]>, DwarfRegNum<[2]>; ++def SP_64 : LoongArch64GPR<3, "sp", [SP]>, DwarfRegNum<[3]>; ++def A0_64 : LoongArch64GPR<4, "r4", [A0]>, DwarfRegNum<[4]>; ++def A1_64 : LoongArch64GPR<5, "r5", [A1]>, DwarfRegNum<[5]>; ++def A2_64 : LoongArch64GPR<6, "r6", [A2]>, DwarfRegNum<[6]>; ++def A3_64 : LoongArch64GPR<7, "r7", [A3]>, DwarfRegNum<[7]>; ++def A4_64 : LoongArch64GPR<8, "r8", [A4]>, DwarfRegNum<[8]>; 
++def A5_64 : LoongArch64GPR<9, "r9", [A5]>, DwarfRegNum<[9]>; ++def A6_64 : LoongArch64GPR<10, "r10", [A6]>, DwarfRegNum<[10]>; ++def A7_64 : LoongArch64GPR<11, "r11", [A7]>, DwarfRegNum<[11]>; ++def T0_64 : LoongArch64GPR<12, "r12", [T0]>, DwarfRegNum<[12]>; ++def T1_64 : LoongArch64GPR<13, "r13", [T1]>, DwarfRegNum<[13]>; ++def T2_64 : LoongArch64GPR<14, "r14", [T2]>, DwarfRegNum<[14]>; ++def T3_64 : LoongArch64GPR<15, "r15", [T3]>, DwarfRegNum<[15]>; ++def T4_64 : LoongArch64GPR<16, "r16", [T4]>, DwarfRegNum<[16]>; ++def T5_64 : LoongArch64GPR<17, "r17", [T5]>, DwarfRegNum<[17]>; ++def T6_64 : LoongArch64GPR<18, "r18", [T6]>, DwarfRegNum<[18]>; ++def T7_64 : LoongArch64GPR<19, "r19", [T7]>, DwarfRegNum<[19]>; ++def T8_64 : LoongArch64GPR<20, "r20", [T8]>, DwarfRegNum<[20]>; ++def T9_64 : LoongArch64GPR<21, "r21", [T9]>, DwarfRegNum<[21]>; ++def FP_64 : LoongArch64GPR<22, "r22", [FP]>, DwarfRegNum<[22]>; ++def S0_64 : LoongArch64GPR<23, "r23", [S0]>, DwarfRegNum<[23]>; ++def S1_64 : LoongArch64GPR<24, "r24", [S1]>, DwarfRegNum<[24]>; ++def S2_64 : LoongArch64GPR<25, "r25", [S2]>, DwarfRegNum<[25]>; ++def S3_64 : LoongArch64GPR<26, "r26", [S3]>, DwarfRegNum<[26]>; ++def S4_64 : LoongArch64GPR<27, "r27", [S4]>, DwarfRegNum<[27]>; ++def S5_64 : LoongArch64GPR<28, "r28", [S5]>, DwarfRegNum<[28]>; ++def S6_64 : LoongArch64GPR<29, "r29", [S6]>, DwarfRegNum<[29]>; ++def S7_64 : LoongArch64GPR<30, "r30", [S7]>, DwarfRegNum<[30]>; ++def S8_64 : LoongArch64GPR<31, "r31", [S8]>, DwarfRegNum<[31]>; ++ ++let SubRegIndices = [sub_64] in { ++def V0_64 : LoongArch64GPR<4, "r4", [A0_64]>, DwarfRegNum<[4]>; ++def V1_64 : LoongArch64GPR<5, "r5", [A1_64]>, DwarfRegNum<[5]>; ++} ++ ++/// FP registers ++foreach I = 0-31 in ++def F#I : FGR32, DwarfRegNum<[!add(I, 32)]>; ++ ++foreach I = 0-31 in ++def F#I#_64 : FGR64("F"#I)]>, DwarfRegNum<[!add(I, 32)]>; + ++/// FP Condition Flag 0~7 + foreach I = 0-7 in + def FCC#I : LoongArchReg; + +-def CFR : RegisterClass<"LoongArch", [GRLenVT], 32, (sequence "FCC%u", 0, 7)> { +- let RegInfos = GRLenRI; ++/// FP Control and Status Registers, FCSR 1~3 ++foreach I = 1-3 in ++def FCSR#I : LoongArchReg; ++ ++class FCSRReg Enc, string n, list subregs> : ++ RegisterWithSubRegs { ++// field bits<2> chan_encoding = 0; ++ let Namespace = "LoongArch"; ++ let SubRegIndices = [sub_fcsr1, sub_fcsr2, sub_fcsr3]; ++// let HWEncoding{8-0} = encoding{8-0}; ++// let HWEncoding{10-9} = chan_encoding; + } + +-// Control and status registers ++def FCSR0 : FCSRReg<0, "fcsr0", [FCSR1, FCSR2, FCSR3]>; + +-foreach I = 0-3 in +-def FCSR#I : LoongArchReg; ++/// PC register ++//let NameSpace = "LoongArch" in ++//def PC : Register<"pc">; ++ ++//===----------------------------------------------------------------------===// ++// Register Classes ++//===----------------------------------------------------------------------===// ++ ++def GPR32 : RegisterClass<"LoongArch", [i32], 32, (add ++ // Reserved ++ ZERO, ++ // Return Values and Arguments ++ A0, A1, A2, A3, A4, A5, A6, A7, ++ // Not preserved across procedure calls ++ T0, T1, T2, T3, T4, T5, T6, T7, T8, ++ // Callee save ++ S0, S1, S2, S3, S4, S5, S6, S7, S8, ++ // Reserved ++ RA, TP, SP, ++ // Reserved ++ T9, FP)>; ++ ++def GPR64 : RegisterClass<"LoongArch", [i64], 64, (add ++ // Reserved ++ ZERO_64, ++ // Return Values and Arguments ++ A0_64, A1_64, A2_64, A3_64, A4_64, A5_64, A6_64, A7_64, ++ // Not preserved across procedure calls ++ T0_64, T1_64, T2_64, T3_64, T4_64, T5_64, T6_64, T7_64, T8_64, ++ // Callee save ++ S0_64, S1_64, S2_64, S3_64, 
S4_64, S5_64, S6_64, S7_64, S8_64, ++ // Reserved ++ RA_64, TP_64, SP_64, ++ // Reserved ++ T9_64, FP_64)>; ++ ++def GPRTC64 : RegisterClass<"LoongArch", [i64], 64, (add ++ // Return Values and Arguments ++ A0_64, A1_64, A2_64, A3_64, A4_64, A5_64, A6_64, A7_64, ++ // Not preserved across procedure calls ++ T0_64, T1_64, T2_64, T3_64, T4_64, T5_64, T6_64, T7_64, T8_64)>; ++ ++/// FP Registers. ++def FGR64 : RegisterClass<"LoongArch", [f64], 64, (sequence "F%u_64", 0, 31)>; ++def FGR32 : RegisterClass<"LoongArch", [f32], 64, (sequence "F%u", 0, 31)>; ++ ++/// FP condition Flag registers. ++def FCFR : RegisterClass<"LoongArch", [i32], 32, (sequence "FCC%u", 0, 7)>, ++ Unallocatable; ++ ++def SP32 : RegisterClass<"LoongArch", [i32], 32, (add SP)>, Unallocatable; ++def SP64 : RegisterClass<"LoongArch", [i64], 64, (add SP_64)>, Unallocatable; ++def TP32 : RegisterClass<"LoongArch", [i32], 32, (add TP)>, Unallocatable; ++def TP64 : RegisterClass<"LoongArch", [i64], 64, (add TP_64)>, Unallocatable; ++ ++/// FP control and Status registers. ++def FCSR : RegisterClass<"LoongArch", [i32], 4, (sequence "FCSR%u", 0, 3)>, ++ Unallocatable; ++ ++//LSX ++foreach I = 0-31 in ++def VR#I : LSX128("F"#I#"_64")]>, ++ DwarfRegNum<[!add(I, 32)]>; ++ ++//LASX ++foreach I = 0-31 in ++def XR#I : LASX256("VR"#I)]>, ++ DwarfRegNum<[!add(I, 32)]>; ++ ++def LSX128B: RegisterClass<"LoongArch", [v16i8], 128, ++ (sequence "VR%u", 0, 31)>; ++ ++def LSX128H: RegisterClass<"LoongArch", [v8i16], 128, ++ (sequence "VR%u", 0, 31)>; ++ ++def LSX128W: RegisterClass<"LoongArch", [v4i32, v4f32], 128, ++ (sequence "VR%u", 0, 31)>; ++ ++def LSX128D: RegisterClass<"LoongArch", [v2i64, v2f64], 128, ++ (sequence "VR%u", 0, 31)>; ++ ++def LASX256B: RegisterClass<"LoongArch", [v32i8], 256, ++ (sequence "XR%u", 0, 31)>; ++def LASX256H: RegisterClass<"LoongArch", [v16i16], 256, ++ (sequence "XR%u", 0, 31)>; ++def LASX256W: RegisterClass<"LoongArch", [v8i32, v8f32], 256, ++ (sequence "XR%u", 0, 31)>; ++def LASX256D: RegisterClass<"LoongArch", [v4i64, v4f64], 256, ++ (sequence "XR%u", 0, 31)>; ++ ++//===----------------------------------------------------------------------===// ++// Register Operands. 
++//===----------------------------------------------------------------------===// + +-let isAllocatable = false in +-def FCSR : RegisterClass<"LoongArch", [i32], 32, (sequence "FCSR%u", 0, 3)>; ++class LoongArchAsmRegOperand : AsmOperandClass { ++ let ParserMethod = "parseAnyRegister"; ++} ++ ++def GPR32AsmOperand : LoongArchAsmRegOperand { ++ let Name = "GPR32AsmReg"; ++ let PredicateMethod = "isGPRAsmReg"; ++} ++ ++def GPR64AsmOperand : LoongArchAsmRegOperand { ++ let Name = "GPR64AsmReg"; ++ let PredicateMethod = "isGPRAsmReg"; ++} ++ ++def FGR32AsmOperand : LoongArchAsmRegOperand { ++ let Name = "FGR32AsmReg"; ++ let PredicateMethod = "isFGRAsmReg"; ++} ++ ++def FGR64AsmOperand : LoongArchAsmRegOperand { ++ let Name = "FGR64AsmReg"; ++ let PredicateMethod = "isFGRAsmReg"; ++} ++ ++def FCSRAsmOperand : LoongArchAsmRegOperand { ++ let Name = "FCSRAsmReg"; ++} ++ ++def FCFRAsmOperand : LoongArchAsmRegOperand { ++ let Name = "FCFRAsmReg"; ++} ++ ++//LSX ++def LSX128AsmOperand : LoongArchAsmRegOperand { ++ let Name = "LSX128AsmReg"; ++} ++ ++//LASX ++def LASX256AsmOperand : LoongArchAsmRegOperand { ++ let Name = "LASX256AsmReg"; ++} ++ ++def GPR32Opnd : RegisterOperand { ++ let ParserMatchClass = GPR32AsmOperand; ++} ++ ++def GPR64Opnd : RegisterOperand { ++ let ParserMatchClass = GPR64AsmOperand; ++} ++ ++def GPRTC64Opnd : RegisterOperand { ++ let ParserMatchClass = GPR64AsmOperand; ++} ++ ++def FGR32Opnd : RegisterOperand { ++ let ParserMatchClass = FGR32AsmOperand; ++} ++ ++def FGR64Opnd : RegisterOperand { ++ let ParserMatchClass = FGR64AsmOperand; ++} ++ ++def FCSROpnd : RegisterOperand { ++ let ParserMatchClass = FCSRAsmOperand; ++} ++ ++def FCFROpnd : RegisterOperand { ++ let ParserMatchClass = FCFRAsmOperand; ++} ++ ++//LSX ++def LSX128BOpnd : RegisterOperand { ++ let ParserMatchClass = LSX128AsmOperand; ++} ++ ++def LSX128HOpnd : RegisterOperand { ++ let ParserMatchClass = LSX128AsmOperand; ++} ++ ++def LSX128WOpnd : RegisterOperand { ++ let ParserMatchClass = LSX128AsmOperand; ++} ++ ++def LSX128DOpnd : RegisterOperand { ++ let ParserMatchClass = LSX128AsmOperand; ++} ++ ++//LASX ++def LASX256BOpnd : RegisterOperand { ++ let ParserMatchClass = LASX256AsmOperand; ++} ++ ++def LASX256HOpnd : RegisterOperand { ++ let ParserMatchClass = LASX256AsmOperand; ++} ++ ++def LASX256WOpnd : RegisterOperand { ++ let ParserMatchClass = LASX256AsmOperand; ++} ++ ++def LASX256DOpnd : RegisterOperand { ++ let ParserMatchClass = LASX256AsmOperand; ++} +diff --git a/llvm/lib/Target/LoongArch/LoongArchSubtarget.cpp b/llvm/lib/Target/LoongArch/LoongArchSubtarget.cpp +index ff84e7c8c..ef990ae09 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchSubtarget.cpp ++++ b/llvm/lib/Target/LoongArch/LoongArchSubtarget.cpp +@@ -1,4 +1,4 @@ +-//===-- LoongArchSubtarget.cpp - LoongArch Subtarget Information -*- C++ -*--=// ++//===-- LoongArchSubtarget.cpp - LoongArch Subtarget Information --------------------===// + // + // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + // See https://llvm.org/LICENSE.txt for license information. 
+@@ -11,7 +11,16 @@ + //===----------------------------------------------------------------------===// + + #include "LoongArchSubtarget.h" +-#include "LoongArchFrameLowering.h" ++#include "LoongArch.h" ++#include "LoongArchMachineFunction.h" ++#include "LoongArchRegisterInfo.h" ++#include "LoongArchTargetMachine.h" ++#include "llvm/IR/Attributes.h" ++#include "llvm/IR/Function.h" ++#include "llvm/Support/CommandLine.h" ++#include "llvm/Support/Debug.h" ++#include "llvm/MC/TargetRegistry.h" ++#include "llvm/Support/raw_ostream.h" + + using namespace llvm; + +@@ -23,32 +32,74 @@ using namespace llvm; + + void LoongArchSubtarget::anchor() {} + +-LoongArchSubtarget &LoongArchSubtarget::initializeSubtargetDependencies( +- const Triple &TT, StringRef CPU, StringRef TuneCPU, StringRef FS, +- StringRef ABIName) { +- bool Is64Bit = TT.isArch64Bit(); +- if (CPU.empty()) +- CPU = Is64Bit ? "generic-la64" : "generic-la32"; ++LoongArchSubtarget::LoongArchSubtarget(const Triple &TT, StringRef CPU, ++ StringRef FS, ++ const LoongArchTargetMachine &TM, ++ MaybeAlign StackAlignOverride) ++ : LoongArchGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS), HasLA64(false), ++ IsSoftFloat(false), IsSingleFloat(false), IsFP64bit(false), HasLSX(false), ++ HasLASX(false), UnalignedAccess(false), ++ StackAlignOverride(StackAlignOverride), TM(TM), TargetTriple(TT), ++ TSInfo(), InstrInfo(initializeSubtargetDependencies(CPU, FS, TM)), ++ FrameLowering(*this), TLInfo(TM, *this) { ++ ++ // Check if Architecture and ABI are compatible. ++ assert(((!is64Bit() && isABI_LP32()) || ++ (is64Bit() && (isABI_LPX32() || isABI_LP64()))) && ++ "Invalid Arch & ABI pair."); ++ ++ if (hasLSX() && !isFP64bit()) ++ report_fatal_error("LSX requires 64-bit floating point register." ++ "See -mattr=+fp64.", ++ false); ++ ++ assert(isFP64bit()); ++} ++ ++bool LoongArchSubtarget::isPositionIndependent() const { ++ return TM.isPositionIndependent(); ++} ++ ++/// This overrides the PostRAScheduler bit in the SchedModel for any CPU. ++bool LoongArchSubtarget::enablePostRAScheduler() const { return true; } + +- if (TuneCPU.empty()) +- TuneCPU = CPU; ++void LoongArchSubtarget::getCriticalPathRCs(RegClassVector &CriticalPathRCs) const { ++ CriticalPathRCs.clear(); ++ CriticalPathRCs.push_back(is64Bit() ? &LoongArch::GPR64RegClass ++ : &LoongArch::GPR32RegClass); ++} ++ ++CodeGenOpt::Level LoongArchSubtarget::getOptLevelToEnablePostRAScheduler() const { ++ return CodeGenOpt::Aggressive; ++} ++ ++LoongArchSubtarget & ++LoongArchSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS, ++ const TargetMachine &TM) { ++ StringRef CPUName = LoongArch_MC::selectLoongArchCPU(TM.getTargetTriple(), CPU); + +- ParseSubtargetFeatures(CPU, TuneCPU, FS); +- if (Is64Bit) { +- GRLenVT = MVT::i64; +- GRLen = 64; ++ // Parse features string. ++ ParseSubtargetFeatures(CPUName, /*TuneCPU*/ CPUName, FS); ++ // Initialize scheduling itinerary for the specified CPU. ++ InstrItins = getInstrItineraryForCPU(CPUName); ++ ++ if (StackAlignOverride) ++ stackAlignment = *StackAlignOverride; ++ else if (isABI_LPX32() || isABI_LP64()) ++ stackAlignment = Align(16); ++ else { ++ assert(isABI_LP32() && "Unknown ABI for stack alignment!"); ++ stackAlignment = Align(8); + } + +- // TODO: ILP32{S,F} LP64{S,F} +- TargetABI = Is64Bit ? 
LoongArchABI::ABI_LP64D : LoongArchABI::ABI_ILP32D; + return *this; + } + +-LoongArchSubtarget::LoongArchSubtarget(const Triple &TT, StringRef CPU, +- StringRef TuneCPU, StringRef FS, +- StringRef ABIName, +- const TargetMachine &TM) +- : LoongArchGenSubtargetInfo(TT, CPU, TuneCPU, FS), +- FrameLowering( +- initializeSubtargetDependencies(TT, CPU, TuneCPU, FS, ABIName)), +- InstrInfo(*this), RegInfo(getHwMode()), TLInfo(TM, *this) {} ++Reloc::Model LoongArchSubtarget::getRelocationModel() const { ++ return TM.getRelocationModel(); ++} ++ ++bool LoongArchSubtarget::isABI_LP64() const { return getABI().IsLP64(); } ++bool LoongArchSubtarget::isABI_LPX32() const { return getABI().IsLPX32(); } ++bool LoongArchSubtarget::isABI_LP32() const { return getABI().IsLP32(); } ++const LoongArchABIInfo &LoongArchSubtarget::getABI() const { return TM.getABI(); } +diff --git a/llvm/lib/Target/LoongArch/LoongArchSubtarget.h b/llvm/lib/Target/LoongArch/LoongArchSubtarget.h +index fbe7a176b..588d9f46b 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchSubtarget.h ++++ b/llvm/lib/Target/LoongArch/LoongArchSubtarget.h +@@ -1,4 +1,4 @@ +-//===- LoongArchSubtarget.h - Define Subtarget for the LoongArch -*- C++ -*-==// ++//===-- LoongArchSubtarget.h - Define Subtarget for the LoongArch ---------*- C++ -*-===// + // + // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + // See https://llvm.org/LICENSE.txt for license information. +@@ -13,15 +13,16 @@ + #ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCHSUBTARGET_H + #define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHSUBTARGET_H + ++#include "MCTargetDesc/LoongArchABIInfo.h" + #include "LoongArchFrameLowering.h" + #include "LoongArchISelLowering.h" + #include "LoongArchInstrInfo.h" +-#include "LoongArchRegisterInfo.h" +-#include "MCTargetDesc/LoongArchBaseInfo.h" + #include "llvm/CodeGen/SelectionDAGTargetInfo.h" + #include "llvm/CodeGen/TargetSubtargetInfo.h" + #include "llvm/IR/DataLayout.h" +-#include "llvm/Target/TargetMachine.h" ++#include "llvm/MC/MCInstrItineraries.h" ++#include "llvm/Support/ErrorHandling.h" ++#include + + #define GET_SUBTARGETINFO_HEADER + #include "LoongArchGenSubtargetInfo.inc" +@@ -29,61 +30,114 @@ + namespace llvm { + class StringRef; + ++class LoongArchTargetMachine; ++ + class LoongArchSubtarget : public LoongArchGenSubtargetInfo { + virtual void anchor(); +- bool HasLA64 = false; +- bool HasBasicF = false; +- bool HasBasicD = false; +- bool HasExtLSX = false; +- bool HasExtLASX = false; +- bool HasExtLVZ = false; +- bool HasExtLBT = false; +- unsigned GRLen = 32; +- MVT GRLenVT = MVT::i32; +- LoongArchABI::ABI TargetABI = LoongArchABI::ABI_Unknown; +- LoongArchFrameLowering FrameLowering; +- LoongArchInstrInfo InstrInfo; +- LoongArchRegisterInfo RegInfo; +- LoongArchTargetLowering TLInfo; +- +- /// Initializes using the passed in CPU and feature strings so that we can +- /// use initializer lists for subtarget initialization. +- LoongArchSubtarget &initializeSubtargetDependencies(const Triple &TT, +- StringRef CPU, +- StringRef TuneCPU, +- StringRef FS, +- StringRef ABIName); ++ ++ // HasLA64 - The target processor has LA64 ISA support. ++ bool HasLA64; ++ ++ // IsSoftFloat - The target does not support any floating point instructions. ++ bool IsSoftFloat; ++ ++ // IsSingleFloat - The target only supports single precision float ++ // point operations. This enable the target to use all 32 32-bit ++ // floating point registers instead of only using even ones. 
++ bool IsSingleFloat; ++ ++ // IsFP64bit - The target processor has 64-bit floating point registers. ++ bool IsFP64bit; ++ ++ /// Features related to the presence of specific instructions. ++ ++ // HasLSX - Supports LSX. ++ bool HasLSX; ++ ++ // HasLASX - Supports LASX. ++ bool HasLASX; ++ ++ /// The minimum alignment known to hold of the stack frame on ++ /// entry to the function and which must be maintained by every function. ++ Align stackAlignment; ++ ++ // Allow unaligned memory accesses. ++ bool UnalignedAccess; ++ ++ /// The overridden stack alignment. ++ MaybeAlign StackAlignOverride; ++ ++ InstrItineraryData InstrItins; ++ ++ const LoongArchTargetMachine &TM; ++ ++ Triple TargetTriple; ++ ++ const SelectionDAGTargetInfo TSInfo; ++ const LoongArchInstrInfo InstrInfo; ++ const LoongArchFrameLowering FrameLowering; ++ const LoongArchTargetLowering TLInfo; + + public: +- // Initializes the data members to match that of the specified triple. +- LoongArchSubtarget(const Triple &TT, StringRef CPU, StringRef TuneCPU, +- StringRef FS, StringRef ABIName, const TargetMachine &TM); ++ bool isPositionIndependent() const; ++ /// This overrides the PostRAScheduler bit in the SchedModel for each CPU. ++ bool enablePostRAScheduler() const override; ++ void getCriticalPathRCs(RegClassVector &CriticalPathRCs) const override; ++ CodeGenOpt::Level getOptLevelToEnablePostRAScheduler() const override; ++ ++ bool isABI_LP64() const; ++ bool isABI_LPX32() const; ++ bool isABI_LP32() const; ++ const LoongArchABIInfo &getABI() const; + +- // Parses features string setting specified subtarget options. The +- // definition of this function is auto-generated by tblgen. ++ /// This constructor initializes the data members to match that ++ /// of the specified triple. ++ LoongArchSubtarget(const Triple &TT, StringRef CPU, StringRef FS, ++ const LoongArchTargetMachine &TM, MaybeAlign StackAlignOverride); ++ ++ /// ParseSubtargetFeatures - Parses features string setting specified ++ /// subtarget options. Definition of function is auto generated by tblgen. + void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS); + +- const LoongArchFrameLowering *getFrameLowering() const override { ++ bool is64Bit() const { return HasLA64; } ++ bool isFP64bit() const { return IsFP64bit; } ++ unsigned getGPRSizeInBytes() const { return is64Bit() ? 
8 : 4; } ++ bool isSingleFloat() const { return IsSingleFloat; } ++ bool hasLSX() const { return HasLSX; } ++ bool hasLASX() const { return HasLASX; } ++ bool useSoftFloat() const { return IsSoftFloat; } ++ ++ bool allowUnalignedAccess() const { return UnalignedAccess; } ++ ++ bool isXRaySupported() const override { return true; } ++ ++ Align getStackAlignment() const { return stackAlignment; } ++ ++ // Grab relocation model ++ Reloc::Model getRelocationModel() const; ++ ++ LoongArchSubtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS, ++ const TargetMachine &TM); ++ ++ const SelectionDAGTargetInfo *getSelectionDAGInfo() const override { ++ return &TSInfo; ++ } ++ const LoongArchInstrInfo *getInstrInfo() const override { ++ return &InstrInfo; ++ } ++ const TargetFrameLowering *getFrameLowering() const override { + return &FrameLowering; + } +- const LoongArchInstrInfo *getInstrInfo() const override { return &InstrInfo; } + const LoongArchRegisterInfo *getRegisterInfo() const override { +- return &RegInfo; ++ return &InstrInfo.getRegisterInfo(); + } + const LoongArchTargetLowering *getTargetLowering() const override { + return &TLInfo; + } +- bool is64Bit() const { return HasLA64; } +- bool hasBasicF() const { return HasBasicF; } +- bool hasBasicD() const { return HasBasicD; } +- bool hasExtLSX() const { return HasExtLSX; } +- bool hasExtLASX() const { return HasExtLASX; } +- bool hasExtLVZ() const { return HasExtLVZ; } +- bool hasExtLBT() const { return HasExtLBT; } +- MVT getGRLenVT() const { return GRLenVT; } +- unsigned getGRLen() const { return GRLen; } +- LoongArchABI::ABI getTargetABI() const { return TargetABI; } ++ const InstrItineraryData *getInstrItineraryData() const override { ++ return &InstrItins; ++ } + }; +-} // end namespace llvm ++} // End llvm namespace + +-#endif // LLVM_LIB_TARGET_LOONGARCH_LOONGARCHSUBTARGET_H ++#endif +diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp +index 7ba5848e0..f8a1dc5fa 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp ++++ b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp +@@ -1,4 +1,4 @@ +-//===-- LoongArchTargetMachine.cpp - Define TargetMachine for LoongArch ---===// ++//===-- LoongArchTargetMachine.cpp - Define TargetMachine for LoongArch -------------===// + // + // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + // See https://llvm.org/LICENSE.txt for license information. 
+@@ -12,12 +12,29 @@ + + #include "LoongArchTargetMachine.h" + #include "LoongArch.h" +-#include "MCTargetDesc/LoongArchBaseInfo.h" +-#include "TargetInfo/LoongArchTargetInfo.h" ++#include "LoongArchISelDAGToDAG.h" ++#include "LoongArchSubtarget.h" ++#include "LoongArchTargetObjectFile.h" ++#include "LoongArchTargetTransformInfo.h" ++#include "MCTargetDesc/LoongArchABIInfo.h" ++#include "MCTargetDesc/LoongArchMCTargetDesc.h" ++#include "llvm/ADT/Optional.h" ++#include "llvm/ADT/STLExtras.h" ++#include "llvm/ADT/StringRef.h" ++#include "llvm/Analysis/TargetTransformInfo.h" ++#include "llvm/CodeGen/BasicTTIImpl.h" ++#include "llvm/CodeGen/MachineFunction.h" + #include "llvm/CodeGen/Passes.h" +-#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" + #include "llvm/CodeGen/TargetPassConfig.h" ++#include "llvm/IR/Attributes.h" ++#include "llvm/IR/Function.h" + #include "llvm/MC/TargetRegistry.h" ++#include "llvm/Support/CodeGen.h" ++#include "llvm/Support/Debug.h" ++#include "llvm/Support/raw_ostream.h" ++#include "llvm/Target/TargetOptions.h" ++#include ++#include + + using namespace llvm; + +@@ -26,29 +43,63 @@ using namespace llvm; + extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeLoongArchTarget() { + // Register the target. + RegisterTargetMachine X(getTheLoongArch32Target()); +- RegisterTargetMachine Y(getTheLoongArch64Target()); ++ RegisterTargetMachine A(getTheLoongArch64Target()); + } + +-static std::string computeDataLayout(const Triple &TT) { +- if (TT.isArch64Bit()) +- return "e-m:e-p:64:64-i64:64-i128:128-n64-S128"; +- assert(TT.isArch32Bit() && "only LA32 and LA64 are currently supported"); +- return "e-m:e-p:32:32-i64:64-n32-S128"; ++static std::string computeDataLayout(const Triple &TT, StringRef CPU, ++ const TargetOptions &Options) { ++ std::string Ret; ++ LoongArchABIInfo ABI = LoongArchABIInfo::computeTargetABI(TT, CPU, Options.MCOptions); ++ ++ Ret += "e"; ++ ++ if (ABI.IsLP32()) ++ Ret += "-m:m"; ++ else ++ Ret += "-m:e"; ++ ++ // Pointers are 32 bit on some ABIs. ++ if (!ABI.IsLP64()) ++ Ret += "-p:32:32"; ++ ++ // 8 and 16 bit integers only need to have natural alignment, but try to ++ // align them to 32 bits. 64 bit integers have natural alignment. ++ Ret += "-i8:8:32-i16:16:32-i64:64"; ++ ++ // 32 bit registers are always available and the stack is at least 64 bit ++ // aligned. On LP64 64 bit registers are also available and the stack is ++ // 128 bit aligned. ++ if (ABI.IsLP64() || ABI.IsLPX32()) ++ Ret += "-n32:64-S128"; ++ else ++ Ret += "-n32-S64"; ++ ++ return Ret; + } + +-static Reloc::Model getEffectiveRelocModel(const Triple &TT, ++static Reloc::Model getEffectiveRelocModel(bool JIT, + Optional RM) { +- return RM.value_or(Reloc::Static); ++ if (!RM.hasValue() || JIT) ++ return Reloc::Static; ++ return *RM; + } + +-LoongArchTargetMachine::LoongArchTargetMachine( +- const Target &T, const Triple &TT, StringRef CPU, StringRef FS, +- const TargetOptions &Options, Optional RM, +- Optional CM, CodeGenOpt::Level OL, bool JIT) +- : LLVMTargetMachine(T, computeDataLayout(TT), TT, CPU, FS, Options, +- getEffectiveRelocModel(TT, RM), ++// On function prologue, the stack is created by decrementing ++// its pointer. Once decremented, all references are done with positive ++// offset from the stack/frame pointer, using StackGrowsUp enables ++// an easier handling. ++// Using CodeModel::Large enables different CALL behavior. 
++LoongArchTargetMachine::LoongArchTargetMachine(const Target &T, const Triple &TT, ++ StringRef CPU, StringRef FS, ++ const TargetOptions &Options, ++ Optional RM, ++ Optional CM, ++ CodeGenOpt::Level OL, bool JIT) ++ : LLVMTargetMachine(T, computeDataLayout(TT, CPU, Options), TT, ++ CPU, FS, Options, getEffectiveRelocModel(JIT, RM), + getEffectiveCodeModel(CM, CodeModel::Small), OL), +- TLOF(std::make_unique()) { ++ TLOF(std::make_unique()), ++ ABI(LoongArchABIInfo::computeTargetABI(TT, CPU, Options.MCOptions)) { + initAsmInfo(); + } + +@@ -57,44 +108,45 @@ LoongArchTargetMachine::~LoongArchTargetMachine() = default; + const LoongArchSubtarget * + LoongArchTargetMachine::getSubtargetImpl(const Function &F) const { + Attribute CPUAttr = F.getFnAttribute("target-cpu"); +- Attribute TuneAttr = F.getFnAttribute("tune-cpu"); + Attribute FSAttr = F.getFnAttribute("target-features"); + +- std::string CPU = +- CPUAttr.isValid() ? CPUAttr.getValueAsString().str() : TargetCPU; +- std::string TuneCPU = +- TuneAttr.isValid() ? TuneAttr.getValueAsString().str() : CPU; +- std::string FS = +- FSAttr.isValid() ? FSAttr.getValueAsString().str() : TargetFS; ++ std::string CPU = !CPUAttr.hasAttribute(Attribute::None) ++ ? CPUAttr.getValueAsString().str() ++ : TargetCPU; ++ std::string FS = !FSAttr.hasAttribute(Attribute::None) ++ ? FSAttr.getValueAsString().str() ++ : TargetFS; ++ ++ // FIXME: This is related to the code below to reset the target options, ++ // we need to know whether or not the soft float flag is set on the ++ // function, so we can enable it as a subtarget feature. ++ bool softFloat = ++ F.hasFnAttribute("use-soft-float") && ++ F.getFnAttribute("use-soft-float").getValueAsString() == "true"; ++ ++ if (softFloat) ++ FS += FS.empty() ? "+soft-float" : ",+soft-float"; + +- std::string Key = CPU + TuneCPU + FS; +- auto &I = SubtargetMap[Key]; ++ auto &I = SubtargetMap[CPU + FS]; + if (!I) { + // This needs to be done before we create a new subtarget since any + // creation will depend on the TM and the code generation flags on the + // function that reside in TargetOptions. + resetTargetOptions(F); +- auto ABIName = Options.MCOptions.getABIName(); +- if (const MDString *ModuleTargetABI = dyn_cast_or_null( +- F.getParent()->getModuleFlag("target-abi"))) { +- auto TargetABI = LoongArchABI::getTargetABI(ABIName); +- if (TargetABI != LoongArchABI::ABI_Unknown && +- ModuleTargetABI->getString() != ABIName) { +- report_fatal_error("-target-abi option != target-abi module flag"); +- } +- ABIName = ModuleTargetABI->getString(); +- } +- I = std::make_unique(TargetTriple, CPU, TuneCPU, FS, +- ABIName, *this); ++ I = std::make_unique(TargetTriple, CPU, FS, *this, ++ MaybeAlign(F.getParent()->getOverrideStackAlignment())); + } + return I.get(); + } + + namespace { ++ ++/// LoongArch Code Generator Pass Configuration Options. 
+ class LoongArchPassConfig : public TargetPassConfig { + public: + LoongArchPassConfig(LoongArchTargetMachine &TM, PassManagerBase &PM) +- : TargetPassConfig(TM, PM) {} ++ : TargetPassConfig(TM, PM) { ++ } + + LoongArchTargetMachine &getLoongArchTargetMachine() const { + return getTM(); +@@ -102,22 +154,42 @@ public: + + void addIRPasses() override; + bool addInstSelector() override; ++ void addPreEmitPass() override; + }; +-} // end namespace + +-TargetPassConfig * +-LoongArchTargetMachine::createPassConfig(PassManagerBase &PM) { ++} // end anonymous namespace ++ ++TargetPassConfig *LoongArchTargetMachine::createPassConfig(PassManagerBase &PM) { + return new LoongArchPassConfig(*this, PM); + } + + void LoongArchPassConfig::addIRPasses() { +- addPass(createAtomicExpandPass()); +- + TargetPassConfig::addIRPasses(); ++ addPass(createAtomicExpandPass()); + } +- ++// Install an instruction selector pass using ++// the ISelDag to gen LoongArch code. + bool LoongArchPassConfig::addInstSelector() { +- addPass(createLoongArchISelDag(getLoongArchTargetMachine())); +- ++ addPass(createLoongArchModuleISelDagPass()); ++ addPass(createLoongArchISelDag(getLoongArchTargetMachine(), getOptLevel())); + return false; + } ++ ++TargetTransformInfo ++LoongArchTargetMachine::getTargetTransformInfo(const Function &F) const { ++ LLVM_DEBUG(errs() << "Target Transform Info Pass Added\n"); ++ return TargetTransformInfo(BasicTTIImpl(this, F)); ++} ++ ++// Implemented by targets that want to run passes immediately before ++// machine code is emitted. return true if -print-machineinstrs should ++// print out the code after the passes. ++void LoongArchPassConfig::addPreEmitPass() { ++ // Expand pseudo instructions that are sensitive to register allocation. ++ addPass(createLoongArchExpandPseudoPass()); ++ ++ // Relax conditional branch instructions if they're otherwise out of ++ // range of their destination. ++ // This pass must be run after any pseudo instruction expansion ++ addPass(&BranchRelaxationPassID); ++} +diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.h b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.h +index cbd872031..8e395d183 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.h ++++ b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.h +@@ -1,4 +1,4 @@ +-//=- LoongArchTargetMachine.h - Define TargetMachine for LoongArch -*- C++ -*-// ++//===- LoongArchTargetMachine.h - Define TargetMachine for LoongArch ------*- C++ -*-===// + // + // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + // See https://llvm.org/LICENSE.txt for license information. 
+@@ -13,25 +13,33 @@
+ #ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCHTARGETMACHINE_H
+ #define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHTARGETMACHINE_H
+ 
++#include "MCTargetDesc/LoongArchABIInfo.h"
+ #include "LoongArchSubtarget.h"
++#include "llvm/ADT/Optional.h"
++#include "llvm/ADT/StringMap.h"
++#include "llvm/ADT/StringRef.h"
++#include "llvm/Support/CodeGen.h"
+ #include "llvm/Target/TargetMachine.h"
++#include <memory>
+ 
+ namespace llvm {
+ 
+ class LoongArchTargetMachine : public LLVMTargetMachine {
+   std::unique_ptr<TargetLoweringObjectFile> TLOF;
++  // Selected ABI
++  LoongArchABIInfo ABI;
++
+   mutable StringMap<std::unique_ptr<LoongArchSubtarget>> SubtargetMap;
+ 
+ public:
+   LoongArchTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
+-                         StringRef FS, const TargetOptions &Options,
+-                         Optional<Reloc::Model> RM,
+-                         Optional<CodeModel::Model> CM, CodeGenOpt::Level OL,
+-                         bool JIT);
++                         StringRef FS, const TargetOptions &Options,
++                         Optional<Reloc::Model> RM, Optional<CodeModel::Model> CM,
++                         CodeGenOpt::Level OL, bool JIT);
+   ~LoongArchTargetMachine() override;
+ 
++  TargetTransformInfo getTargetTransformInfo(const Function &F) const override;
+   const LoongArchSubtarget *getSubtargetImpl(const Function &F) const override;
+-  const LoongArchSubtarget *getSubtargetImpl() const = delete;
+ 
+   // Pass Pipeline Configuration
+   TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
+@@ -39,6 +47,20 @@ public:
+   TargetLoweringObjectFile *getObjFileLowering() const override {
+     return TLOF.get();
+   }
++
++  /// Returns true if a cast between SrcAS and DestAS is a noop.
++  bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override {
++    // LoongArch doesn't have any special address spaces so we just reserve
++    // the first 256 for software use (e.g. OpenCL) and treat casts
++    // between them as noops.
++    return SrcAS < 256 && DestAS < 256;
++  }
++
++  const LoongArchABIInfo &getABI() const { return ABI; }
++
++  bool isMachineVerifierClean() const override {
++    return false;
++  }
+ };
+ 
+ } // end namespace llvm
+diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetObjectFile.cpp b/llvm/lib/Target/LoongArch/LoongArchTargetObjectFile.cpp
+new file mode 100644
+index 000000000..9c6250d28
+--- /dev/null
++++ b/llvm/lib/Target/LoongArch/LoongArchTargetObjectFile.cpp
+@@ -0,0 +1,26 @@
++//===-- LoongArchTargetObjectFile.cpp - LoongArch Object Files ----------------------===//
++//
++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
++// See https://llvm.org/LICENSE.txt for license information.
++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++ ++#include "LoongArchTargetObjectFile.h" ++#include "LoongArchSubtarget.h" ++#include "LoongArchTargetMachine.h" ++#include "MCTargetDesc/LoongArchMCExpr.h" ++#include "llvm/BinaryFormat/ELF.h" ++#include "llvm/IR/DataLayout.h" ++#include "llvm/IR/DerivedTypes.h" ++#include "llvm/IR/GlobalVariable.h" ++#include "llvm/MC/MCContext.h" ++#include "llvm/MC/MCSectionELF.h" ++#include "llvm/Support/CommandLine.h" ++#include "llvm/Target/TargetMachine.h" ++using namespace llvm; ++ ++void LoongArchTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM){ ++ TargetLoweringObjectFileELF::Initialize(Ctx, TM); ++ InitializeELF(TM.Options.UseInitArray); ++} +diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetObjectFile.h b/llvm/lib/Target/LoongArch/LoongArchTargetObjectFile.h +new file mode 100644 +index 000000000..a50c57171 +--- /dev/null ++++ b/llvm/lib/Target/LoongArch/LoongArchTargetObjectFile.h +@@ -0,0 +1,24 @@ ++//===-- llvm/Target/LoongArchTargetObjectFile.h - LoongArch Object Info ---*- C++ -*-===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCHTARGETOBJECTFILE_H ++#define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHTARGETOBJECTFILE_H ++ ++#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" ++ ++namespace llvm { ++class LoongArchTargetMachine; ++ class LoongArchTargetObjectFile : public TargetLoweringObjectFileELF { ++ ++ public: ++ ++ void Initialize(MCContext &Ctx, const TargetMachine &TM) override; ++ }; ++} // end namespace llvm ++ ++#endif +diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetStreamer.h b/llvm/lib/Target/LoongArch/LoongArchTargetStreamer.h +new file mode 100644 +index 000000000..a9adc32d0 +--- /dev/null ++++ b/llvm/lib/Target/LoongArch/LoongArchTargetStreamer.h +@@ -0,0 +1,130 @@ ++//===-- LoongArchTargetStreamer.h - LoongArch Target Streamer ------------*- C++ -*--===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. 
++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCHTARGETSTREAMER_H ++#define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHTARGETSTREAMER_H ++ ++#include "MCTargetDesc/LoongArchABIInfo.h" ++#include "llvm/ADT/Optional.h" ++#include "llvm/ADT/STLExtras.h" ++#include "llvm/MC/MCELFStreamer.h" ++#include "llvm/MC/MCRegisterInfo.h" ++#include "llvm/MC/MCStreamer.h" ++ ++namespace llvm { ++ ++class formatted_raw_ostream; ++ ++struct LoongArchFPABIInfo; ++ ++class LoongArchTargetStreamer : public MCTargetStreamer { ++public: ++ LoongArchTargetStreamer(MCStreamer &S); ++ ++ virtual void setPic(bool Value) {} ++ ++ virtual void emitDirectiveOptionPic0(); ++ virtual void emitDirectiveOptionPic2(); ++ ++ virtual void emitDirectiveSetArch(StringRef Arch); ++ virtual void emitDirectiveSetLoongArch32(); ++ virtual void emitDirectiveSetloongarch64(); ++ ++ void emitR(unsigned Opcode, unsigned Reg0, SMLoc IDLoc, ++ const MCSubtargetInfo *STI); ++ void emitII(unsigned Opcode, int16_t Imm1, int16_t Imm2, SMLoc IDLoc, ++ const MCSubtargetInfo *STI); ++ void emitRX(unsigned Opcode, unsigned Reg0, MCOperand Op1, SMLoc IDLoc, ++ const MCSubtargetInfo *STI); ++ void emitRI(unsigned Opcode, unsigned Reg0, int32_t Imm, SMLoc IDLoc, ++ const MCSubtargetInfo *STI); ++ void emitRR(unsigned Opcode, unsigned Reg0, unsigned Reg1, SMLoc IDLoc, ++ const MCSubtargetInfo *STI); ++ void emitRXX(unsigned Opcode, unsigned Reg0, MCOperand Op1, MCOperand Op2, ++ SMLoc IDLoc, const MCSubtargetInfo *STI); ++ void emitRRX(unsigned Opcode, unsigned Reg0, unsigned Reg1, MCOperand Op2, ++ SMLoc IDLoc, const MCSubtargetInfo *STI); ++ void emitRRR(unsigned Opcode, unsigned Reg0, unsigned Reg1, unsigned Reg2, ++ SMLoc IDLoc, const MCSubtargetInfo *STI); ++ void emitRRI(unsigned Opcode, unsigned Reg0, unsigned Reg1, int32_t Imm, ++ SMLoc IDLoc, const MCSubtargetInfo *STI); ++ void emitRRXX(unsigned Opcode, unsigned Reg0, unsigned Reg1, MCOperand Op2, ++ MCOperand Op3, SMLoc IDLoc, const MCSubtargetInfo *STI); ++ void emitRRIII(unsigned Opcode, unsigned Reg0, unsigned Reg1, int16_t Imm0, ++ int16_t Imm1, int16_t Imm2, SMLoc IDLoc, ++ const MCSubtargetInfo *STI); ++ void emitAdd(unsigned DstReg, unsigned SrcReg, unsigned TrgReg, bool Is64Bit, ++ const MCSubtargetInfo *STI); ++ void emitDSLL(unsigned DstReg, unsigned SrcReg, int16_t ShiftAmount, ++ SMLoc IDLoc, const MCSubtargetInfo *STI); ++ void emitNop(SMLoc IDLoc, const MCSubtargetInfo *STI); ++ ++ void forbidModuleDirective() { ModuleDirectiveAllowed = false; } ++ void reallowModuleDirective() { ModuleDirectiveAllowed = true; } ++ bool isModuleDirectiveAllowed() { return ModuleDirectiveAllowed; } ++ ++ template ++ void updateABIInfo(const PredicateLibrary &P) { ++ ABI = P.getABI(); ++ } ++ ++ const LoongArchABIInfo &getABI() const { ++ assert(ABI.hasValue() && "ABI hasn't been set!"); ++ return *ABI; ++ } ++ ++protected: ++ llvm::Optional ABI; ++ ++ bool GPRInfoSet; ++ ++ bool FPRInfoSet; ++ ++ bool FrameInfoSet; ++ int FrameOffset; ++ unsigned FrameReg; ++ unsigned ReturnReg; ++ ++private: ++ bool ModuleDirectiveAllowed; ++}; ++ ++// This part is for ascii assembly output ++class LoongArchTargetAsmStreamer : public LoongArchTargetStreamer { ++ formatted_raw_ostream &OS; ++ ++public: ++ LoongArchTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS); ++ ++ void emitDirectiveOptionPic0() override; ++ void emitDirectiveOptionPic2() override; ++ ++ 
void emitDirectiveSetArch(StringRef Arch) override; ++ void emitDirectiveSetLoongArch32() override; ++ void emitDirectiveSetloongarch64() override; ++}; ++ ++// This part is for ELF object output ++class LoongArchTargetELFStreamer : public LoongArchTargetStreamer { ++ const MCSubtargetInfo &STI; ++ bool Pic; ++ ++public: ++ MCELFStreamer &getStreamer(); ++ LoongArchTargetELFStreamer(MCStreamer &S, const MCSubtargetInfo &STI); ++ ++ void setPic(bool Value) override { Pic = Value; } ++ ++ void emitLabel(MCSymbol *Symbol) override; ++ void finish() override; ++ ++ void emitDirectiveOptionPic0() override; ++ void emitDirectiveOptionPic2() override; ++}; ++} ++#endif +diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp +new file mode 100644 +index 000000000..9510dc027 +--- /dev/null ++++ b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp +@@ -0,0 +1,325 @@ ++//===-- LoongArchTargetTransformInfo.cpp - LoongArch specific TTI pass ++//----------------===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++/// \file ++/// This file implements a TargetTransformInfo analysis pass specific to the ++/// LoongArch target machine. It uses the target's detailed information to ++/// provide more precise answers to certain TTI queries, while letting the ++/// target independent and default TTI implementations handle the rest. ++/// ++//===----------------------------------------------------------------------===// ++ ++#include "LoongArchTargetTransformInfo.h" ++#include "llvm/Analysis/TargetTransformInfo.h" ++#include "llvm/CodeGen/BasicTTIImpl.h" ++#include "llvm/CodeGen/CostTable.h" ++#include "llvm/CodeGen/TargetLowering.h" ++#include "llvm/IR/IntrinsicInst.h" ++#include "llvm/Support/Debug.h" ++ ++using namespace llvm; ++ ++#define DEBUG_TYPE "LoongArchtti" ++ ++//===----------------------------------------------------------------------===// ++// ++// LoongArch cost model. ++// ++//===----------------------------------------------------------------------===// ++ ++bool LoongArchTTIImpl::areInlineCompatible(const Function *Caller, ++ const Function *Callee) const { ++ const TargetMachine &TM = getTLI()->getTargetMachine(); ++ ++ const FeatureBitset &CallerBits = ++ TM.getSubtargetImpl(*Caller)->getFeatureBits(); ++ const FeatureBitset &CalleeBits = ++ TM.getSubtargetImpl(*Callee)->getFeatureBits(); ++ ++ // Inline a callee if its target-features are a subset of the callers ++ // target-features. 
++ return (CallerBits & CalleeBits) == CalleeBits; ++} ++ ++TargetTransformInfo::PopcntSupportKind ++LoongArchTTIImpl::getPopcntSupport(unsigned TyWidth) { ++ assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2"); ++ if (TyWidth == 32 || TyWidth == 64) ++ return TTI::PSK_FastHardware; ++ return TTI::PSK_Software; ++} ++ ++unsigned LoongArchTTIImpl::getNumberOfRegisters(bool Vector) { ++ if (Vector && !ST->hasLSX()) ++ return 0; ++ ++ return 32; ++} ++ ++unsigned LoongArchTTIImpl::getRegisterBitWidth(bool Vector) const { ++ if (Vector) { ++ if (ST->hasLASX()) ++ return 256; ++ ++ if (ST->hasLSX()) ++ return 128; ++ ++ return 0; ++ } ++ return 64; ++} ++ ++unsigned LoongArchTTIImpl::getMaxInterleaveFactor(unsigned VF) { ++ if (VF == 1) ++ return 1; ++ return 2; ++} ++ ++InstructionCost LoongArchTTIImpl::getArithmeticInstrCost( ++ unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, ++ TTI::OperandValueKind Op1Info, TTI::OperandValueKind Op2Info, ++ TTI::OperandValueProperties Opd1PropInfo, ++ TTI::OperandValueProperties Opd2PropInfo, ArrayRef Args, ++ const Instruction *CxtI) { ++ ++ std::pair LT = TLI->getTypeLegalizationCost(DL, Ty); ++ ++ int ISD = TLI->InstructionOpcodeToISD(Opcode); ++ assert(ISD && "Invalid opcode"); ++ ++ static const CostTblEntry LASXCostTable[] = { ++ ++ {ISD::SHL, MVT::v32i8, 1}, ++ {ISD::SHL, MVT::v16i16, 1}, ++ {ISD::SHL, MVT::v8i32, 1}, ++ {ISD::SHL, MVT::v4i64, 1}, ++ ++ {ISD::SRL, MVT::v32i8, 1}, ++ {ISD::SRL, MVT::v16i16, 1}, ++ {ISD::SRL, MVT::v8i32, 1}, ++ {ISD::SRL, MVT::v4i64, 1}, ++ ++ {ISD::SRA, MVT::v32i8, 1}, ++ {ISD::SRA, MVT::v16i16, 1}, ++ {ISD::SRA, MVT::v8i32, 1}, ++ {ISD::SRA, MVT::v4i64, 1}, ++ ++ {ISD::SUB, MVT::v32i8, 1}, ++ {ISD::SUB, MVT::v16i16, 1}, ++ {ISD::SUB, MVT::v8i32, 1}, ++ {ISD::SUB, MVT::v4i64, 1}, ++ ++ {ISD::ADD, MVT::v32i8, 1}, ++ {ISD::ADD, MVT::v16i16, 1}, ++ {ISD::ADD, MVT::v8i32, 1}, ++ {ISD::ADD, MVT::v4i64, 1}, ++ ++ {ISD::MUL, MVT::v32i8, 1}, ++ {ISD::MUL, MVT::v16i16, 1}, ++ {ISD::MUL, MVT::v8i32, 1}, ++ {ISD::MUL, MVT::v4i64, 1}, ++ ++ {ISD::SDIV, MVT::v32i8, 29}, ++ {ISD::SDIV, MVT::v16i16, 19}, ++ {ISD::SDIV, MVT::v8i32, 14}, ++ {ISD::SDIV, MVT::v4i64, 13}, ++ ++ {ISD::UDIV, MVT::v32i8, 29}, ++ {ISD::UDIV, MVT::v16i16, 19}, ++ {ISD::UDIV, MVT::v8i32, 14}, ++ {ISD::UDIV, MVT::v4i64, 13}, ++ ++ {ISD::SREM, MVT::v32i8, 33}, ++ {ISD::SREM, MVT::v16i16, 21}, ++ {ISD::SREM, MVT::v8i32, 15}, ++ {ISD::SREM, MVT::v4i64, 13}, ++ ++ {ISD::UREM, MVT::v32i8, 29}, ++ {ISD::UREM, MVT::v16i16, 19}, ++ {ISD::UREM, MVT::v8i32, 14}, ++ {ISD::UREM, MVT::v4i64, 13}, ++ ++ {ISD::FADD, MVT::f64, 1}, ++ {ISD::FADD, MVT::f32, 1}, ++ {ISD::FADD, MVT::v4f64, 1}, ++ {ISD::FADD, MVT::v8f32, 1}, ++ ++ {ISD::FSUB, MVT::f64, 1}, ++ {ISD::FSUB, MVT::f32, 1}, ++ {ISD::FSUB, MVT::v4f64, 1}, ++ {ISD::FSUB, MVT::v8f32, 1}, ++ ++ {ISD::FMUL, MVT::f64, 1}, ++ {ISD::FMUL, MVT::f32, 1}, ++ {ISD::FMUL, MVT::v4f64, 1}, ++ {ISD::FMUL, MVT::v8f32, 1}, ++ ++ {ISD::FDIV, MVT::f32, 12}, ++ {ISD::FDIV, MVT::f64, 10}, ++ {ISD::FDIV, MVT::v8f32, 12}, ++ {ISD::FDIV, MVT::v4f64, 10} ++ ++ }; ++ ++ if (ST->hasLASX()) ++ if (const auto *Entry = CostTableLookup(LASXCostTable, ISD, LT.second)) ++ return LT.first * Entry->Cost; ++ ++ static const CostTblEntry LSXCostTable[] = { ++ ++ {ISD::SHL, MVT::v16i8, 1}, ++ {ISD::SHL, MVT::v8i16, 1}, ++ {ISD::SHL, MVT::v4i32, 1}, ++ {ISD::SHL, MVT::v2i64, 1}, ++ ++ {ISD::SRL, MVT::v16i8, 1}, ++ {ISD::SRL, MVT::v8i16, 1}, ++ {ISD::SRL, MVT::v4i32, 1}, ++ {ISD::SRL, MVT::v2i64, 1}, ++ ++ {ISD::SRA, MVT::v16i8, 1}, ++ 
{ISD::SRA, MVT::v8i16, 1}, ++ {ISD::SRA, MVT::v4i32, 1}, ++ {ISD::SRA, MVT::v2i64, 1}, ++ ++ {ISD::SUB, MVT::v16i8, 1}, ++ {ISD::SUB, MVT::v8i16, 1}, ++ {ISD::SUB, MVT::v4i32, 1}, ++ {ISD::SUB, MVT::v2i64, 1}, ++ ++ {ISD::ADD, MVT::v16i8, 1}, ++ {ISD::ADD, MVT::v8i16, 1}, ++ {ISD::ADD, MVT::v4i32, 1}, ++ {ISD::ADD, MVT::v2i64, 1}, ++ ++ {ISD::MUL, MVT::v16i8, 1}, ++ {ISD::MUL, MVT::v8i16, 1}, ++ {ISD::MUL, MVT::v4i32, 1}, ++ {ISD::MUL, MVT::v2i64, 1}, ++ ++ {ISD::SDIV, MVT::v16i8, 29}, ++ {ISD::SDIV, MVT::v8i16, 19}, ++ {ISD::SDIV, MVT::v4i32, 14}, ++ {ISD::SDIV, MVT::v2i64, 13}, ++ ++ {ISD::UDIV, MVT::v16i8, 29}, ++ {ISD::UDIV, MVT::v8i16, 19}, ++ {ISD::UDIV, MVT::v4i32, 14}, ++ {ISD::UDIV, MVT::v2i64, 13}, ++ ++ {ISD::SREM, MVT::v16i8, 33}, ++ {ISD::SREM, MVT::v8i16, 21}, ++ {ISD::SREM, MVT::v4i32, 15}, ++ {ISD::SREM, MVT::v2i64, 13}, ++ ++ {ISD::UREM, MVT::v16i8, 29}, ++ {ISD::UREM, MVT::v8i16, 19}, ++ {ISD::UREM, MVT::v4i32, 14}, ++ {ISD::UREM, MVT::v2i64, 13}, ++ ++ {ISD::FADD, MVT::f64, 1}, ++ {ISD::FADD, MVT::f32, 1}, ++ {ISD::FADD, MVT::v2f64, 1}, ++ {ISD::FADD, MVT::v4f32, 1}, ++ ++ {ISD::FSUB, MVT::f64, 1}, ++ {ISD::FSUB, MVT::f32, 1}, ++ {ISD::FSUB, MVT::v2f64, 1}, ++ {ISD::FSUB, MVT::v4f32, 1}, ++ ++ {ISD::FMUL, MVT::f64, 1}, ++ {ISD::FMUL, MVT::f32, 1}, ++ {ISD::FMUL, MVT::v2f64, 1}, ++ {ISD::FMUL, MVT::v4f32, 1}, ++ ++ {ISD::FDIV, MVT::f32, 12}, ++ {ISD::FDIV, MVT::f64, 10}, ++ {ISD::FDIV, MVT::v4f32, 12}, ++ {ISD::FDIV, MVT::v2f64, 10} ++ ++ }; ++ ++ if (ST->hasLSX()) ++ if (const auto *Entry = CostTableLookup(LSXCostTable, ISD, LT.second)) ++ return LT.first * Entry->Cost; ++ ++ // Fallback to the default implementation. ++ return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, Op2Info); ++} ++ ++InstructionCost LoongArchTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, ++ unsigned Index) { ++ assert(Val->isVectorTy() && "This must be a vector type"); ++ ++ Type *ScalarType = Val->getScalarType(); ++ ++ if (Index != -1U) { ++ // Legalize the type. ++ std::pair LT = TLI->getTypeLegalizationCost(DL, Val); ++ ++ // This type is legalized to a scalar type. ++ if (!LT.second.isVector()) ++ return 0; ++ ++ // The type may be split. Normalize the index to the new type. ++ unsigned Width = LT.second.getVectorNumElements(); ++ Index = Index % Width; ++ ++ // The element at index zero is already inside the vector. ++ if (Index == 0) // if (ScalarType->isFloatingPointTy() && Index == 0) ++ return 0; ++ } ++ ++ // Add to the base cost if we know that the extracted element of a vector is ++ // destined to be moved to and used in the integer register file. 
++ int RegisterFileMoveCost = 0; ++ if (Opcode == Instruction::ExtractElement && ScalarType->isPointerTy()) ++ RegisterFileMoveCost = 1; ++ ++ return BaseT::getVectorInstrCost(Opcode, Val, Index) + RegisterFileMoveCost; ++} ++ ++unsigned LoongArchTTIImpl::getLoadStoreVecRegBitWidth(unsigned) const { ++ return getRegisterBitWidth(true); ++} ++ ++InstructionCost LoongArchTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, ++ Type *Src, ++ TTI::CastContextHint CCH, ++ TTI::TargetCostKind CostKind, ++ const Instruction *I) { ++ int ISD = TLI->InstructionOpcodeToISD(Opcode); ++ assert(ISD && "Invalid opcode"); ++ ++ static const TypeConversionCostTblEntry LASXConversionTbl[] = { ++ ++ // TODO:The cost requires more granular testing ++ {ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 3}, ++ {ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 3}, ++ {ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 3}, ++ {ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 3}, ++ {ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 3}, ++ {ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 3}, ++ ++ }; ++ ++ EVT SrcTy = TLI->getValueType(DL, Src); ++ EVT DstTy = TLI->getValueType(DL, Dst); ++ ++ if (!SrcTy.isSimple() || !DstTy.isSimple()) ++ return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I); ++ ++ if (ST->hasLASX()) { ++ if (const auto *Entry = ConvertCostTableLookup( ++ LASXConversionTbl, ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT())) ++ return Entry->Cost; ++ } ++ ++ return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I); ++} +diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h +new file mode 100644 +index 000000000..3a93fc8ec +--- /dev/null ++++ b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h +@@ -0,0 +1,91 @@ ++//===-- LoongArchTargetTransformInfo.h - LoongArch specific TTI -------------*- ++// C++ -*-===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++// \file ++// This file a TargetTransformInfo::Concept conforming object specific to the ++// LoongArch target machine. It uses the target's detailed information to ++// provide more precise answers to certain TTI queries, while letting the ++// target independent and default TTI implementations handle the rest. 
++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef LLVM_LIB_TARGET_LoongArch_LoongArchTARGETTRANSFORMINFO_H ++#define LLVM_LIB_TARGET_LoongArch_LoongArchTARGETTRANSFORMINFO_H ++ ++#include "LoongArch.h" ++#include "LoongArchSubtarget.h" ++#include "LoongArchTargetMachine.h" ++#include "llvm/Analysis/TargetTransformInfo.h" ++#include "llvm/CodeGen/BasicTTIImpl.h" ++#include "llvm/CodeGen/TargetLowering.h" ++ ++namespace llvm { ++ ++class LoongArchTTIImpl : public BasicTTIImplBase { ++ typedef BasicTTIImplBase BaseT; ++ typedef TargetTransformInfo TTI; ++ friend BaseT; ++ ++ const LoongArchSubtarget *ST; ++ const LoongArchTargetLowering *TLI; ++ ++ const LoongArchSubtarget *getST() const { return ST; } ++ const LoongArchTargetLowering *getTLI() const { return TLI; } ++ ++public: ++ explicit LoongArchTTIImpl(const LoongArchTargetMachine *TM, const Function &F) ++ : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)), ++ TLI(ST->getTargetLowering()) {} ++ ++ bool areInlineCompatible(const Function *Caller, ++ const Function *Callee) const; ++ ++ /// \name Scalar TTI Implementations ++ // /// @{ ++ ++ TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth); ++ ++ /// @} ++ ++ /// \name Vector TTI Implementations ++ /// @{ ++ ++ bool enableInterleavedAccessVectorization() { return true; } ++ ++ unsigned getNumberOfRegisters(bool Vector); ++ ++ unsigned getRegisterBitWidth(bool Vector) const; ++ ++ unsigned getMaxInterleaveFactor(unsigned VF); ++ ++ InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, ++ unsigned Index); ++ ++ InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, ++ TTI::CastContextHint CCH, ++ TTI::TargetCostKind CostKind, ++ const Instruction *I = nullptr); ++ ++ unsigned getLoadStoreVecRegBitWidth(unsigned AS) const; ++ ++ InstructionCost getArithmeticInstrCost( ++ unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, ++ TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue, ++ TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue, ++ TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None, ++ TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None, ++ ArrayRef Args = ArrayRef(), ++ const Instruction *CxtI = nullptr); ++ ++ /// @} ++}; ++ ++} // end namespace llvm ++ ++#endif +diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/CMakeLists.txt b/llvm/lib/Target/LoongArch/MCTargetDesc/CMakeLists.txt +index 2e1ca69a3..927fa7d5b 100644 +--- a/llvm/lib/Target/LoongArch/MCTargetDesc/CMakeLists.txt ++++ b/llvm/lib/Target/LoongArch/MCTargetDesc/CMakeLists.txt +@@ -1,12 +1,15 @@ +-add_llvm_component_library(LLVMLoongArchDesc ++ add_llvm_component_library(LLVMLoongArchDesc ++ LoongArchABIInfo.cpp ++ LoongArchAnalyzeImmediate.cpp + LoongArchAsmBackend.cpp +- LoongArchBaseInfo.cpp + LoongArchELFObjectWriter.cpp ++ LoongArchELFStreamer.cpp + LoongArchInstPrinter.cpp + LoongArchMCAsmInfo.cpp +- LoongArchMCTargetDesc.cpp + LoongArchMCCodeEmitter.cpp +- LoongArchMatInt.cpp ++ LoongArchMCExpr.cpp ++ LoongArchMCTargetDesc.cpp ++ LoongArchTargetStreamer.cpp + + LINK_COMPONENTS + MC +diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchABIInfo.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchABIInfo.cpp +new file mode 100644 +index 000000000..18b67961a +--- /dev/null ++++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchABIInfo.cpp +@@ -0,0 +1,106 @@ ++//===---- LoongArchABIInfo.cpp - Information about LoongArch ABI's ------------------===// ++// ++// Part of the LLVM Project, under the 
Apache License v2.0 with LLVM Exceptions.
++// See https://llvm.org/LICENSE.txt for license information.
++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
++//
++//===----------------------------------------------------------------------===//
++
++#include "LoongArchABIInfo.h"
++#include "LoongArchRegisterInfo.h"
++#include "llvm/ADT/StringRef.h"
++#include "llvm/ADT/StringSwitch.h"
++#include "llvm/MC/MCTargetOptions.h"
++
++using namespace llvm;
++
++namespace {
++static const MCPhysReg LP32IntRegs[4] = {LoongArch::A0, LoongArch::A1, LoongArch::A2, LoongArch::A3};
++
++static const MCPhysReg LoongArch64IntRegs[8] = {
++    LoongArch::A0_64, LoongArch::A1_64, LoongArch::A2_64, LoongArch::A3_64,
++    LoongArch::A4_64, LoongArch::A5_64, LoongArch::A6_64, LoongArch::A7_64};
++}
++
++ArrayRef<MCPhysReg> LoongArchABIInfo::GetByValArgRegs() const {
++  if (IsLP32())
++    return makeArrayRef(LP32IntRegs);
++  if (IsLPX32() || IsLP64())
++    return makeArrayRef(LoongArch64IntRegs);
++  llvm_unreachable("Unhandled ABI");
++}
++
++ArrayRef<MCPhysReg> LoongArchABIInfo::GetVarArgRegs() const {
++  if (IsLP32())
++    return makeArrayRef(LP32IntRegs);
++  if (IsLPX32() || IsLP64())
++    return makeArrayRef(LoongArch64IntRegs);
++  llvm_unreachable("Unhandled ABI");
++}
++
++LoongArchABIInfo LoongArchABIInfo::computeTargetABI(const Triple &TT, StringRef CPU,
++                                     const MCTargetOptions &Options) {
++  if (Options.getABIName().startswith("lp32"))
++    return LoongArchABIInfo::LP32();
++  if (Options.getABIName().startswith("lpx32"))
++    return LoongArchABIInfo::LPX32();
++  if (Options.getABIName().startswith("lp64"))
++    return LoongArchABIInfo::LP64();
++  assert(Options.getABIName().empty() && "Unknown ABI option for LoongArch");
++
++  if (TT.isLoongArch64())
++    return LoongArchABIInfo::LP64();
++  return LoongArchABIInfo::LP32();
++}
++
++unsigned LoongArchABIInfo::GetStackPtr() const {
++  return ArePtrs64bit() ? LoongArch::SP_64 : LoongArch::SP;
++}
++
++unsigned LoongArchABIInfo::GetFramePtr() const {
++  return ArePtrs64bit() ? LoongArch::FP_64 : LoongArch::FP;
++}
++
++unsigned LoongArchABIInfo::GetBasePtr() const {
++  return ArePtrs64bit() ? LoongArch::S7_64 : LoongArch::S7;
++}
++
++unsigned LoongArchABIInfo::GetNullPtr() const {
++  return ArePtrs64bit() ? LoongArch::ZERO_64 : LoongArch::ZERO;
++}
++
++unsigned LoongArchABIInfo::GetZeroReg() const {
++  return AreGprs64bit() ? LoongArch::ZERO_64 : LoongArch::ZERO;
++}
++
++unsigned LoongArchABIInfo::GetPtrAddOp() const {
++  return ArePtrs64bit() ? LoongArch::ADD_D : LoongArch::ADD_W;
++}
++
++unsigned LoongArchABIInfo::GetPtrAddiOp() const {
++  return ArePtrs64bit() ? LoongArch::ADDI_D : LoongArch::ADDI_W;
++}
++
++unsigned LoongArchABIInfo::GetPtrSubOp() const {
++  return ArePtrs64bit() ? LoongArch::SUB_D : LoongArch::SUB_W;
++}
++
++unsigned LoongArchABIInfo::GetPtrAndOp() const {
++  return ArePtrs64bit() ? LoongArch::AND : LoongArch::AND32;
++}
++
++unsigned LoongArchABIInfo::GetGPRMoveOp() const {
++  return ArePtrs64bit() ? LoongArch::OR : LoongArch::OR32;
++}
++
++unsigned LoongArchABIInfo::GetEhDataReg(unsigned I) const {
++  static const unsigned EhDataReg[] = {
++    LoongArch::A0, LoongArch::A1, LoongArch::A2, LoongArch::A3
++  };
++  static const unsigned EhDataReg64[] = {
++    LoongArch::A0_64, LoongArch::A1_64, LoongArch::A2_64, LoongArch::A3_64
++  };
++
++  return IsLP64() ? EhDataReg64[I] : EhDataReg[I];
++}
++
+diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchABIInfo.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchABIInfo.h
+new file mode 100644
+index 000000000..334ee80ea
+--- /dev/null
++++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchABIInfo.h
+@@ -0,0 +1,76 @@
++//===---- LoongArchABIInfo.h - Information about LoongArch ABI's --------------------===//
++//
++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
++// See https://llvm.org/LICENSE.txt for license information.
++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
++//
++//===----------------------------------------------------------------------===//
++
++#ifndef LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHABIINFO_H
++#define LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHABIINFO_H
++
++#include "llvm/ADT/Triple.h"
++#include "llvm/IR/CallingConv.h"
++#include "llvm/MC/MCRegisterInfo.h"
++
++namespace llvm {
++
++template <typename T> class ArrayRef;
++class MCTargetOptions;
++class StringRef;
++class TargetRegisterClass;
++
++class LoongArchABIInfo {
++public:
++  enum class ABI { Unknown, LP32, LPX32, LP64 };
++
++protected:
++  ABI ThisABI;
++
++public:
++  LoongArchABIInfo(ABI ThisABI) : ThisABI(ThisABI) {}
++
++  static LoongArchABIInfo Unknown() { return LoongArchABIInfo(ABI::Unknown); }
++  static LoongArchABIInfo LP32() { return LoongArchABIInfo(ABI::LP32); }
++  static LoongArchABIInfo LPX32() { return LoongArchABIInfo(ABI::LPX32); }
++  static LoongArchABIInfo LP64() { return LoongArchABIInfo(ABI::LP64); }
++  static LoongArchABIInfo computeTargetABI(const Triple &TT, StringRef CPU,
++                                     const MCTargetOptions &Options);
++
++  bool IsKnown() const { return ThisABI != ABI::Unknown; }
++  bool IsLP32() const { return ThisABI == ABI::LP32; }
++  bool IsLPX32() const { return ThisABI == ABI::LPX32; }
++  bool IsLP64() const { return ThisABI == ABI::LP64; }
++  ABI GetEnumValue() const { return ThisABI; }
++
++  /// The registers to use for byval arguments.
++  ArrayRef<MCPhysReg> GetByValArgRegs() const;
++
++  /// The registers to use for the variable argument list.
++  ArrayRef<MCPhysReg> GetVarArgRegs() const;
++
++  /// Ordering of ABI's
++  /// LoongArchGenSubtargetInfo.inc will use this to resolve conflicts when given
++  /// multiple ABI options.
++  bool operator<(const LoongArchABIInfo Other) const {
++    return ThisABI < Other.GetEnumValue();
++  }
++
++  unsigned GetStackPtr() const;
++  unsigned GetFramePtr() const;
++  unsigned GetBasePtr() const;
++  unsigned GetNullPtr() const;
++  unsigned GetZeroReg() const;
++  unsigned GetPtrAddOp() const;
++  unsigned GetPtrAddiOp() const;
++  unsigned GetPtrSubOp() const;
++  unsigned GetPtrAndOp() const;
++  unsigned GetGPRMoveOp() const;
++  inline bool ArePtrs64bit() const { return IsLP64(); }
++  inline bool AreGprs64bit() const { return IsLPX32() || IsLP64(); }
++
++  unsigned GetEhDataReg(unsigned I) const;
++};
++}
++
++#endif
+diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAnalyzeImmediate.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAnalyzeImmediate.cpp
+new file mode 100644
+index 000000000..96e43b2d3
+--- /dev/null
++++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAnalyzeImmediate.cpp
+@@ -0,0 +1,64 @@
++//===- LoongArchAnalyzeImmediate.cpp - Analyze Immediates -----------------===//
++//
++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
++// See https://llvm.org/LICENSE.txt for license information.
++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
++//
++//===----------------------------------------------------------------------===//
++
++#include "LoongArchAnalyzeImmediate.h"
++#include "LoongArch.h"
++#include "MCTargetDesc/LoongArchMCTargetDesc.h"
++#include "llvm/Support/MathExtras.h"
++
++using namespace llvm;
++
++LoongArchAnalyzeImmediate::InstSeq
++LoongArchAnalyzeImmediate::generateInstSeq(int64_t Val, bool Is64Bit) {
++  // Val:
++  // |           hi32              |              lo32              |
++  // +------------+------------------+------------------+-----------+
++  // | Bits_52_63 |    Bits_32_51    |    Bits_12_31    | Bits_0_11 |
++  // +------------+------------------+------------------+-----------+
++  // 63          52 51             32 31              12 11        0
++  unsigned ORIOp = Is64Bit ? LoongArch::ORI : LoongArch::ORI32;
++  unsigned LU12IOp = Is64Bit ? LoongArch::LU12I_W : LoongArch::LU12I_W32;
++  unsigned ADDIOp = Is64Bit ? LoongArch::ADDI_W64 : LoongArch::ADDI_W;
++  unsigned LU32IOp = LoongArch::LU32I_D_R2;
++  unsigned LU52IOp = LoongArch::LU52I_D;
++
++  int64_t Bits_52_63 = Val >> 52 & 0xFFF;
++  int64_t Bits_32_51 = Val >> 32 & 0xFFFFF;
++  int64_t Bits_12_31 = Val >> 12 & 0xFFFFF;
++  int64_t Bits_0_11 = Val & 0xFFF;
++
++  InstSeq Insts;
++
++  if (isInt<12>(Val) && Is64Bit) {
++    Insts.push_back(Inst(LoongArch::ADDI_D, SignExtend64<12>(Bits_0_11)));
++    return Insts;
++  }
++
++  if (Bits_52_63 != 0 && SignExtend64<52>(Val) == 0) {
++    Insts.push_back(Inst(LU52IOp, SignExtend64<12>(Bits_52_63)));
++    return Insts;
++  }
++
++  if (Bits_12_31 == 0)
++    Insts.push_back(Inst(ORIOp, Bits_0_11));
++  else if (SignExtend32<1>(Bits_0_11 >> 11) == SignExtend32<20>(Bits_12_31))
++    Insts.push_back(Inst(ADDIOp, SignExtend64<12>(Bits_0_11)));
++  else {
++    Insts.push_back(Inst(LU12IOp, SignExtend64<20>(Bits_12_31)));
++    if (Bits_0_11 != 0)
++      Insts.push_back(Inst(ORIOp, Bits_0_11));
++  }
++
++  if (SignExtend32<1>(Bits_12_31 >> 19) != SignExtend32<20>(Bits_32_51))
++    Insts.push_back(Inst(LU32IOp, SignExtend64<20>(Bits_32_51)));
++
++  if (SignExtend32<1>(Bits_32_51 >> 19) != SignExtend32<12>(Bits_52_63))
++    Insts.push_back(Inst(LU52IOp, SignExtend64<12>(Bits_52_63)));
++
++  return Insts;
++}
+diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMatInt.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAnalyzeImmediate.h
+similarity index 62%
+rename from llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMatInt.h
+rename to llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAnalyzeImmediate.h
+index be1b42589..3ff00f254 100644
+--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMatInt.h
++++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAnalyzeImmediate.h
+@@ -1,4 +1,4 @@
+-//===- LoongArchMatInt.h - Immediate materialisation - --------*- C++ -*--===//
++//===- LoongArchAnalyzeImmediate.h - Analyze Immediates --------*- C++ -*--===//
+ //
+ // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ // See https://llvm.org/LICENSE.txt for license information.
+@@ -6,14 +6,13 @@
+ //
+ //===----------------------------------------------------------------------===//
+ 
+-#ifndef LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_MATINT_H
+-#define LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_MATINT_H
++#ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCHANALYZEIMMEDIATE_H
++#define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHANALYZEIMMEDIATE_H
+ 
+ #include "llvm/ADT/SmallVector.h"
+-#include <cstdint>
+ 
+ namespace llvm {
+-namespace LoongArchMatInt {
++namespace LoongArchAnalyzeImmediate {
+ struct Inst {
+   unsigned Opc;
+   int64_t Imm;
+@@ -23,8 +22,8 @@ using InstSeq = SmallVector;
+ 
+ // Helper to generate an instruction sequence that will materialise the given
+ // immediate value into a register.
+-InstSeq generateInstSeq(int64_t Val);
+-} // end namespace LoongArchMatInt
++InstSeq generateInstSeq(int64_t Val, bool Is64Bit);
++} // end namespace LoongArchAnalyzeImmediate
+ } // end namespace llvm
+ 
+-#endif
++#endif // LLVM_LIB_TARGET_LOONGARCH_LOONGARCHANALYZEIMMEDIATE_H
+diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp
+index 94a068897..b9ba9e536 100644
+--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp
++++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp
+@@ -1,4 +1,4 @@
+-//===-- LoongArchAsmBackend.cpp - LoongArch Assembler Backend -*- C++ -*---===//
++//===-- LoongArchAsmBackend.cpp - LoongArch Asm Backend ----------------------------===//
+ //
+ // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ // See https://llvm.org/LICENSE.txt for license information.
+@@ -9,41 +9,158 @@
+ // This file implements the LoongArchAsmBackend class.
+ //
+ //===----------------------------------------------------------------------===//
++//
+ 
+-#include "LoongArchAsmBackend.h"
+-#include "llvm/MC/MCAsmLayout.h"
++#include "MCTargetDesc/LoongArchAsmBackend.h"
++#include "MCTargetDesc/LoongArchABIInfo.h"
++#include "MCTargetDesc/LoongArchFixupKinds.h"
++#include "MCTargetDesc/LoongArchMCExpr.h"
++#include "MCTargetDesc/LoongArchMCTargetDesc.h"
++#include "llvm/ADT/STLExtras.h"
++#include "llvm/MC/MCAsmBackend.h"
+ #include "llvm/MC/MCAssembler.h"
+ #include "llvm/MC/MCContext.h"
++#include "llvm/MC/MCDirectives.h"
+ #include "llvm/MC/MCELFObjectWriter.h"
+-#include "llvm/Support/Endian.h"
++#include "llvm/MC/MCFixupKindInfo.h"
++#include "llvm/MC/MCObjectWriter.h"
++#include "llvm/MC/MCSubtargetInfo.h"
++#include "llvm/MC/MCTargetOptions.h"
++#include "llvm/MC/MCValue.h"
+ #include "llvm/Support/EndianStream.h"
+-
+-#define DEBUG_TYPE "loongarch-asmbackend"
++#include "llvm/Support/ErrorHandling.h"
++#include "llvm/Support/Format.h"
++#include "llvm/Support/MathExtras.h"
++#include "llvm/Support/raw_ostream.h"
+ 
+ using namespace llvm;
+ 
+-void LoongArchAsmBackend::applyFixup(const MCAssembler &Asm,
+-                                     const MCFixup &Fixup,
+-                                     const MCValue &Target,
+-                                     MutableArrayRef<char> Data, uint64_t Value,
+-                                     bool IsResolved,
+-                                     const MCSubtargetInfo *STI) const {
+-  // TODO: Apply the Value for given Fixup into the provided data fragment.
+-  return;
++std::unique_ptr<MCObjectTargetWriter>
++LoongArchAsmBackend::createObjectTargetWriter() const {
++  return createLoongArchELFObjectWriter(TheTriple, IsLPX32);
+ }
+ 
+-bool LoongArchAsmBackend::shouldForceRelocation(const MCAssembler &Asm,
+-                                                const MCFixup &Fixup,
+-                                                const MCValue &Target) {
+-  // TODO: Determine which relocation require special processing at linking
+-  // time.
+- return false; ++/// ApplyFixup - Apply the \p Value for given \p Fixup into the provided ++/// data fragment, at the offset specified by the fixup and following the ++/// fixup kind as appropriate. ++void LoongArchAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, ++ const MCValue &Target, ++ MutableArrayRef Data, uint64_t Value, ++ bool IsResolved, ++ const MCSubtargetInfo *STI) const { ++ MCFixupKind Kind = Fixup.getKind(); ++ if (Kind > FirstTargetFixupKind) ++ return; ++ ++ if (!Value) ++ return; // Doesn't change encoding. ++ ++ // Where do we start in the object ++ unsigned Offset = Fixup.getOffset(); ++ // Number of bytes we need to fixup ++ unsigned NumBytes = (getFixupKindInfo(Kind).TargetSize + 7) / 8; ++ ++ ++ // Grab current value, if any, from bits. ++ uint64_t CurVal = 0; ++ ++ for (unsigned i = 0; i != NumBytes; ++i) ++ CurVal |= (uint64_t)((uint8_t)Data[Offset + i]) << (i*8); ++ ++ uint64_t Mask = ((uint64_t)(-1) >> ++ (64 - getFixupKindInfo(Kind).TargetSize)); ++ CurVal |= Value & Mask; ++ ++ // Write out the fixed up bytes back to the code/data bits. ++ for (unsigned i = 0; i != NumBytes; ++i) ++ Data[Offset + i] = (uint8_t)((CurVal >> (i*8)) & 0xff); + } + ++Optional LoongArchAsmBackend::getFixupKind(StringRef Name) const { ++ return StringSwitch>(Name) ++ .Case("R_LARCH_NONE", (MCFixupKind)LoongArch::fixup_LARCH_NONE) ++ .Case("R_LARCH_32", FK_Data_4) ++ .Case("R_LARCH_64", FK_Data_8) ++ .Default(MCAsmBackend::getFixupKind(Name)); ++} ++ ++const MCFixupKindInfo &LoongArchAsmBackend:: ++getFixupKindInfo(MCFixupKind Kind) const { ++ const static MCFixupKindInfo Infos[] = { ++ // This table *must* be in same the order of fixup_* kinds in ++ // LoongArchFixupKinds.h. ++ // ++ // name offset bits flags ++ { "fixup_LARCH_NONE", 0, 0, 0 }, ++ { "fixup_LARCH_SOP_PUSH_ABSOLUTE", 0, 0, 0}, ++ { "fixup_LARCH_SOP_PUSH_PCREL", 0, 0, 0}, ++ { "fixup_LARCH_SOP_PUSH_GPREL", 0, 0, 0}, ++ { "fixup_LARCH_SOP_PUSH_TLS_TPREL", 0, 0, 0}, ++ { "fixup_LARCH_SOP_PUSH_TLS_GOT", 0, 0, 0}, ++ { "fixup_LARCH_SOP_PUSH_TLS_GD", 0, 0, 0}, ++ { "fixup_LARCH_SOP_PUSH_PLT_PCREL", 0, 0, 0}, ++ { "fixup_LARCH_32", 0, 0, 0}, ++ { "fixup_LARCH_64", 0, 0, 0}, ++ { "fixup_LARCH_RELATIVE", 0, 0, 0}, ++ { "fixup_LARCH_COPY", 0, 0, 0}, ++ { "fixup_LARCH_JUMP_SLOT", 0, 0, 0}, ++ { "fixup_LARCH_TLS_DTPMOD32", 0, 0, 0}, ++ { "fixup_LARCH_TLS_DTPMOD64", 0, 0, 0}, ++ { "fixup_LARCH_TLS_DTPREL32", 0, 0, 0}, ++ { "fixup_LARCH_TLS_DTPREL64", 0, 0, 0}, ++ { "fixup_LARCH_TLS_TPREL32", 0, 0, 0}, ++ { "fixup_LARCH_TLS_TPREL64", 0, 0, 0}, ++ { "fixup_LARCH_IRELATIVE", 0, 0, 0}, ++ { "fixup_LARCH_MARK_LA", 0, 0, 0}, ++ { "fixup_LARCH_MARK_PCREL", 0, 0, 0}, ++ { "fixup_LARCH_SOP_PUSH_DUP", 0, 0, 0}, ++ { "fixup_LARCH_SOP_ASSERT", 0, 0, 0}, ++ { "fixup_LARCH_SOP_NOT", 0, 0, 0}, ++ { "fixup_LARCH_SOP_SUB", 0, 0, 0}, ++ { "fixup_LARCH_SOP_SL", 0, 0, 0}, ++ { "fixup_LARCH_SOP_SR", 0, 0, 0}, ++ { "fixup_LARCH_SOP_ADD", 0, 0, 0}, ++ { "fixup_LARCH_SOP_AND", 0, 0, 0}, ++ { "fixup_LARCH_SOP_IF_ELSE", 0, 0, 0}, ++ { "fixup_LARCH_SOP_POP_32_S_10_5", 0, 0, 0}, ++ { "fixup_LARCH_SOP_POP_32_U_10_12", 0, 0, 0}, ++ { "fixup_LARCH_SOP_POP_32_S_10_12", 0, 0, 0}, ++ { "fixup_LARCH_SOP_POP_32_S_10_16", 0, 0, 0}, ++ { "fixup_LARCH_SOP_POP_32_S_10_16_S2", 0, 0, 0}, ++ { "fixup_LARCH_SOP_POP_32_S_5_20", 0, 0, 0}, ++ { "fixup_LARCH_SOP_POP_32_S_0_5_10_16_S2", 0, 0, 0}, ++ { "fixup_LARCH_SOP_POP_32_S_0_10_10_16_S2", 0, 0, 0}, ++ { "fixup_LARCH_SOP_POP_32_U", 0, 0, 0}, ++ { "fixup_LARCH_ADD8", 0, 0, 0}, ++ { "fixup_LARCH_ADD16", 0, 
0, 0}, ++ { "fixup_LARCH_ADD24", 0, 0, 0}, ++ { "fixup_LARCH_ADD32", 0, 0, 0}, ++ { "fixup_LARCH_ADD64", 0, 0, 0}, ++ { "fixup_LARCH_SUB8", 0, 0, 0}, ++ { "fixup_LARCH_SUB16", 0, 0, 0}, ++ { "fixup_LARCH_SUB24", 0, 0, 0}, ++ { "fixup_LARCH_SUB32", 0, 0, 0}, ++ { "fixup_LARCH_SUB64", 0, 0, 0}, ++ }; ++ ++ if (Kind < FirstTargetFixupKind) ++ return MCAsmBackend::getFixupKindInfo(Kind); ++ ++ assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() && ++ "Invalid kind!"); ++ ++ return Infos[Kind - FirstTargetFixupKind]; ++} ++ ++/// WriteNopData - Write an (optimal) nop sequence of Count bytes ++/// to the given output. If the target cannot generate such a sequence, ++/// it should return an error. ++/// ++/// \return - True on success. + bool LoongArchAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count, + const MCSubtargetInfo *STI) const { +- // Check for byte count not multiple of instruction word size +- if (Count % 4 != 0) ++ // Check for a less than instruction size number of bytes ++ if ((Count % 4) != 0) + return false; + + // The nop on LoongArch is andi r0, r0, 0. +@@ -53,16 +170,45 @@ bool LoongArchAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count, + return true; + } + +-std::unique_ptr +-LoongArchAsmBackend::createObjectTargetWriter() const { +- return createLoongArchELFObjectWriter(OSABI, Is64Bit); ++bool LoongArchAsmBackend::shouldForceRelocation(const MCAssembler &Asm, ++ const MCFixup &Fixup, ++ const MCValue &Target) { ++ const unsigned FixupKind = Fixup.getKind(); ++ switch (FixupKind) { ++ default: ++ return false; ++ // All these relocations require special processing ++ // at linking time. Delegate this work to a linker. ++ case LoongArch::fixup_LARCH_SOP_PUSH_PLT_PCREL: ++ case LoongArch::fixup_LARCH_SOP_PUSH_PCREL: ++ case LoongArch::fixup_LARCH_SOP_PUSH_GPREL: ++ case LoongArch::fixup_LARCH_SOP_PUSH_TLS_GD: ++ case LoongArch::fixup_LARCH_SOP_PUSH_TLS_GOT: ++ case LoongArch::fixup_LARCH_SOP_PUSH_TLS_TPREL: ++ case LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE: ++ case LoongArch::fixup_LARCH_SOP_IF_ELSE: ++ case LoongArch::fixup_LARCH_SOP_ADD: ++ case LoongArch::fixup_LARCH_SOP_SUB: ++ case LoongArch::fixup_LARCH_SOP_AND: ++ case LoongArch::fixup_LARCH_SOP_SL: ++ case LoongArch::fixup_LARCH_SOP_SR: ++ case LoongArch::fixup_LARCH_SOP_POP_32_S_10_5: ++ case LoongArch::fixup_LARCH_SOP_POP_32_S_5_20: ++ case LoongArch::fixup_LARCH_SOP_POP_32_S_10_12: ++ case LoongArch::fixup_LARCH_SOP_POP_32_U_10_12: ++ case LoongArch::fixup_LARCH_SOP_POP_32_S_10_16_S2: ++ case LoongArch::fixup_LARCH_SOP_POP_32_S_0_5_10_16_S2: ++ case LoongArch::fixup_LARCH_SOP_POP_32_S_0_10_10_16_S2: ++ return true; ++ } + } + + MCAsmBackend *llvm::createLoongArchAsmBackend(const Target &T, + const MCSubtargetInfo &STI, + const MCRegisterInfo &MRI, + const MCTargetOptions &Options) { +- const Triple &TT = STI.getTargetTriple(); +- uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TT.getOS()); +- return new LoongArchAsmBackend(STI, OSABI, TT.isArch64Bit()); ++ LoongArchABIInfo ABI = LoongArchABIInfo::computeTargetABI( ++ STI.getTargetTriple(), STI.getCPU(), Options); ++ return new LoongArchAsmBackend(T, MRI, STI.getTargetTriple(), STI.getCPU(), ++ ABI.IsLPX32()); + } +diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h +index a5f0b816c..45ae6af44 100644 +--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h ++++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h +@@ -1,4 +1,4 @@ 
+-//===-- LoongArchAsmBackend.h - LoongArch Assembler Backend ---*- C++ -*---===// ++//===-- LoongArchAsmBackend.h - LoongArch Asm Backend ------------------------------===// + // + // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + // See https://llvm.org/LICENSE.txt for license information. +@@ -9,55 +9,83 @@ + // This file defines the LoongArchAsmBackend class. + // + //===----------------------------------------------------------------------===// ++// + + #ifndef LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHASMBACKEND_H + #define LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHASMBACKEND_H + +-#include "MCTargetDesc/LoongArchBaseInfo.h" +-#include "MCTargetDesc/LoongArchMCTargetDesc.h" ++#include "MCTargetDesc/LoongArchFixupKinds.h" ++#include "llvm/ADT/Triple.h" + #include "llvm/MC/MCAsmBackend.h" +-#include "llvm/MC/MCFixupKindInfo.h" +-#include "llvm/MC/MCSubtargetInfo.h" + + namespace llvm { + ++class MCAssembler; ++struct MCFixupKindInfo; ++class MCObjectWriter; ++class MCRegisterInfo; ++class MCSymbolELF; ++class Target; ++ + class LoongArchAsmBackend : public MCAsmBackend { +- uint8_t OSABI; +- bool Is64Bit; ++ Triple TheTriple; ++ bool IsLPX32; + + public: +- LoongArchAsmBackend(const MCSubtargetInfo &STI, uint8_t OSABI, bool Is64Bit) +- : MCAsmBackend(support::little), OSABI(OSABI), Is64Bit(Is64Bit) {} +- ~LoongArchAsmBackend() override {} ++ LoongArchAsmBackend(const Target &T, const MCRegisterInfo &MRI, const Triple &TT, ++ StringRef CPU, bool LPX32) ++ : MCAsmBackend(support::little), ++ TheTriple(TT), IsLPX32(LPX32) { ++ assert(TT.isLittleEndian()); ++ } ++ ++ std::unique_ptr ++ createObjectTargetWriter() const override; + + void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, + const MCValue &Target, MutableArrayRef Data, + uint64_t Value, bool IsResolved, + const MCSubtargetInfo *STI) const override; + +- bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup, +- const MCValue &Target) override; ++ Optional getFixupKind(StringRef Name) const override; ++ const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override; ++ ++ unsigned getNumFixupKinds() const override { ++ return LoongArch::NumTargetFixupKinds; ++ } ++ ++ /// @name Target Relaxation Interfaces ++ /// @{ + +- bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, +- const MCRelaxableFragment *DF, +- const MCAsmLayout &Layout) const override { ++ /// MayNeedRelaxation - Check whether the given instruction may need ++ /// relaxation. ++ /// ++ /// \param Inst - The instruction to test. ++ bool mayNeedRelaxation(const MCInst &Inst, ++ const MCSubtargetInfo &STI) const override { + return false; + } + +- unsigned getNumFixupKinds() const override { +- // FIXME: Implement this when we define fixup kind +- return 0; ++ /// fixupNeedsRelaxation - Target specific predicate for whether a given ++ /// fixup requires the associated instruction to be relaxed. ++ bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, ++ const MCRelaxableFragment *DF, ++ const MCAsmLayout &Layout) const override { ++ // FIXME. 
++ llvm_unreachable("RelaxInstruction() unimplemented"); ++ return false; + } + +- void relaxInstruction(MCInst &Inst, +- const MCSubtargetInfo &STI) const override {} ++ /// @} + + bool writeNopData(raw_ostream &OS, uint64_t Count, + const MCSubtargetInfo *STI) const override; + +- std::unique_ptr +- createObjectTargetWriter() const override; +-}; +-} // end namespace llvm ++ bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup, ++ const MCValue &Target) override; ++ ++}; // class LoongArchAsmBackend ++ ++} // namespace + +-#endif // LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHASMBACKEND_H ++#endif +diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.cpp +deleted file mode 100644 +index de2ba2833..000000000 +--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.cpp ++++ /dev/null +@@ -1,40 +0,0 @@ +-//= LoongArchBaseInfo.cpp - Top level definitions for LoongArch MC -*- C++ -*-// +-// +-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +-// See https://llvm.org/LICENSE.txt for license information. +-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +-// +-//===----------------------------------------------------------------------===// +-// +-// This file implements helper functions for the LoongArch target useful for the +-// compiler back-end and the MC libraries. +-// +-//===----------------------------------------------------------------------===// +- +-#include "LoongArchBaseInfo.h" +-#include "llvm/ADT/ArrayRef.h" +-#include "llvm/ADT/Triple.h" +-#include "llvm/MC/MCSubtargetInfo.h" +- +-namespace llvm { +- +-namespace LoongArchABI { +- +-ABI getTargetABI(StringRef ABIName) { +- auto TargetABI = StringSwitch(ABIName) +- .Case("ilp32s", ABI_ILP32S) +- .Case("ilp32f", ABI_ILP32F) +- .Case("ilp32d", ABI_ILP32D) +- .Case("lp64s", ABI_LP64S) +- .Case("lp64f", ABI_LP64F) +- .Case("lp64d", ABI_LP64D) +- .Default(ABI_Unknown); +- return TargetABI; +-} +- +-// FIXME: other register? +-MCRegister getBPReg() { return LoongArch::R31; } +- +-} // end namespace LoongArchABI +- +-} // end namespace llvm +diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h +index fee247a0c..707333c18 100644 +--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h ++++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h +@@ -1,4 +1,4 @@ +-//=- LoongArchBaseInfo.h - Top level definitions for LoongArch MC -*- C++ -*-=// ++//===-- LoongArchBaseInfo.h - Top level definitions for LoongArch MC ------*- C++ -*-===// + // + // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + // See https://llvm.org/LICENSE.txt for license information. +@@ -6,39 +6,123 @@ + // + //===----------------------------------------------------------------------===// + // +-// This file contains small standalone enum definitions and helper function +-// definitions for the LoongArch target useful for the compiler back-end and the +-// MC libraries. ++// This file contains small standalone helper functions and enum definitions for ++// the LoongArch target useful for the compiler back-end and the MC libraries. 
+ // + //===----------------------------------------------------------------------===// + #ifndef LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHBASEINFO_H + #define LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHBASEINFO_H + +-#include "MCTargetDesc/LoongArchMCTargetDesc.h" +-#include "llvm/ADT/StringRef.h" +-#include "llvm/ADT/StringSwitch.h" +-#include "llvm/MC/MCInstrDesc.h" +-#include "llvm/MC/SubtargetFeature.h" ++#include "LoongArchFixupKinds.h" ++#include "LoongArchMCTargetDesc.h" ++#include "llvm/MC/MCExpr.h" ++#include "llvm/Support/DataTypes.h" ++#include "llvm/Support/ErrorHandling.h" + + namespace llvm { + +-namespace LoongArchABI { +-enum ABI { +- ABI_ILP32S, +- ABI_ILP32F, +- ABI_ILP32D, +- ABI_LP64S, +- ABI_LP64F, +- ABI_LP64D, +- ABI_Unknown +-}; ++/// LoongArchII - This namespace holds all of the target specific flags that ++/// instruction info tracks. ++/// ++namespace LoongArchII { ++ /// Target Operand Flag enum. ++ enum TOF { ++ //===------------------------------------------------------------------===// ++ // LoongArch Specific MachineOperand flags. + +-ABI getTargetABI(StringRef ABIName); ++ MO_NO_FLAG, + +-// Returns the register used to hold the stack pointer after realignment. +-MCRegister getBPReg(); +-} // end namespace LoongArchABI ++ /// MO_ABS_XXX - Represents the hi or low part of an absolute symbol ++ /// address. ++ MO_ABS_HI, ++ MO_ABS_LO, ++ MO_ABS_HIGHER, ++ MO_ABS_HIGHEST, + +-} // end namespace llvm ++ /// MO_PCREL_XXX - Represents the hi or low part of an pc relative symbol ++ /// address. ++ MO_PCREL_HI, ++ MO_PCREL_LO, ++ // with tmp reg ++ MO_PCREL_RRHI, ++ MO_PCREL_RRLO, ++ MO_PCREL_RRHIGHER, ++ MO_PCREL_RRHIGHEST, + +-#endif // LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHBASEINFO_H ++ // LArch Tls gd and ld ++ MO_TLSGD_HI, ++ MO_TLSGD_LO, ++ // with tmp reg ++ MO_TLSGD_RRHI, ++ MO_TLSGD_RRLO, ++ MO_TLSGD_RRHIGHER, ++ MO_TLSGD_RRHIGHEST, ++ ++ // LArch thread tprel (ie/le) ++ // LArch Tls ie ++ MO_TLSIE_HI, ++ MO_TLSIE_LO, ++ // with tmp reg ++ MO_TLSIE_RRHI, ++ MO_TLSIE_RRLO, ++ MO_TLSIE_RRHIGHER, ++ MO_TLSIE_RRHIGHEST, ++ // LArch Tls le ++ MO_TLSLE_HI, ++ MO_TLSLE_LO, ++ MO_TLSLE_HIGHER, ++ MO_TLSLE_HIGHEST, ++ ++ // Loongarch got ++ MO_GOT_HI, ++ MO_GOT_LO, ++ // with tmp reg ++ MO_GOT_RRHI, ++ MO_GOT_RRLO, ++ MO_GOT_RRHIGHER, ++ MO_GOT_RRHIGHEST, ++ ++ MO_CALL_HI, ++ MO_CALL_LO, ++ }; ++ ++ enum { ++ //===------------------------------------------------------------------===// ++ // Instruction encodings. These are the standard/most common forms for ++ // LoongArch instructions. ++ // ++ ++ // Pseudo - This represents an instruction that is a pseudo instruction ++ // or one that has not been implemented yet. It is illegal to code generate ++ // it, but tolerated for intermediate implementation stages. ++ Pseudo = 0, ++ ++ /// FrmR - This form is for instructions of the format R. ++ FrmR = 1, ++ /// FrmI - This form is for instructions of the format I. ++ FrmI = 2, ++ /// FrmJ - This form is for instructions of the format J. ++ FrmJ = 3, ++ /// FrmFR - This form is for instructions of the format FR. ++ FrmFR = 4, ++ /// FrmFI - This form is for instructions of the format FI. ++ FrmFI = 5, ++ /// FrmOther - This form is for instructions that have no specific format. ++ FrmOther = 6, ++ ++ FormMask = 15, ++ /// IsCTI - Instruction is a Control Transfer Instruction. ++ IsCTI = 1 << 4, ++ /// HasForbiddenSlot - Instruction has a forbidden slot. 
++ HasForbiddenSlot = 1 << 5, ++ /// IsPCRelativeLoad - A Load instruction with implicit source register ++ /// ($pc) with explicit offset and destination register ++ IsPCRelativeLoad = 1 << 6, ++ /// HasFCCRegOperand - Instruction uses an $fcc register. ++ HasFCCRegOperand = 1 << 7 ++ ++ }; ++} ++} ++ ++#endif +diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp +index 1850b0d8a..e00b9af9d 100644 +--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp ++++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp +@@ -1,4 +1,4 @@ +-//===-- LoongArchELFObjectWriter.cpp - LoongArch ELF Writer ---*- C++ -*---===// ++//===-- LoongArchELFObjectWriter.cpp - LoongArch ELF Writer -------------------------===// + // + // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + // See https://llvm.org/LICENSE.txt for license information. +@@ -6,59 +6,181 @@ + // + //===----------------------------------------------------------------------===// + ++#include "MCTargetDesc/LoongArchFixupKinds.h" + #include "MCTargetDesc/LoongArchMCTargetDesc.h" ++#include "llvm/ADT/STLExtras.h" ++#include "llvm/BinaryFormat/ELF.h" + #include "llvm/MC/MCContext.h" + #include "llvm/MC/MCELFObjectWriter.h" + #include "llvm/MC/MCFixup.h" + #include "llvm/MC/MCObjectWriter.h" ++#include "llvm/MC/MCSymbolELF.h" ++#include "llvm/Support/Casting.h" ++#include "llvm/Support/Compiler.h" ++#include "llvm/Support/Debug.h" + #include "llvm/Support/ErrorHandling.h" ++#include "llvm/Support/MathExtras.h" ++#include "llvm/Support/raw_ostream.h" ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#define DEBUG_TYPE "loongarch-elf-object-writer" + + using namespace llvm; + + namespace { ++ + class LoongArchELFObjectWriter : public MCELFObjectTargetWriter { + public: +- LoongArchELFObjectWriter(uint8_t OSABI, bool Is64Bit); ++ LoongArchELFObjectWriter(uint8_t OSABI, bool HasRelocationAddend, bool Is64); + +- ~LoongArchELFObjectWriter() override; ++ ~LoongArchELFObjectWriter() override = default; + +- // Return true if the given relocation must be with a symbol rather than +- // section plus offset. ++ unsigned getRelocType(MCContext &Ctx, const MCValue &Target, ++ const MCFixup &Fixup, bool IsPCRel) const override; + bool needsRelocateWithSymbol(const MCSymbol &Sym, + unsigned Type) const override { + return true; + } +- +-protected: +- unsigned getRelocType(MCContext &Ctx, const MCValue &Target, +- const MCFixup &Fixup, bool IsPCRel) const override; + }; +-} // end namespace + +-LoongArchELFObjectWriter::LoongArchELFObjectWriter(uint8_t OSABI, bool Is64Bit) +- : MCELFObjectTargetWriter(Is64Bit, OSABI, ELF::EM_LOONGARCH, +- /*HasRelocationAddend*/ true) {} ++} // end anonymous namespace + +-LoongArchELFObjectWriter::~LoongArchELFObjectWriter() {} ++LoongArchELFObjectWriter::LoongArchELFObjectWriter(uint8_t OSABI, ++ bool HasRelocationAddend, bool Is64) ++ : MCELFObjectTargetWriter(Is64, OSABI, ELF::EM_LOONGARCH, HasRelocationAddend) {} + + unsigned LoongArchELFObjectWriter::getRelocType(MCContext &Ctx, +- const MCValue &Target, +- const MCFixup &Fixup, +- bool IsPCRel) const { +- // Determine the type of the relocation +- unsigned Kind = Fixup.getTargetKind(); +- +- if (Kind >= FirstLiteralRelocationKind) +- return Kind - FirstLiteralRelocationKind; ++ const MCValue &Target, ++ const MCFixup &Fixup, ++ bool IsPCRel) const { ++ // Determine the type of the relocation. 
++ ///XXX:Reloc ++ unsigned Kind = (unsigned)Fixup.getKind(); + + switch (Kind) { +- // TODO: Implement this when we defined fixup kind. +- default: +- return ELF::R_LARCH_NONE; ++ default: ++ return ELF::R_LARCH_NONE; ++ //llvm_unreachable("invalid fixup kind!"); ++ case FK_Data_4: ++ case LoongArch::fixup_LARCH_32: ++ return ELF::R_LARCH_32; ++ case FK_GPRel_4: ++ case FK_Data_8: ++ case LoongArch::fixup_LARCH_64: ++ return ELF::R_LARCH_64; ++ case LoongArch::fixup_LARCH_NONE: ++ return ELF::R_LARCH_NONE; ++ case LoongArch::fixup_LARCH_RELATIVE: ++ return ELF::R_LARCH_RELATIVE; ++ case LoongArch::fixup_LARCH_COPY: ++ return ELF::R_LARCH_COPY; ++ case LoongArch::fixup_LARCH_JUMP_SLOT: ++ return ELF::R_LARCH_JUMP_SLOT; ++ case LoongArch::fixup_LARCH_TLS_DTPMOD32: ++ return ELF::R_LARCH_TLS_DTPMOD32; ++ case LoongArch::fixup_LARCH_TLS_DTPMOD64: ++ return ELF::R_LARCH_TLS_DTPMOD64; ++ case LoongArch::fixup_LARCH_TLS_DTPREL32: ++ return ELF::R_LARCH_TLS_DTPREL32; ++ case LoongArch::fixup_LARCH_TLS_DTPREL64: ++ return ELF::R_LARCH_TLS_DTPREL64; ++ case LoongArch::fixup_LARCH_TLS_TPREL32: ++ return ELF::R_LARCH_TLS_TPREL32; ++ case LoongArch::fixup_LARCH_TLS_TPREL64: ++ return ELF::R_LARCH_TLS_TPREL64; ++ case LoongArch::fixup_LARCH_IRELATIVE: ++ return ELF::R_LARCH_IRELATIVE; ++ case LoongArch::fixup_LARCH_MARK_LA: ++ return ELF::R_LARCH_MARK_LA; ++ case LoongArch::fixup_LARCH_MARK_PCREL: ++ return ELF::R_LARCH_MARK_PCREL; ++ case LoongArch::fixup_LARCH_SOP_PUSH_PCREL: ++ return ELF::R_LARCH_SOP_PUSH_PCREL; ++ case LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE: ++ return ELF::R_LARCH_SOP_PUSH_ABSOLUTE; ++ case LoongArch::fixup_LARCH_SOP_PUSH_DUP: ++ return ELF::R_LARCH_SOP_PUSH_DUP; ++ case LoongArch::fixup_LARCH_SOP_PUSH_GPREL: ++ return ELF::R_LARCH_SOP_PUSH_GPREL; ++ case LoongArch::fixup_LARCH_SOP_PUSH_TLS_TPREL: ++ return ELF::R_LARCH_SOP_PUSH_TLS_TPREL; ++ case LoongArch::fixup_LARCH_SOP_PUSH_TLS_GOT: ++ return ELF::R_LARCH_SOP_PUSH_TLS_GOT; ++ case LoongArch::fixup_LARCH_SOP_PUSH_TLS_GD: ++ return ELF::R_LARCH_SOP_PUSH_TLS_GD; ++ case LoongArch::fixup_LARCH_SOP_PUSH_PLT_PCREL: ++ return ELF::R_LARCH_SOP_PUSH_PLT_PCREL; ++ case LoongArch::fixup_LARCH_SOP_ASSERT: ++ return ELF::R_LARCH_SOP_ASSERT; ++ case LoongArch::fixup_LARCH_SOP_NOT: ++ return ELF::R_LARCH_SOP_NOT; ++ case LoongArch::fixup_LARCH_SOP_SUB: ++ return ELF::R_LARCH_SOP_SUB; ++ case LoongArch::fixup_LARCH_SOP_SL: ++ return ELF::R_LARCH_SOP_SL; ++ case LoongArch::fixup_LARCH_SOP_SR: ++ return ELF::R_LARCH_SOP_SR; ++ case LoongArch::fixup_LARCH_SOP_ADD: ++ return ELF::R_LARCH_SOP_ADD; ++ case LoongArch::fixup_LARCH_SOP_AND: ++ return ELF::R_LARCH_SOP_AND; ++ case LoongArch::fixup_LARCH_SOP_IF_ELSE: ++ return ELF::R_LARCH_SOP_IF_ELSE; ++ case LoongArch::fixup_LARCH_SOP_POP_32_S_10_5: ++ return ELF::R_LARCH_SOP_POP_32_S_10_5; ++ case LoongArch::fixup_LARCH_SOP_POP_32_U_10_12: ++ return ELF::R_LARCH_SOP_POP_32_U_10_12; ++ case LoongArch::fixup_LARCH_SOP_POP_32_S_10_12: ++ return ELF::R_LARCH_SOP_POP_32_S_10_12; ++ case LoongArch::fixup_LARCH_SOP_POP_32_S_10_16: ++ return ELF::R_LARCH_SOP_POP_32_S_10_16; ++ case LoongArch::fixup_LARCH_SOP_POP_32_S_10_16_S2: ++ return ELF::R_LARCH_SOP_POP_32_S_10_16_S2; ++ case LoongArch::fixup_LARCH_SOP_POP_32_S_5_20: ++ return ELF::R_LARCH_SOP_POP_32_S_5_20; ++ case LoongArch::fixup_LARCH_SOP_POP_32_S_0_5_10_16_S2: ++ return ELF::R_LARCH_SOP_POP_32_S_0_5_10_16_S2; ++ case LoongArch::fixup_LARCH_SOP_POP_32_S_0_10_10_16_S2: ++ return ELF::R_LARCH_SOP_POP_32_S_0_10_10_16_S2; ++ case 
LoongArch::fixup_LARCH_SOP_POP_32_U: ++ return ELF::R_LARCH_SOP_POP_32_U; ++ case LoongArch::fixup_LARCH_ADD8: ++ return ELF::R_LARCH_ADD8; ++ case LoongArch::fixup_LARCH_ADD16: ++ return ELF::R_LARCH_ADD16; ++ case LoongArch::fixup_LARCH_ADD24: ++ return ELF::R_LARCH_ADD24; ++ case LoongArch::fixup_LARCH_ADD32: ++ return ELF::R_LARCH_ADD32; ++ case LoongArch::fixup_LARCH_ADD64: ++ return ELF::R_LARCH_ADD64; ++ case LoongArch::fixup_LARCH_SUB8: ++ return ELF::R_LARCH_SUB8; ++ case LoongArch::fixup_LARCH_SUB16: ++ return ELF::R_LARCH_SUB16; ++ case LoongArch::fixup_LARCH_SUB24: ++ return ELF::R_LARCH_SUB24; ++ case LoongArch::fixup_LARCH_SUB32: ++ return ELF::R_LARCH_SUB32; ++ case LoongArch::fixup_LARCH_SUB64: ++ return ELF::R_LARCH_SUB64; ++ case LoongArch::fixup_LARCH_GNU_VTINHERIT: ++ return ELF::R_LARCH_GNU_VTINHERIT; ++ case LoongArch::fixup_LARCH_GNU_VTENTRY: ++ return ELF::R_LARCH_GNU_VTENTRY; + } + } + + std::unique_ptr +-llvm::createLoongArchELFObjectWriter(uint8_t OSABI, bool Is64Bit) { +- return std::make_unique(OSABI, Is64Bit); ++llvm::createLoongArchELFObjectWriter(const Triple &TT, bool IsLPX32) { ++ uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TT.getOS()); ++ bool IsLP64 = TT.isArch64Bit() && !IsLPX32; ++ bool HasRelocationAddend = TT.isArch64Bit(); ++ return std::make_unique(OSABI, HasRelocationAddend, ++ IsLP64); + } +diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFStreamer.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFStreamer.cpp +new file mode 100644 +index 000000000..a74fee3f8 +--- /dev/null ++++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFStreamer.cpp +@@ -0,0 +1,138 @@ ++//===-------- LoongArchELFStreamer.cpp - ELF Object Output ---------------------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. 
++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++ ++#include "LoongArchELFStreamer.h" ++#include "LoongArchFixupKinds.h" ++#include "LoongArchTargetStreamer.h" ++#include "llvm/BinaryFormat/ELF.h" ++#include "llvm/MC/MCAsmBackend.h" ++#include "llvm/MC/MCAssembler.h" ++#include "llvm/MC/MCCodeEmitter.h" ++#include "llvm/MC/MCContext.h" ++#include "llvm/MC/MCDwarf.h" ++#include "llvm/MC/MCInst.h" ++#include "llvm/MC/MCObjectWriter.h" ++#include "llvm/MC/MCSymbolELF.h" ++#include "llvm/MC/MCValue.h" ++#include "llvm/Support/Casting.h" ++ ++using namespace llvm; ++ ++static std::pair getRelocPairForSize(unsigned Size) { ++ switch (Size) { ++ default: ++ llvm_unreachable("unsupported fixup size"); ++ case 1: ++ return std::make_pair(LoongArch::fixup_LARCH_ADD8, ++ LoongArch::fixup_LARCH_SUB8); ++ case 2: ++ return std::make_pair(LoongArch::fixup_LARCH_ADD16, ++ LoongArch::fixup_LARCH_SUB16); ++ case 4: ++ return std::make_pair(LoongArch::fixup_LARCH_ADD32, ++ LoongArch::fixup_LARCH_SUB32); ++ case 8: ++ return std::make_pair(LoongArch::fixup_LARCH_ADD64, ++ LoongArch::fixup_LARCH_SUB64); ++ } ++} ++ ++static bool requiresFixups(MCContext &C, const MCExpr *Value, ++ const MCExpr *&LHS, const MCExpr *&RHS, ++ LoongArchELFStreamer *MCS) { ++ const auto *MBE = dyn_cast(Value); ++ if (MBE == nullptr) ++ return false; ++ ++ MCValue E; ++ if (!Value->evaluateAsRelocatable(E, nullptr, nullptr)) ++ return false; ++ if (E.getSymA() == nullptr || E.getSymB() == nullptr) ++ return false; ++ ++ const auto &A = E.getSymA()->getSymbol(); ++ const auto &B = E.getSymB()->getSymbol(); ++ ++ if (A.getName().empty() && B.getName().empty()) ++ return false; ++ ++ if (!A.isInSection() && !B.isInSection() && ++ !A.getName().empty() && !B.getName().empty()) ++ return false; ++ ++ LHS = ++ MCBinaryExpr::create(MCBinaryExpr::Add, MCSymbolRefExpr::create(&A, C), ++ MCConstantExpr::create(E.getConstant(), C), C); ++ RHS = E.getSymB(); ++ ++ bool isCheckInstr = ++ StringSwitch(MCS->getCurrentSectionOnly()->getName()) ++ .Case(".debug_aranges", true) ++ .Default(false); ++ ++ return (A.isInSection() ++ ? (isCheckInstr ? A.getSection().hasInstructions() : true) ++ : !A.getName().empty()) || ++ (B.isInSection() ? 
B.getSection().hasInstructions() ++ : !B.getName().empty()); ++} ++ ++LoongArchELFStreamer::LoongArchELFStreamer(MCContext &Context, ++ std::unique_ptr MAB, ++ std::unique_ptr OW, ++ std::unique_ptr Emitter) ++ : MCELFStreamer(Context, std::move(MAB), std::move(OW), ++ std::move(Emitter)) { ++ } ++ ++void LoongArchELFStreamer::emitCFIStartProcImpl(MCDwarfFrameInfo &Frame) { ++ Frame.Begin = getContext().createTempSymbol(); ++ MCELFStreamer::emitLabel(Frame.Begin); ++} ++ ++MCSymbol *LoongArchELFStreamer::emitCFILabel() { ++ MCSymbol *Label = getContext().createTempSymbol("cfi", true); ++ MCELFStreamer::emitLabel(Label); ++ return Label; ++} ++ ++void LoongArchELFStreamer::emitCFIEndProcImpl(MCDwarfFrameInfo &Frame) { ++ Frame.End = getContext().createTempSymbol(); ++ MCELFStreamer::emitLabel(Frame.End); ++} ++ ++void LoongArchELFStreamer::emitValueImpl(const MCExpr *Value, unsigned Size, ++ SMLoc Loc) { ++ const MCExpr *A, *B; ++ if (!requiresFixups(getContext(), Value, A, B, this)) ++ return MCELFStreamer::emitValueImpl(Value, Size, Loc); ++ ++ MCStreamer::emitValueImpl(Value, Size, Loc); ++ ++ MCDataFragment *DF = getOrCreateDataFragment(); ++ flushPendingLabels(DF, DF->getContents().size()); ++ MCDwarfLineEntry::make(this, getCurrentSectionOnly()); ++ ++ unsigned Add, Sub; ++ std::tie(Add, Sub) = getRelocPairForSize(Size); ++ ++ DF->getFixups().push_back(MCFixup::create( ++ DF->getContents().size(), A, static_cast(Add), Loc)); ++ DF->getFixups().push_back(MCFixup::create( ++ DF->getContents().size(), B, static_cast(Sub), Loc)); ++ ++ DF->getContents().resize(DF->getContents().size() + Size, 0); ++} ++ ++MCELFStreamer *llvm::createLoongArchELFStreamer( ++ MCContext &Context, std::unique_ptr MAB, ++ std::unique_ptr OW, std::unique_ptr Emitter, ++ bool RelaxAll) { ++ return new LoongArchELFStreamer(Context, std::move(MAB), std::move(OW), ++ std::move(Emitter)); ++} +diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFStreamer.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFStreamer.h +new file mode 100644 +index 000000000..875cebcb7 +--- /dev/null ++++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFStreamer.h +@@ -0,0 +1,53 @@ ++//===- LoongArchELFStreamer.h - ELF Object Output --------------------*- C++ -*-===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This is a custom MCELFStreamer which allows us to insert some hooks before ++// emitting data into an actual object file. ++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHELFSTREAMER_H ++#define LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHELFSTREAMER_H ++ ++#include "llvm/ADT/SmallVector.h" ++#include "llvm/MC/MCELFStreamer.h" ++#include ++ ++namespace llvm { ++ ++class MCAsmBackend; ++class MCCodeEmitter; ++class MCContext; ++class MCSubtargetInfo; ++struct MCDwarfFrameInfo; ++ ++class LoongArchELFStreamer : public MCELFStreamer { ++ ++public: ++ LoongArchELFStreamer(MCContext &Context, std::unique_ptr MAB, ++ std::unique_ptr OW, ++ std::unique_ptr Emitter); ++ ++ /// Overriding these functions allows us to dismiss all labels. 
++ void emitValueImpl(const MCExpr *Value, unsigned Size, SMLoc Loc) override; ++ ++ // Overriding these functions allows us to avoid recording of these labels ++ // in emitLabel. ++ void emitCFIStartProcImpl(MCDwarfFrameInfo &Frame) override; ++ void emitCFIEndProcImpl(MCDwarfFrameInfo &Frame) override; ++ MCSymbol *emitCFILabel() override; ++}; ++ ++MCELFStreamer *createLoongArchELFStreamer(MCContext &Context, ++ std::unique_ptr MAB, ++ std::unique_ptr OW, ++ std::unique_ptr Emitter, ++ bool RelaxAll); ++} // end namespace llvm ++ ++#endif // LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHELFSTREAMER_H +diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchFixupKinds.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchFixupKinds.h +new file mode 100644 +index 000000000..e0e1200d8 +--- /dev/null ++++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchFixupKinds.h +@@ -0,0 +1,90 @@ ++//===-- LoongArchFixupKinds.h - LoongArch Specific Fixup Entries ----------*- C++ -*-===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHFIXUPKINDS_H ++#define LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHFIXUPKINDS_H ++ ++#include "llvm/MC/MCFixup.h" ++ ++namespace llvm { ++namespace LoongArch { ++ // Although most of the current fixup types reflect a unique relocation ++ // one can have multiple fixup types for a given relocation and thus need ++ // to be uniquely named. ++ // ++ // This table *must* be in the same order of ++ // MCFixupKindInfo Infos[LoongArch::NumTargetFixupKinds] ++ // in LoongArchAsmBackend.cpp. ++ // ++ enum Fixups { ++ // R_LARCH_NONE. 
++ fixup_LARCH_NONE = FirstTargetFixupKind, ++ ++ // reloc_hint ++ fixup_LARCH_SOP_PUSH_ABSOLUTE, ++ fixup_LARCH_SOP_PUSH_PCREL, ++ fixup_LARCH_SOP_PUSH_GPREL, ++ fixup_LARCH_SOP_PUSH_TLS_TPREL, ++ fixup_LARCH_SOP_PUSH_TLS_GOT, ++ fixup_LARCH_SOP_PUSH_TLS_GD, ++ fixup_LARCH_SOP_PUSH_PLT_PCREL, ++ // fixup methods ++ fixup_LARCH_32, ++ fixup_LARCH_64, ++ fixup_LARCH_RELATIVE, ++ fixup_LARCH_COPY, ++ fixup_LARCH_JUMP_SLOT, ++ fixup_LARCH_TLS_DTPMOD32, ++ fixup_LARCH_TLS_DTPMOD64, ++ fixup_LARCH_TLS_DTPREL32, ++ fixup_LARCH_TLS_DTPREL64, ++ fixup_LARCH_TLS_TPREL32, ++ fixup_LARCH_TLS_TPREL64, ++ fixup_LARCH_IRELATIVE, ++ fixup_LARCH_MARK_LA, ++ fixup_LARCH_MARK_PCREL, ++ fixup_LARCH_SOP_PUSH_DUP, ++ fixup_LARCH_SOP_ASSERT, ++ fixup_LARCH_SOP_NOT, ++ fixup_LARCH_SOP_SUB, ++ fixup_LARCH_SOP_SL, ++ fixup_LARCH_SOP_SR, ++ fixup_LARCH_SOP_ADD, ++ fixup_LARCH_SOP_AND, ++ fixup_LARCH_SOP_IF_ELSE, ++ fixup_LARCH_SOP_POP_32_S_10_5, ++ fixup_LARCH_SOP_POP_32_U_10_12, ++ fixup_LARCH_SOP_POP_32_S_10_12, ++ fixup_LARCH_SOP_POP_32_S_10_16, ++ fixup_LARCH_SOP_POP_32_S_10_16_S2, ++ fixup_LARCH_SOP_POP_32_S_5_20, ++ fixup_LARCH_SOP_POP_32_S_0_5_10_16_S2, ++ fixup_LARCH_SOP_POP_32_S_0_10_10_16_S2, ++ fixup_LARCH_SOP_POP_32_U, ++ fixup_LARCH_ADD8, ++ fixup_LARCH_ADD16, ++ fixup_LARCH_ADD24, ++ fixup_LARCH_ADD32, ++ fixup_LARCH_ADD64, ++ fixup_LARCH_SUB8, ++ fixup_LARCH_SUB16, ++ fixup_LARCH_SUB24, ++ fixup_LARCH_SUB32, ++ fixup_LARCH_SUB64, ++ fixup_LARCH_GNU_VTINHERIT, ++ fixup_LARCH_GNU_VTENTRY, ++ ++ // Marker ++ LastTargetFixupKind, ++ NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind ++ }; ++} // namespace LoongArch ++} // namespace llvm ++ ++ ++#endif +diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchInstPrinter.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchInstPrinter.cpp +index 66183868f..065020ad4 100644 +--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchInstPrinter.cpp ++++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchInstPrinter.cpp +@@ -1,4 +1,4 @@ +-//===- LoongArchInstPrinter.cpp - Convert LoongArch MCInst to asm syntax --===// ++//===-- LoongArchInstPrinter.cpp - Convert LoongArch MCInst to assembly syntax ------===// + // + // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + // See https://llvm.org/LICENSE.txt for license information. +@@ -11,53 +11,242 @@ + //===----------------------------------------------------------------------===// + + #include "LoongArchInstPrinter.h" +-#include "LoongArchBaseInfo.h" +-#include "llvm/MC/MCAsmInfo.h" ++#include "MCTargetDesc/LoongArchMCExpr.h" ++#include "LoongArchInstrInfo.h" ++#include "MCTargetDesc/LoongArchMCTargetDesc.h" ++#include "llvm/ADT/StringExtras.h" ++#include "llvm/MC/MCExpr.h" + #include "llvm/MC/MCInst.h" +-#include "llvm/MC/MCRegisterInfo.h" +-#include "llvm/MC/MCSubtargetInfo.h" ++#include "llvm/MC/MCInstrInfo.h" + #include "llvm/MC/MCSymbol.h" ++#include "llvm/Support/ErrorHandling.h" ++#include "llvm/Support/raw_ostream.h" + using namespace llvm; + +-#define DEBUG_TYPE "loongarch-asm-printer" ++#define DEBUG_TYPE "asm-printer" + +-// Include the auto-generated portion of the assembly writer. 
+ #define PRINT_ALIAS_INSTR + #include "LoongArchGenAsmWriter.inc" + ++template ++static bool isReg(const MCInst &MI, unsigned OpNo) { ++ assert(MI.getOperand(OpNo).isReg() && "Register operand expected."); ++ return MI.getOperand(OpNo).getReg() == R; ++} ++ ++const char* LoongArch::LoongArchFCCToString(LoongArch::CondCode CC) { ++ switch (CC) { ++ case FCOND_T: ++ case FCOND_F: return "caf"; ++ case FCOND_OR: ++ case FCOND_UN: return "cun"; ++ case FCOND_UNE: ++ case FCOND_OEQ: return "ceq"; ++ case FCOND_ONE: ++ case FCOND_UEQ: return "cueq"; ++ case FCOND_UGE: ++ case FCOND_OLT: return "clt"; ++ case FCOND_OGE: ++ case FCOND_ULT: return "cult"; ++ case FCOND_UGT: ++ case FCOND_OLE: return "cle"; ++ case FCOND_OGT: ++ case FCOND_ULE: return "cule"; ++ case FCOND_ST: ++ case FCOND_SF: return "saf"; ++ case FCOND_GLE: ++ case FCOND_NGLE:return "sun"; ++ case FCOND_SEQ: return "seq"; ++ case FCOND_SNE: return "sne"; ++ case FCOND_GL: ++ case FCOND_NGL: return "sueq"; ++ case FCOND_NLT: ++ case FCOND_LT: return "slt"; ++ case FCOND_GE: ++ case FCOND_NGE: return "sult"; ++ case FCOND_NLE: ++ case FCOND_LE: return "sle"; ++ case FCOND_GT: ++ case FCOND_NGT: return "sule"; ++ case FCOND_CNE: return "cne"; ++ case FCOND_COR: return "cor"; ++ case FCOND_SOR: return "sor"; ++ case FCOND_CUNE: return "cune"; ++ case FCOND_SUNE: return "sune"; ++ } ++ llvm_unreachable("Impossible condition code!"); ++} ++ ++void LoongArchInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { ++ OS << '$' << StringRef(getRegisterName(RegNo)).lower(); ++} ++ + void LoongArchInstPrinter::printInst(const MCInst *MI, uint64_t Address, + StringRef Annot, + const MCSubtargetInfo &STI, + raw_ostream &O) { +- if (!printAliasInstr(MI, Address, STI, O)) +- printInstruction(MI, Address, STI, O); +- printAnnotation(O, Annot); +-} ++ switch (MI->getOpcode()) { ++ default: ++ break; ++ case LoongArch::PCADDU12I_ri: ++ case LoongArch::PCADDU12I_rii: ++ case LoongArch::LU12I_W_ri: ++ printLoadAddr(MI, O); ++ return; ++ case LoongArch::ADD_D_rrr: ++ case LoongArch::LDX_D_rrr: ++ case LoongArch::ADDI_D_rri: ++ case LoongArch::ADDI_D_rrii: ++ case LoongArch::LD_D_rri: ++ case LoongArch::LD_D_rrii: ++ case LoongArch::ORI_rri: ++ case LoongArch::ORI_rrii: ++ case LoongArch::LU32I_D_ri: ++ case LoongArch::LU32I_D_rii: ++ case LoongArch::LU52I_D_rri: ++ case LoongArch::LU52I_D_rrii: ++ O << "\t# la expanded slot"; ++ return; ++ } + +-void LoongArchInstPrinter::printRegName(raw_ostream &O, unsigned RegNo) const { +- O << '$' << getRegisterName(RegNo); ++ // Try to print any aliases first. 
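// Illustrative note, based only on the code in this file: printAliasInstr
// covers the tablegen'ed aliases, while the hand-written printAlias below
// handles the "move" case, e.g.
//   or $r0, $r1, $zero   is printed as   move $r0, $r1
// Similarly, LoongArchFCCToString above maps both orderings of a float
// compare to one mnemonic, e.g. FCOND_OEQ and FCOND_UNE both print "ceq".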
++ if (!printAliasInstr(MI, Address, O) && !printAlias(*MI, O)) ++ printInstruction(MI, Address, O); ++ printAnnotation(O, Annot); + } + + void LoongArchInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, +- const MCSubtargetInfo &STI, +- raw_ostream &O) { +- const MCOperand &MO = MI->getOperand(OpNo); ++ raw_ostream &O) { ++ const MCOperand &Op = MI->getOperand(OpNo); ++ if (Op.isReg()) { ++ printRegName(O, Op.getReg()); ++ return; ++ } + +- if (MO.isReg()) { +- printRegName(O, MO.getReg()); ++ if (Op.isImm()) { ++ O << formatImm(Op.getImm()); + return; + } + ++ assert(Op.isExpr() && "unknown operand kind in printOperand"); ++ Op.getExpr()->print(O, &MAI, true); ++} ++ ++template ++void LoongArchInstPrinter::printUImm(const MCInst *MI, int opNum, raw_ostream &O) { ++ const MCOperand &MO = MI->getOperand(opNum); + if (MO.isImm()) { +- O << MO.getImm(); ++ uint64_t Imm = MO.getImm(); ++ Imm -= Offset; ++ Imm &= (1 << Bits) - 1; ++ Imm += Offset; ++ O << formatImm(Imm); + return; + } + +- assert(MO.isExpr() && "Unknown operand kind in printOperand"); +- MO.getExpr()->print(O, &MAI); ++ printOperand(MI, opNum, O); ++} ++ ++void LoongArchInstPrinter:: ++printMemOperand(const MCInst *MI, int opNum, raw_ostream &O) { ++ // Load/Store memory operands -- $reg, imm ++ printOperand(MI, opNum, O); ++ O << ", "; ++ printOperand(MI, opNum+1, O); ++} ++ ++void LoongArchInstPrinter::printAMemOperand(const MCInst *MI, int opNum, ++ raw_ostream &O) { ++ // AM* instruction memory operand: "rj, 0" ++ printRegName(O, MI->getOperand(opNum).getReg()); ++ O << ", 0"; ++} ++ ++void LoongArchInstPrinter:: ++printMemOperandEA(const MCInst *MI, int opNum, raw_ostream &O) { ++ // when using stack locations for not load/store instructions ++ // print the same way as all normal 3 operand instructions. ++ printOperand(MI, opNum, O); ++ O << ", "; ++ printOperand(MI, opNum+1, O); ++} ++ ++void LoongArchInstPrinter:: ++printFCCOperand(const MCInst *MI, int opNum, raw_ostream &O) { ++ const MCOperand& MO = MI->getOperand(opNum); ++ O << LoongArchFCCToString((LoongArch::CondCode)MO.getImm()); ++} ++ ++bool LoongArchInstPrinter::printAlias(const char *Str, const MCInst &MI, ++ unsigned OpNo, raw_ostream &OS) { ++ OS << "\t" << Str << "\t"; ++ if(MI.getOpcode() == LoongArch::JIRL) { ++ printOperand(&MI, OpNo, OS); ++ OS << "@plt"; ++ }else ++ printOperand(&MI, OpNo, OS); ++ return true; ++} ++ ++bool LoongArchInstPrinter::printAlias(const char *Str, const MCInst &MI, ++ unsigned OpNo0, unsigned OpNo1, ++ raw_ostream &OS) { ++ printAlias(Str, MI, OpNo0, OS); ++ OS << ", "; ++ printOperand(&MI, OpNo1, OS); ++ return true; ++} ++ ++bool LoongArchInstPrinter::printAlias(const MCInst &MI, raw_ostream &OS) { ++ switch (MI.getOpcode()) { ++ case LoongArch::OR: ++ // or $r0, $r1, $zero => move $r0, $r1 ++ return isReg(MI, 2) && printAlias("move", MI, 0, 1, OS); ++ default: return false; ++ } ++} ++ ++void LoongArchInstPrinter:: ++printRegisterList(const MCInst *MI, int opNum, raw_ostream &O) { ++ // - 2 because register List is always first operand of instruction and it is ++ // always followed by memory operand (base + offset). 
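// Worked example of the loop bound used below (illustrative operand layout,
// not from the patch): for an instruction whose operands are
//   { $r23, $r24, $r25, <base>, <offset> }
// getNumOperands() == 5, so e == 3 and only the three list registers are
// printed here; the trailing base/offset pair is printed separately as the
// memory operand.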
++ for (int i = opNum, e = MI->getNumOperands() - 2; i != e; ++i) { ++ if (i != opNum) ++ O << ", "; ++ printRegName(O, MI->getOperand(i).getReg()); ++ } + } + +-const char *LoongArchInstPrinter::getRegisterName(unsigned RegNo) { +- // Default print reg alias name +- return getRegisterName(RegNo, LoongArch::RegAliasName); ++void LoongArchInstPrinter:: ++printLoadAddr(const MCInst *MI, raw_ostream &O) { ++ const MCOperand &Op = MI->getOperand(1); ++ const MCExpr *Expr = Op.getExpr(); ++ const LoongArchMCExpr *LoongArchExpr = cast(Expr); ++ switch (LoongArchExpr->getKind()) { ++ default: ++ llvm_unreachable("invalid handled!"); ++ return; ++ case LoongArchMCExpr::MEK_ABS_HI: ++ O << "\tla.abs\t"; ++ break; ++ case LoongArchMCExpr::MEK_GOT_HI: ++ O << "\tla.got\t"; ++ break; ++ case LoongArchMCExpr::MEK_PCREL_HI: ++ O << "\tla.pcrel\t"; ++ break; ++ case LoongArchMCExpr::MEK_TLSGD_HI: ++ O << "\tla.tls.gd\t"; ++ break; ++ case LoongArchMCExpr::MEK_TLSIE_HI: ++ O << "\tla.tls.ie\t"; ++ break; ++ case LoongArchMCExpr::MEK_TLSLE_HI: ++ O << "\tla.tls.le\t"; ++ break; ++ } ++ printRegName(O, MI->getOperand(0).getReg()); ++ O << ", "; ++ Expr->print(O, nullptr); ++ return; + } +diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchInstPrinter.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchInstPrinter.h +index 0cbb3d73c..050dcc137 100644 +--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchInstPrinter.h ++++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchInstPrinter.h +@@ -1,4 +1,4 @@ +-//===-- LoongArchInstPrinter.h - Convert LoongArch MCInst to asm syntax ---===// ++//=== LoongArchInstPrinter.h - Convert LoongArch MCInst to assembly syntax -*- C++ -*-==// + // + // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + // See https://llvm.org/LICENSE.txt for license information. 
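// Illustrative note on the printing scheme above, with assumed register names
// and slot counts: an expanded load-address pseudo prints its first slot as
// the "la" macro via printLoadAddr and every helper slot as a comment, so a
// PC-relative expansion is rendered roughly as
//   la.pcrel $r4, sym
//        # la expanded slot
// rather than as the underlying instruction pair.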
+@@ -10,40 +10,110 @@ + // + //===----------------------------------------------------------------------===// + +-#ifndef LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHINSTPRINTER_H +-#define LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHINSTPRINTER_H +- +-#include "MCTargetDesc/LoongArchMCTargetDesc.h" ++#ifndef LLVM_LIB_TARGET_LOONGARCH_INSTPRINTER_LOONGARCHINSTPRINTER_H ++#define LLVM_LIB_TARGET_LOONGARCH_INSTPRINTER_LOONGARCHINSTPRINTER_H + #include "llvm/MC/MCInstPrinter.h" + + namespace llvm { + ++namespace LoongArch { ++// LoongArch Branch Codes ++enum FPBranchCode { ++ BRANCH_F, ++ BRANCH_T, ++ BRANCH_INVALID ++}; ++ ++// LoongArch Condition Codes ++enum CondCode { ++ FCOND_F = 0x0, ++ FCOND_SF, ++ FCOND_OLT, ++ FCOND_LT, ++ FCOND_OEQ, ++ FCOND_SEQ, ++ FCOND_OLE, ++ FCOND_LE, ++ FCOND_UN, ++ FCOND_NGLE, ++ FCOND_ULT, ++ FCOND_NGE, ++ FCOND_UEQ, ++ FCOND_NGL, ++ FCOND_ULE, ++ FCOND_NGT, ++ FCOND_CNE, ++ FCOND_SNE, ++ FCOND_COR = 0x14, ++ FCOND_SOR = 0x15, ++ FCOND_CUNE = 0x18, ++ FCOND_SUNE = 0x19, ++ ++ // To be used with float branch False ++ // This conditions have the same mnemonic as the ++ // above ones, but are used with a branch False; ++ FCOND_T, ++ FCOND_UNE, ++ FCOND_ST, ++ FCOND_UGE, ++ FCOND_NLT, ++ FCOND_UGT, ++ FCOND_NLE, ++ FCOND_OR, ++ FCOND_GLE, ++ FCOND_OGE, ++ FCOND_GE, ++ FCOND_ONE, ++ FCOND_GL, ++ FCOND_OGT, ++ FCOND_GT ++}; ++ ++const char *LoongArchFCCToString(LoongArch::CondCode CC); ++} // end namespace LoongArch ++ + class LoongArchInstPrinter : public MCInstPrinter { + public: + LoongArchInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, +- const MCRegisterInfo &MRI) +- : MCInstPrinter(MAI, MII, MRI) {} ++ const MCRegisterInfo &MRI) ++ : MCInstPrinter(MAI, MII, MRI) {} + ++ // Autogenerated by tblgen. ++ std::pair getMnemonic(const MCInst *MI) override; ++ void printInstruction(const MCInst *MI, uint64_t Address, raw_ostream &O); ++ static const char *getRegisterName(unsigned RegNo); ++ ++ void printRegName(raw_ostream &OS, unsigned RegNo) const override; + void printInst(const MCInst *MI, uint64_t Address, StringRef Annot, + const MCSubtargetInfo &STI, raw_ostream &O) override; +- void printRegName(raw_ostream &O, unsigned RegNo) const override; + +- // Autogenerated by tblgen. 
+- std::pair getMnemonic(const MCInst *MI) override; +- void printInstruction(const MCInst *MI, uint64_t Address, +- const MCSubtargetInfo &STI, raw_ostream &O); +- bool printAliasInstr(const MCInst *MI, uint64_t Address, +- const MCSubtargetInfo &STI, raw_ostream &O); ++ bool printAliasInstr(const MCInst *MI, uint64_t Address, raw_ostream &OS); + void printCustomAliasOperand(const MCInst *MI, uint64_t Address, + unsigned OpIdx, unsigned PrintMethodIdx, +- const MCSubtargetInfo &STI, raw_ostream &O); +- static const char *getRegisterName(unsigned RegNo); +- static const char *getRegisterName(unsigned RegNo, unsigned AltIdx); ++ raw_ostream &O); + + private: +- void printOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, +- raw_ostream &O); ++ void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); ++ void printOperand(const MCInst *MI, uint64_t /*Address*/, unsigned OpNum, ++ raw_ostream &O) { ++ printOperand(MI, OpNum, O); ++ } ++ template ++ void printUImm(const MCInst *MI, int opNum, raw_ostream &O); ++ void printMemOperand(const MCInst *MI, int opNum, raw_ostream &O); ++ void printAMemOperand(const MCInst *MI, int opNum, raw_ostream &O); ++ void printMemOperandEA(const MCInst *MI, int opNum, raw_ostream &O); ++ void printFCCOperand(const MCInst *MI, int opNum, raw_ostream &O); ++ ++ bool printAlias(const char *Str, const MCInst &MI, unsigned OpNo, ++ raw_ostream &OS); ++ bool printAlias(const char *Str, const MCInst &MI, unsigned OpNo0, ++ unsigned OpNo1, raw_ostream &OS); ++ bool printAlias(const MCInst &MI, raw_ostream &OS); ++ void printSaveRestore(const MCInst *MI, raw_ostream &O); ++ void printRegisterList(const MCInst *MI, int opNum, raw_ostream &O); ++ void printLoadAddr(const MCInst *MI, raw_ostream &O); + }; + } // end namespace llvm + +-#endif // LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHINSTPRINTER_H ++#endif +diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCAsmInfo.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCAsmInfo.cpp +index bc946db2f..b3091a107 100644 +--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCAsmInfo.cpp ++++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCAsmInfo.cpp +@@ -1,4 +1,4 @@ +-//===-- LoongArchMCAsmInfo.cpp - LoongArch Asm properties ------*- C++ -*--===// ++//===-- LoongArchMCAsmInfo.cpp - LoongArch Asm Properties ---------------------------===// + // + // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + // See https://llvm.org/LICENSE.txt for license information. +@@ -12,23 +12,28 @@ + + #include "LoongArchMCAsmInfo.h" + #include "llvm/ADT/Triple.h" +-#include "llvm/BinaryFormat/Dwarf.h" +-#include "llvm/MC/MCStreamer.h" + + using namespace llvm; + +-void LoongArchMCAsmInfo::anchor() {} ++void LoongArchMCAsmInfo::anchor() { } + +-LoongArchMCAsmInfo::LoongArchMCAsmInfo(const Triple &TT) { +- CodePointerSize = CalleeSaveStackSlotSize = TT.isArch64Bit() ? 
8 : 4; +- AlignmentIsInBytes = false; +- Data8bitsDirective = "\t.byte\t"; +- Data16bitsDirective = "\t.half\t"; +- Data32bitsDirective = "\t.word\t"; +- Data64bitsDirective = "\t.dword\t"; +- ZeroDirective = "\t.space\t"; +- CommentString = "#"; ++LoongArchMCAsmInfo::LoongArchMCAsmInfo(const Triple &TheTriple, ++ const MCTargetOptions &Options) { ++ ++ if (TheTriple.isLoongArch64() ++ && TheTriple.getEnvironment() != Triple::GNUABILPX32) ++ CodePointerSize = CalleeSaveStackSlotSize = 8; ++ ++ AlignmentIsInBytes = false; ++ Data16bitsDirective = "\t.half\t"; ++ Data32bitsDirective = "\t.word\t"; ++ Data64bitsDirective = "\t.dword\t"; ++ CommentString = "#"; ++ ZeroDirective = "\t.space\t"; + SupportsDebugInformation = true; +- DwarfRegNumForCFI = true; + ExceptionsType = ExceptionHandling::DwarfCFI; ++ DwarfRegNumForCFI = true; ++ //HasLoongArchExpressions = true; ++ UseIntegratedAssembler = true; ++ UsesELFSectionDirectiveForBSS = true; + } +diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCAsmInfo.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCAsmInfo.h +index ed1abbf46..244db58db 100644 +--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCAsmInfo.h ++++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCAsmInfo.h +@@ -1,4 +1,4 @@ +-//===-- LoongArchMCAsmInfo.h - LoongArch Asm Info --------------*- C++ -*--===// ++//===-- LoongArchMCAsmInfo.h - LoongArch Asm Info ------------------------*- C++ -*--===// + // + // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + // See https://llvm.org/LICENSE.txt for license information. +@@ -22,9 +22,10 @@ class LoongArchMCAsmInfo : public MCAsmInfoELF { + void anchor() override; + + public: +- explicit LoongArchMCAsmInfo(const Triple &TargetTriple); ++ explicit LoongArchMCAsmInfo(const Triple &TheTriple, ++ const MCTargetOptions &Options); + }; + +-} // end namespace llvm ++} // namespace llvm + +-#endif // LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHMCASMINFO_H ++#endif +diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp +index 01a370a90..df4e72e90 100644 +--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp ++++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp +@@ -1,4 +1,4 @@ +-//=- LoongArchMCCodeEmitter.cpp - Convert LoongArch code to machine code --===// ++//===-- LoongArchMCCodeEmitter.cpp - Convert LoongArch Code to Machine Code ---------===// + // + // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + // See https://llvm.org/LICENSE.txt for license information. 
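// Illustrative sketch of the effect of the MCAsmInfo settings above (example
// values only): scalar data and padding are emitted as
//   .half   1234                  # 2 bytes
//   .word   0x12345678            # 4 bytes
//   .dword  0x123456789abcdef0    # 8 bytes
//   .space  16                    # 16 zero bytes
// and CodePointerSize/CalleeSaveStackSlotSize are 8 on loongarch64 targets
// that do not use the GNUABILPX32 environment.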
+@@ -10,118 +10,1422 @@ + // + //===----------------------------------------------------------------------===// + +-#include "MCTargetDesc/LoongArchBaseInfo.h" ++#include "LoongArchMCCodeEmitter.h" ++#include "MCTargetDesc/LoongArchFixupKinds.h" ++#include "MCTargetDesc/LoongArchMCExpr.h" + #include "MCTargetDesc/LoongArchMCTargetDesc.h" +-#include "llvm/MC/MCCodeEmitter.h" ++#include "MCTargetDesc/LoongArchInstPrinter.h" ++#include "llvm/ADT/APFloat.h" ++#include "llvm/ADT/APInt.h" ++#include "llvm/ADT/SmallVector.h" + #include "llvm/MC/MCContext.h" +-#include "llvm/MC/MCInstBuilder.h" ++#include "llvm/MC/MCExpr.h" ++#include "llvm/MC/MCFixup.h" ++#include "llvm/MC/MCInst.h" ++#include "llvm/MC/MCInstrDesc.h" + #include "llvm/MC/MCInstrInfo.h" + #include "llvm/MC/MCRegisterInfo.h" +-#include "llvm/Support/EndianStream.h" ++#include "llvm/MC/MCSubtargetInfo.h" ++#include "llvm/Support/Casting.h" ++#include "llvm/Support/ErrorHandling.h" ++#include "llvm/Support/raw_ostream.h" ++#include ++#include + + using namespace llvm; + + #define DEBUG_TYPE "mccodeemitter" + +-namespace { +-class LoongArchMCCodeEmitter : public MCCodeEmitter { +- LoongArchMCCodeEmitter(const LoongArchMCCodeEmitter &) = delete; +- void operator=(const LoongArchMCCodeEmitter &) = delete; +- MCContext &Ctx; +- MCInstrInfo const &MCII; +- +-public: +- LoongArchMCCodeEmitter(MCContext &ctx, MCInstrInfo const &MCII) +- : Ctx(ctx), MCII(MCII) {} +- +- ~LoongArchMCCodeEmitter() override {} +- +- void encodeInstruction(const MCInst &MI, raw_ostream &OS, +- SmallVectorImpl &Fixups, +- const MCSubtargetInfo &STI) const override; +- +- /// TableGen'erated function for getting the binary encoding for an +- /// instruction. +- uint64_t getBinaryCodeForInstr(const MCInst &MI, +- SmallVectorImpl &Fixups, +- const MCSubtargetInfo &STI) const; +- +- /// Return binary encoding of operand. If the machine operand requires +- /// relocation, record the relocation and return zero. +- unsigned getMachineOpValue(const MCInst &MI, const MCOperand &MO, +- SmallVectorImpl &Fixups, +- const MCSubtargetInfo &STI) const; +- +- /// Return binary encoding of an immediate operand specified by OpNo. +- /// The value returned is the value of the immediate minus 1. +- /// Note that this function is dedicated to specific immediate types, +- /// e.g. uimm2_plus1. +- unsigned getImmOpValueSub1(const MCInst &MI, unsigned OpNo, +- SmallVectorImpl &Fixups, +- const MCSubtargetInfo &STI) const; +- +- /// Return binary encoding of an immediate operand specified by OpNo. +- /// The value returned is the value of the immediate shifted right +- // arithmetically by 2. +- /// Note that this function is dedicated to specific immediate types, +- /// e.g. simm14_lsl2, simm16_lsl2, simm21_lsl2 and simm26_lsl2. 
+- unsigned getImmOpValueAsr2(const MCInst &MI, unsigned OpNo, +- SmallVectorImpl &Fixups, +- const MCSubtargetInfo &STI) const; +-}; +-} // end namespace ++#define GET_INSTRMAP_INFO ++#include "LoongArchGenInstrInfo.inc" ++#undef GET_INSTRMAP_INFO + +-unsigned +-LoongArchMCCodeEmitter::getMachineOpValue(const MCInst &MI, const MCOperand &MO, +- SmallVectorImpl &Fixups, +- const MCSubtargetInfo &STI) const { ++namespace llvm { + +- if (MO.isReg()) +- return Ctx.getRegisterInfo()->getEncodingValue(MO.getReg()); ++MCCodeEmitter *createLoongArchMCCodeEmitter(const MCInstrInfo &MCII, ++ MCContext &Ctx) { ++ return new LoongArchMCCodeEmitter(MCII, Ctx); ++} + +- if (MO.isImm()) +- return static_cast(MO.getImm()); ++} // end namespace llvm + +- llvm_unreachable("Unhandled expression!"); ++void LoongArchMCCodeEmitter::EmitByte(unsigned char C, raw_ostream &OS) const { ++ OS << (char)C; + } + +-unsigned +-LoongArchMCCodeEmitter::getImmOpValueSub1(const MCInst &MI, unsigned OpNo, +- SmallVectorImpl &Fixups, +- const MCSubtargetInfo &STI) const { +- return MI.getOperand(OpNo).getImm() - 1; ++void LoongArchMCCodeEmitter::EmitInstruction(uint64_t Val, unsigned Size, ++ const MCSubtargetInfo &STI, ++ raw_ostream &OS) const { ++ for (unsigned i = 0; i < Size; ++i) { ++ unsigned Shift = i * 8; ++ EmitByte((Val >> Shift) & 0xff, OS); ++ } + } + +-unsigned +-LoongArchMCCodeEmitter::getImmOpValueAsr2(const MCInst &MI, unsigned OpNo, +- SmallVectorImpl &Fixups, +- const MCSubtargetInfo &STI) const { +- unsigned Res = MI.getOperand(OpNo).getImm(); +- assert((Res & 3) == 0 && "lowest 2 bits are non-zero"); +- return Res >> 2; +-} +- +-void LoongArchMCCodeEmitter::encodeInstruction( +- const MCInst &MI, raw_ostream &OS, SmallVectorImpl &Fixups, +- const MCSubtargetInfo &STI) const { +- const MCInstrDesc &Desc = MCII.get(MI.getOpcode()); +- // Get byte count of instruction. ++/// encodeInstruction - Emit the instruction. ++/// Size the instruction with Desc.getSize(). ++void LoongArchMCCodeEmitter:: ++encodeInstruction(const MCInst &MI, raw_ostream &OS, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const ++{ ++ MCInst TmpInst = MI; ++ ++ uint32_t Binary = getBinaryCodeForInstr(TmpInst, Fixups, STI); ++ ++ const MCInstrDesc &Desc = MCII.get(TmpInst.getOpcode()); ++ ++ // Get byte count of instruction + unsigned Size = Desc.getSize(); ++ if (!Size) ++ llvm_unreachable("Desc.getSize() returns 0"); ++ ++ EmitInstruction(Binary, Size, STI, OS); ++} ++ ++/// getBranchTargetOpValue - Return binary encoding of the branch ++/// target operand. If the machine operand requires relocation, ++/// record the relocation and return zero. ++unsigned LoongArchMCCodeEmitter:: ++getBranchTargetOpValue(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const { ++ const MCOperand &MO = MI.getOperand(OpNo); + +- switch (Size) { ++ // If the destination is an immediate, divide by 4. ++ if (MO.isImm()) return MO.getImm() >> 2; ++ ++ assert(MO.isExpr() && ++ "getBranchTargetOpValue expects only expressions or immediates"); ++ ++ // XXX: brtarget reloc EncoderMethod. 
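// Minimal sketch, assuming the stack-based R_LARCH_SOP_* model of the
// original LoongArch ELF ABI that the fixups pushed below rely on: the PUSH_*
// relocation pushes a value (here S + A - PC), and the matching POP_32_S_*
// relocation pops it, range-checks it, and writes it into an instruction
// field; immediates are stored as word offsets, which is why an immediate
// destination above is divided by 4. Under that assumption, resolving the
// 16-bit branch form amounts to a helper like this standalone sketch (a
// +0x1000 byte offset stores 0x400 into bits [25:10]):
#include <cassert>
#include <cstdint>
static uint32_t patchBranch16(uint32_t Insn, int64_t Offset) {
  assert((Offset & 3) == 0 && "branch offsets are word-aligned");
  assert(Offset >= -(1 << 17) && Offset < (1 << 17) && "must fit 18 signed bits");
  uint32_t Imm16 = static_cast<uint32_t>(Offset >> 2) & 0xffff; // signed 16-bit field
  return (Insn & ~(0xffffu << 10)) | (Imm16 << 10);             // insert into bits [25:10]
}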
++ const MCExpr *Expr = MO.getExpr(); ++ int64_t Value = 0x0; ++ const MCConstantExpr *tmpExpr = MCConstantExpr::create(Value, Ctx); ++ Fixups.push_back(MCFixup::create(0, Expr, ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_PCREL))); ++ switch (MI.getOpcode()) { + default: +- llvm_unreachable("Unhandled encodeInstruction length!"); +- case 4: { +- uint32_t Bits = getBinaryCodeForInstr(MI, Fixups, STI); +- support::endian::write(OS, Bits, support::little); ++ llvm_unreachable("Unhandled reloc instruction!"); ++ break; ++ case LoongArch::BEQZ: ++ case LoongArch::BEQZ32: ++ case LoongArch::BNEZ: ++ case LoongArch::BNEZ32: ++ case LoongArch::BCEQZ: ++ case LoongArch::BCNEZ: ++ Fixups.push_back(MCFixup::create(0, tmpExpr, ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_POP_32_S_0_5_10_16_S2))); + break; ++ case LoongArch::BEQ: ++ case LoongArch::BEQ32: ++ case LoongArch::BNE: ++ case LoongArch::BNE32: ++ case LoongArch::BLT: ++ case LoongArch::BLT32: ++ case LoongArch::BGE: ++ case LoongArch::BGE32: ++ case LoongArch::BLTU: ++ case LoongArch::BLTU32: ++ case LoongArch::BGEU: ++ case LoongArch::BGEU32: ++ Fixups.push_back(MCFixup::create(0, tmpExpr, ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_POP_32_S_10_16_S2))); ++ break; ++ } ++ return 0; ++} ++ ++/// getJumpTargetOpValue - Return binary encoding of the jump ++/// target operand. If the machine operand requires relocation, ++/// record the relocation and return zero. ++unsigned LoongArchMCCodeEmitter:: ++getJumpTargetOpValue(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const { ++ const MCOperand &MO = MI.getOperand(OpNo); ++ // If the destination is an immediate, divide by 4. ++ if (MO.isImm()) return MO.getImm()>>2; ++ ++ assert(MO.isExpr() && ++ "getJumpTargetOpValue expects only expressions or an immediate"); ++ ++ const MCExpr *Expr = MO.getExpr(); ++ int64_t Value = 0x0; ++ const MCConstantExpr *tmpExpr = MCConstantExpr::create(Value, Ctx); ++ Fixups.push_back(MCFixup::create(0, Expr, ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_PLT_PCREL))); ++ if (MI.getOpcode() == LoongArch::JIRL) ++ Fixups.push_back(MCFixup::create(0, tmpExpr, ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_POP_32_S_10_16_S2))); ++ else // B or BL ++ Fixups.push_back(MCFixup::create(0, tmpExpr, ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_POP_32_S_0_10_10_16_S2))); ++ return 0; ++} ++ ++unsigned LoongArchMCCodeEmitter:: ++getSImm11Lsl1Encoding(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const { ++ const MCOperand &MO = MI.getOperand(OpNo); ++ if (MO.isImm()) { ++ unsigned Value = MO.getImm(); ++ return Value >> 1; ++ } ++ ++ return 0; ++} ++ ++unsigned LoongArchMCCodeEmitter:: ++getSImm10Lsl2Encoding(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const { ++ const MCOperand &MO = MI.getOperand(OpNo); ++ if (MO.isImm()) { ++ unsigned Value = MO.getImm(); ++ return Value >> 2; ++ } ++ ++ return 0; ++} ++ ++unsigned LoongArchMCCodeEmitter:: ++getSImm9Lsl3Encoding(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const { ++ const MCOperand &MO = MI.getOperand(OpNo); ++ if (MO.isImm()) { ++ unsigned Value = MO.getImm(); ++ return Value >> 3; ++ } ++ ++ return 0; ++} ++ ++unsigned LoongArchMCCodeEmitter:: ++getSImm8Lsl1Encoding(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const { ++ const MCOperand &MO = MI.getOperand(OpNo); ++ if (MO.isImm()) { ++ unsigned Value 
= MO.getImm(); ++ return Value >> 1; ++ } ++ ++ return 0; ++} ++ ++unsigned LoongArchMCCodeEmitter:: ++getSImm8Lsl2Encoding(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const { ++ const MCOperand &MO = MI.getOperand(OpNo); ++ if (MO.isImm()) { ++ unsigned Value = MO.getImm(); ++ return Value >> 2; ++ } ++ ++ return 0; ++} ++ ++unsigned LoongArchMCCodeEmitter:: ++getSImm8Lsl3Encoding(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const { ++ const MCOperand &MO = MI.getOperand(OpNo); ++ if (MO.isImm()) { ++ unsigned Value = MO.getImm(); ++ return Value >> 3; ++ } ++ ++ return 0; ++} ++ ++unsigned LoongArchMCCodeEmitter:: ++getExprOpValue(const MCInst &MI, const MCExpr *Expr, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const { ++ int64_t Res; ++ ++ if (Expr->evaluateAsAbsolute(Res)) ++ return Res; ++ ++ MCExpr::ExprKind Kind = Expr->getKind(); ++ if (Kind == MCExpr::Constant) { ++ return cast(Expr)->getValue(); ++ } ++ ++ if (Kind == MCExpr::Binary) { ++ unsigned Res = getExprOpValue(MI, cast(Expr)->getLHS(), Fixups, STI); ++ Res += getExprOpValue(MI, cast(Expr)->getRHS(), Fixups, STI); ++ return Res; ++ } ++ ++ if (Kind == MCExpr::Target) { ++ int64_t Value = 0x0; ++ const LoongArchMCExpr *LoongArchExpr = cast(Expr); ++ const MCExpr *BinExpr = nullptr; ++ const MCExpr *GOTExpr = nullptr; ++ const MCSymbol *GOTSymbol = Ctx.getOrCreateSymbol(StringRef("_GLOBAL_OFFSET_TABLE_")); ++ ++ LoongArch::Fixups FixupKind = LoongArch::Fixups(0); ++ switch (LoongArchExpr->getKind()) { ++ case LoongArchMCExpr::MEK_None: ++ case LoongArchMCExpr::MEK_Special: ++ llvm_unreachable("Unhandled fixup kind!"); ++ Fixups.push_back(MCFixup::create(0, LoongArchExpr, MCFixupKind(FixupKind))); ++ break; ++ case LoongArchMCExpr::MEK_PLT: ++ Value = 0x0; ++ FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_PLT_PCREL; ++ Fixups.push_back(MCFixup::create(0, LoongArchExpr, MCFixupKind(FixupKind))); ++ if (MI.getOpcode() == LoongArch::JIRL) ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_POP_32_S_10_16_S2))); ++ else // B or BL ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_POP_32_S_0_10_10_16_S2))); ++ break; ++ case LoongArchMCExpr::MEK_CALL_HI: ++ FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_PLT_PCREL; ++ Fixups.push_back(MCFixup::create(0, LoongArchExpr, MCFixupKind(FixupKind))); ++ ++ Value = 0x20000; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_ADD))); ++ Value = 0x12; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_SR))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_POP_32_S_5_20))); ++ ++ break; ++ case LoongArchMCExpr::MEK_CALL_LO: ++ FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_PLT_PCREL; ++ Fixups.push_back(MCFixup::create(0, LoongArchExpr, MCFixupKind(FixupKind))); ++ ++ Value = 0x4; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), 
++ MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_ADD))); ++ Fixups.push_back(MCFixup::create(0, LoongArchExpr, MCFixupKind(FixupKind))); ++ Value = 0x20004; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_ADD))); ++ Value = 0x12; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_SR))); ++ Value = 0x12; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_SL))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_SUB))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_POP_32_S_10_16_S2))); ++ break; ++ case LoongArchMCExpr::MEK_GOT_HI: ++ FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_PCREL; ++ GOTExpr = MCSymbolRefExpr::create(GOTSymbol, ++ MCSymbolRefExpr::VK_None, Ctx); ++ Value = 0x800; ++ BinExpr = MCBinaryExpr::createAdd(GOTExpr, MCConstantExpr::create(Value, Ctx), Ctx); ++ Fixups.push_back(MCFixup::create(0, BinExpr, MCFixupKind(FixupKind))); ++ ++ FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_GPREL; ++ Fixups.push_back(MCFixup::create(0, LoongArchExpr, MCFixupKind(FixupKind))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_ADD))); ++ Value = 0xc; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_SR))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_POP_32_S_5_20))); ++ break; ++ case LoongArchMCExpr::MEK_GOT_LO: ++ FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_PCREL; ++ GOTExpr = MCSymbolRefExpr::create(GOTSymbol, ++ MCSymbolRefExpr::VK_None, Ctx); ++ Value = 0x4; ++ BinExpr = MCBinaryExpr::createAdd(GOTExpr, MCConstantExpr::create(Value, Ctx), Ctx); ++ Fixups.push_back(MCFixup::create(0, BinExpr, MCFixupKind(FixupKind))); ++ FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_GPREL; ++ Fixups.push_back(MCFixup::create(0, LoongArchExpr, MCFixupKind(FixupKind))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_ADD))); ++ FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_PCREL; ++ Value = 0x804; ++ BinExpr = MCBinaryExpr::createAdd(GOTExpr, MCConstantExpr::create(Value, Ctx), Ctx); ++ Fixups.push_back(MCFixup::create(0, BinExpr, MCFixupKind(FixupKind))); ++ FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_GPREL; ++ Fixups.push_back(MCFixup::create(0, LoongArchExpr, MCFixupKind(FixupKind))); ++ Value 
= 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_ADD))); ++ Value = 0xc; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_SR))); ++ Value = 0xc; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_SL))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_SUB))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_POP_32_S_10_12))); ++ break; ++ case LoongArchMCExpr::MEK_GOT_RRHI: ++ FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_PCREL; ++ GOTExpr = MCSymbolRefExpr::create(GOTSymbol, ++ MCSymbolRefExpr::VK_None, Ctx); ++ Fixups.push_back(MCFixup::create(0, GOTExpr, MCFixupKind(FixupKind))); ++ FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_GPREL; ++ Fixups.push_back(MCFixup::create(0, LoongArchExpr, MCFixupKind(FixupKind))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_ADD))); ++ FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_PCREL; ++ Value = 0x80000000; ++ BinExpr = MCBinaryExpr::createAdd(GOTExpr, MCConstantExpr::create(Value, Ctx), Ctx); ++ Fixups.push_back(MCFixup::create(0, BinExpr, MCFixupKind(FixupKind))); ++ FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_GPREL; ++ Fixups.push_back(MCFixup::create(0, LoongArchExpr, MCFixupKind(FixupKind))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_ADD))); ++ Value = 0x20; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_SR))); ++ Value = 0x20; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_SL))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_SUB))); ++ Value = 0x20; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_SL))); ++ Value = 0x2c; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_SR))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_POP_32_S_5_20))); ++ break; ++ case 
LoongArchMCExpr::MEK_GOT_RRLO: ++ FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_PCREL; ++ GOTExpr = MCSymbolRefExpr::create(GOTSymbol, ++ MCSymbolRefExpr::VK_None, Ctx); ++ Value = 0x4; ++ BinExpr = MCBinaryExpr::createAdd(GOTExpr, MCConstantExpr::create(Value, Ctx), Ctx); ++ Fixups.push_back(MCFixup::create(0, BinExpr, MCFixupKind(FixupKind))); ++ FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_GPREL; ++ Fixups.push_back(MCFixup::create(0, LoongArchExpr, MCFixupKind(FixupKind))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_ADD))); ++ FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_PCREL; ++ Value = 0x80000004; ++ BinExpr = MCBinaryExpr::createAdd(GOTExpr, MCConstantExpr::create(Value, Ctx), Ctx); ++ Fixups.push_back(MCFixup::create(0, BinExpr, MCFixupKind(FixupKind))); ++ FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_GPREL; ++ Fixups.push_back(MCFixup::create(0, LoongArchExpr, MCFixupKind(FixupKind))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_ADD))); ++ Value = 0x20; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_SR))); ++ Value = 0x20; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_SL))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_SUB))); ++ Value = 0xfff; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_AND))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_POP_32_U_10_12))); ++ break; ++ case LoongArchMCExpr::MEK_GOT_RRHIGHER: ++ FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_PCREL; ++ GOTExpr = MCSymbolRefExpr::create(GOTSymbol, ++ MCSymbolRefExpr::VK_None, Ctx); ++ Value = 0x80000008; ++ BinExpr = MCBinaryExpr::createAdd(GOTExpr, MCConstantExpr::create(Value, Ctx), Ctx); ++ Fixups.push_back(MCFixup::create(0, BinExpr, MCFixupKind(FixupKind))); ++ FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_GPREL; ++ Fixups.push_back(MCFixup::create(0, LoongArchExpr, MCFixupKind(FixupKind))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_ADD))); ++ Value = 0xc; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_SL))); ++ Value = 0x2c; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_SR))); ++ Value = 0x0; 
++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_POP_32_S_5_20))); ++ break; ++ case LoongArchMCExpr::MEK_GOT_RRHIGHEST: ++ FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_PCREL; ++ GOTExpr = MCSymbolRefExpr::create(GOTSymbol, ++ MCSymbolRefExpr::VK_None, Ctx); ++ Value = 0x8000000c; ++ BinExpr = MCBinaryExpr::createAdd(GOTExpr, MCConstantExpr::create(Value, Ctx), Ctx); ++ Fixups.push_back(MCFixup::create(0, BinExpr, MCFixupKind(FixupKind))); ++ FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_GPREL; ++ Fixups.push_back(MCFixup::create(0, LoongArchExpr, MCFixupKind(FixupKind))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_ADD))); ++ Value = 0x34; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_SR))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_POP_32_S_10_12))); ++ break; ++ case LoongArchMCExpr::MEK_ABS_HI: ++ FixupKind = LoongArch::fixup_LARCH_MARK_LA; ++ Fixups.push_back(MCFixup::create(0, LoongArchExpr, MCFixupKind(FixupKind))); ++ FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE; ++ Fixups.push_back(MCFixup::create(0, LoongArchExpr, MCFixupKind(FixupKind))); ++ Value = 0x20; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_SL))); ++ Value = 0x2c; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_SR))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_POP_32_S_5_20))); ++ break; ++ case LoongArchMCExpr::MEK_ABS_LO: ++ FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE; ++ Fixups.push_back(MCFixup::create(0, LoongArchExpr, MCFixupKind(FixupKind))); ++ Value = 0xfff; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_AND))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_POP_32_U_10_12))); ++ break; ++ case LoongArchMCExpr::MEK_ABS_HIGHER: ++ FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE; ++ Fixups.push_back(MCFixup::create(0, LoongArchExpr, MCFixupKind(FixupKind))); ++ Value = 0xc; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_SL))); ++ Value = 0x2c; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); ++ Value = 0x0; 
++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_SR))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_POP_32_S_5_20))); ++ break; ++ case LoongArchMCExpr::MEK_ABS_HIGHEST: ++ FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE; ++ Fixups.push_back(MCFixup::create(0, LoongArchExpr, MCFixupKind(FixupKind))); ++ Value = 0x34; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_SR))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_POP_32_S_10_12))); ++ break; ++ case LoongArchMCExpr::MEK_PCREL_HI: ++ FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_PCREL; ++ Value = 0x800; ++ BinExpr = MCBinaryExpr::createAdd(Expr, MCConstantExpr::create(Value, Ctx), Ctx); ++ Fixups.push_back(MCFixup::create(0, BinExpr, MCFixupKind(FixupKind))); ++ Value = 0xc; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_SR))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_POP_32_S_5_20))); ++ break; ++ case LoongArchMCExpr::MEK_PCREL_LO: ++ FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_PCREL; ++ Value = 0x4; ++ BinExpr = MCBinaryExpr::createAdd(Expr, MCConstantExpr::create(Value, Ctx), Ctx); ++ Fixups.push_back(MCFixup::create(0, BinExpr, MCFixupKind(FixupKind))); ++ FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_PCREL; ++ Value = 0x804; ++ BinExpr = MCBinaryExpr::createAdd(Expr, MCConstantExpr::create(Value, Ctx), Ctx); ++ Fixups.push_back(MCFixup::create(0, BinExpr, MCFixupKind(FixupKind))); ++ Value = 0xc; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_SR))); ++ Value = 0xc; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_SL))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_SUB))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_POP_32_S_10_12))); ++ break; ++ case LoongArchMCExpr::MEK_PCREL_RRHI: ++ FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_PCREL; ++ Fixups.push_back(MCFixup::create(0, LoongArchExpr, MCFixupKind(FixupKind))); ++ FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_PCREL; ++ Value = 0x80000000; ++ BinExpr = MCBinaryExpr::createAdd(LoongArchExpr, MCConstantExpr::create(Value, Ctx), Ctx); ++ Fixups.push_back(MCFixup::create(0, BinExpr, MCFixupKind(FixupKind))); ++ Value = 0x20; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ 
MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_SR))); ++ Value = 0x20; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_SL))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_SUB))); ++ Value = 0x20; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_SL))); ++ Value = 0x2c; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_SR))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_POP_32_S_5_20))); ++ break; ++ case LoongArchMCExpr::MEK_PCREL_RRLO: ++ FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_PCREL; ++ Value = 0x4; ++ BinExpr = MCBinaryExpr::createAdd(LoongArchExpr, MCConstantExpr::create(Value, Ctx), Ctx); ++ Fixups.push_back(MCFixup::create(0, BinExpr, MCFixupKind(FixupKind))); ++ FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_PCREL; ++ Value = 0x80000004; ++ BinExpr = MCBinaryExpr::createAdd(LoongArchExpr, MCConstantExpr::create(Value, Ctx), Ctx); ++ Fixups.push_back(MCFixup::create(0, BinExpr, MCFixupKind(FixupKind))); ++ Value = 0x20; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_SR))); ++ Value = 0x20; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_SL))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_SUB))); ++ Value = 0xfff; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_AND))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_POP_32_U_10_12))); ++ break; ++ case LoongArchMCExpr::MEK_PCREL_RRHIGHER: ++ FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_PCREL; ++ Value = 0x80000008; ++ BinExpr = MCBinaryExpr::createAdd(LoongArchExpr, MCConstantExpr::create(Value, Ctx), Ctx); ++ Fixups.push_back(MCFixup::create(0, BinExpr, MCFixupKind(FixupKind))); ++ Value = 0xc; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); ++ Value = 0x0; ++ 
Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_SL))); ++ Value = 0x2c; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_SR))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_POP_32_S_5_20))); ++ break; ++ case LoongArchMCExpr::MEK_PCREL_RRHIGHEST: ++ FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_PCREL; ++ Value = 0x8000000c; ++ BinExpr = MCBinaryExpr::createAdd(LoongArchExpr, MCConstantExpr::create(Value, Ctx), Ctx); ++ Fixups.push_back(MCFixup::create(0, BinExpr, MCFixupKind(FixupKind))); ++ Value = 0x34; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_SR))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_POP_32_S_10_12))); ++ break; ++ case LoongArchMCExpr::MEK_TLSGD_HI: ++ FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_PCREL; ++ GOTExpr = MCSymbolRefExpr::create(GOTSymbol, ++ MCSymbolRefExpr::VK_None, Ctx); ++ Value = 0x800; ++ BinExpr = MCBinaryExpr::createAdd(GOTExpr, MCConstantExpr::create(Value, Ctx), Ctx); ++ Fixups.push_back(MCFixup::create(0, BinExpr, MCFixupKind(FixupKind))); ++ FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_TLS_GD; ++ Fixups.push_back(MCFixup::create(0, LoongArchExpr, MCFixupKind(FixupKind))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_ADD))); ++ Value = 0xc; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_SR))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_POP_32_S_5_20))); ++ break; ++ case LoongArchMCExpr::MEK_TLSGD_LO: ++ FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_PCREL; ++ GOTExpr = MCSymbolRefExpr::create(GOTSymbol, ++ MCSymbolRefExpr::VK_None, Ctx); ++ Value = 0x4; ++ BinExpr = MCBinaryExpr::createAdd(GOTExpr, MCConstantExpr::create(Value, Ctx), Ctx); ++ Fixups.push_back(MCFixup::create(0, BinExpr, MCFixupKind(FixupKind))); ++ FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_TLS_GD; ++ Fixups.push_back(MCFixup::create(0, LoongArchExpr, MCFixupKind(FixupKind))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_ADD))); ++ FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_PCREL; ++ Value = 0x804; ++ BinExpr = MCBinaryExpr::createAdd(GOTExpr, MCConstantExpr::create(Value, Ctx), Ctx); ++ Fixups.push_back(MCFixup::create(0, BinExpr, MCFixupKind(FixupKind))); ++ FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_TLS_GD; ++ Fixups.push_back(MCFixup::create(0, LoongArchExpr, MCFixupKind(FixupKind))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_ADD))); ++ Value = 
0xc; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_SR))); ++ Value = 0xc; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_SL))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_SUB))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_POP_32_S_10_12))); ++ break; ++ case LoongArchMCExpr::MEK_TLSGD_RRHI: ++ FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_PCREL; ++ GOTExpr = MCSymbolRefExpr::create(GOTSymbol, ++ MCSymbolRefExpr::VK_None, Ctx); ++ Fixups.push_back(MCFixup::create(0, GOTExpr, MCFixupKind(FixupKind))); ++ FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_TLS_GD; ++ Fixups.push_back(MCFixup::create(0, LoongArchExpr, MCFixupKind(FixupKind))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_ADD))); ++ FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_PCREL; ++ Value = 0x80000000; ++ BinExpr = MCBinaryExpr::createAdd(GOTExpr, MCConstantExpr::create(Value, Ctx), Ctx); ++ Fixups.push_back(MCFixup::create(0, BinExpr, MCFixupKind(FixupKind))); ++ FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_TLS_GD; ++ Fixups.push_back(MCFixup::create(0, LoongArchExpr, MCFixupKind(FixupKind))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_ADD))); ++ Value = 0x20; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_SR))); ++ Value = 0x20; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_SL))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_SUB))); ++ Value = 0x20; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_SL))); ++ Value = 0x2c; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_SR))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_POP_32_S_5_20))); ++ break; ++ case LoongArchMCExpr::MEK_TLSGD_RRLO: ++ FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_PCREL; ++ GOTExpr = MCSymbolRefExpr::create(GOTSymbol, ++ 
MCSymbolRefExpr::VK_None, Ctx); ++ Value = 0x4; ++ BinExpr = MCBinaryExpr::createAdd(GOTExpr, MCConstantExpr::create(Value, Ctx), Ctx); ++ Fixups.push_back(MCFixup::create(0, BinExpr, MCFixupKind(FixupKind))); ++ FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_TLS_GD; ++ Fixups.push_back(MCFixup::create(0, LoongArchExpr, MCFixupKind(FixupKind))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_ADD))); ++ FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_PCREL; ++ Value = 0x80000004; ++ BinExpr = MCBinaryExpr::createAdd(GOTExpr, MCConstantExpr::create(Value, Ctx), Ctx); ++ Fixups.push_back(MCFixup::create(0, BinExpr, MCFixupKind(FixupKind))); ++ FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_TLS_GD; ++ Fixups.push_back(MCFixup::create(0, LoongArchExpr, MCFixupKind(FixupKind))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_ADD))); ++ Value = 0x20; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_SR))); ++ Value = 0x20; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_SL))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_SUB))); ++ Value = 0xfff; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_AND))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_POP_32_U_10_12))); ++ break; ++ case LoongArchMCExpr::MEK_TLSGD_RRHIGHER: ++ FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_PCREL; ++ GOTExpr = MCSymbolRefExpr::create(GOTSymbol, ++ MCSymbolRefExpr::VK_None, Ctx); ++ Value = 0x80000008; ++ BinExpr = MCBinaryExpr::createAdd(GOTExpr, MCConstantExpr::create(Value, Ctx), Ctx); ++ Fixups.push_back(MCFixup::create(0, BinExpr, MCFixupKind(FixupKind))); ++ FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_TLS_GD; ++ Fixups.push_back(MCFixup::create(0, LoongArchExpr, MCFixupKind(FixupKind))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_ADD))); ++ Value = 0xc; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_SL))); ++ Value = 0x2c; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_SR))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ 
MCFixupKind(LoongArch::fixup_LARCH_SOP_POP_32_S_5_20))); ++ break; ++ case LoongArchMCExpr::MEK_TLSGD_RRHIGHEST: ++ FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_PCREL; ++ GOTExpr = MCSymbolRefExpr::create(GOTSymbol, ++ MCSymbolRefExpr::VK_None, Ctx); ++ Value = 0x8000000c; ++ BinExpr = MCBinaryExpr::createAdd(GOTExpr, MCConstantExpr::create(Value, Ctx), Ctx); ++ Fixups.push_back(MCFixup::create(0, BinExpr, MCFixupKind(FixupKind))); ++ FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_TLS_GD; ++ Fixups.push_back(MCFixup::create(0, LoongArchExpr, MCFixupKind(FixupKind))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_ADD))); ++ Value = 0x34; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_SR))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_POP_32_S_10_12))); ++ break; ++ case LoongArchMCExpr::MEK_TLSIE_HI: ++ FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_PCREL; ++ GOTExpr = MCSymbolRefExpr::create(GOTSymbol, ++ MCSymbolRefExpr::VK_None, Ctx); ++ Value = 0x800; ++ BinExpr = MCBinaryExpr::createAdd(GOTExpr, MCConstantExpr::create(Value, Ctx), Ctx); ++ Fixups.push_back(MCFixup::create(0, BinExpr, MCFixupKind(FixupKind))); ++ FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_TLS_GOT; ++ Fixups.push_back(MCFixup::create(0, LoongArchExpr, MCFixupKind(FixupKind))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_ADD))); ++ Value = 0xc; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_SR))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_POP_32_S_5_20))); ++ break; ++ case LoongArchMCExpr::MEK_TLSIE_LO: ++ FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_PCREL; ++ GOTExpr = MCSymbolRefExpr::create(GOTSymbol, ++ MCSymbolRefExpr::VK_None, Ctx); ++ Value = 0x4; ++ BinExpr = MCBinaryExpr::createAdd(GOTExpr, MCConstantExpr::create(Value, Ctx), Ctx); ++ Fixups.push_back(MCFixup::create(0, BinExpr, MCFixupKind(FixupKind))); ++ FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_TLS_GOT; ++ Fixups.push_back(MCFixup::create(0, LoongArchExpr, MCFixupKind(FixupKind))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_ADD))); ++ FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_PCREL; ++ Value = 0x804; ++ BinExpr = MCBinaryExpr::createAdd(GOTExpr, MCConstantExpr::create(Value, Ctx), Ctx); ++ Fixups.push_back(MCFixup::create(0, BinExpr, MCFixupKind(FixupKind))); ++ FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_TLS_GOT; ++ Fixups.push_back(MCFixup::create(0, LoongArchExpr, MCFixupKind(FixupKind))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_ADD))); ++ Value = 0xc; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); ++ Value = 
0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_SR))); ++ Value = 0xc; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_SL))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_SUB))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_POP_32_S_10_12))); ++ break; ++ case LoongArchMCExpr::MEK_TLSIE_RRHI: ++ FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_PCREL; ++ GOTExpr = MCSymbolRefExpr::create(GOTSymbol, ++ MCSymbolRefExpr::VK_None, Ctx); ++ Fixups.push_back(MCFixup::create(0, GOTExpr, MCFixupKind(FixupKind))); ++ FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_TLS_GOT; ++ Fixups.push_back(MCFixup::create(0, LoongArchExpr, MCFixupKind(FixupKind))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_ADD))); ++ FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_PCREL; ++ Value = 0x80000000; ++ BinExpr = MCBinaryExpr::createAdd(GOTExpr, MCConstantExpr::create(Value, Ctx), Ctx); ++ Fixups.push_back(MCFixup::create(0, BinExpr, MCFixupKind(FixupKind))); ++ FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_TLS_GOT; ++ Fixups.push_back(MCFixup::create(0, LoongArchExpr, MCFixupKind(FixupKind))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_ADD))); ++ Value = 0x20; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_SR))); ++ Value = 0x20; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_SL))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_SUB))); ++ Value = 0x20; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_SL))); ++ Value = 0x2c; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_SR))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_POP_32_S_5_20))); ++ break; ++ case LoongArchMCExpr::MEK_TLSIE_RRLO: ++ FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_PCREL; ++ GOTExpr = MCSymbolRefExpr::create(GOTSymbol, ++ MCSymbolRefExpr::VK_None, Ctx); ++ Value = 0x4; ++ BinExpr = MCBinaryExpr::createAdd(GOTExpr, MCConstantExpr::create(Value, Ctx), Ctx); ++ 
Fixups.push_back(MCFixup::create(0, BinExpr, MCFixupKind(FixupKind))); ++ FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_TLS_GOT; ++ Fixups.push_back(MCFixup::create(0, LoongArchExpr, MCFixupKind(FixupKind))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_ADD))); ++ FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_PCREL; ++ Value = 0x80000004; ++ BinExpr = MCBinaryExpr::createAdd(GOTExpr, MCConstantExpr::create(Value, Ctx), Ctx); ++ Fixups.push_back(MCFixup::create(0, BinExpr, MCFixupKind(FixupKind))); ++ FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_TLS_GOT; ++ Fixups.push_back(MCFixup::create(0, LoongArchExpr, MCFixupKind(FixupKind))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_ADD))); ++ Value = 0x20; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_SR))); ++ Value = 0x20; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_SL))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_SUB))); ++ Value = 0xfff; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_AND))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_POP_32_U_10_12))); ++ break; ++ case LoongArchMCExpr::MEK_TLSIE_RRHIGHER: ++ FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_PCREL; ++ GOTExpr = MCSymbolRefExpr::create(GOTSymbol, ++ MCSymbolRefExpr::VK_None, Ctx); ++ Value = 0x80000008; ++ BinExpr = MCBinaryExpr::createAdd(GOTExpr, MCConstantExpr::create(Value, Ctx), Ctx); ++ Fixups.push_back(MCFixup::create(0, BinExpr, MCFixupKind(FixupKind))); ++ FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_TLS_GOT; ++ Fixups.push_back(MCFixup::create(0, LoongArchExpr, MCFixupKind(FixupKind))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_ADD))); ++ Value = 0xc; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_SL))); ++ Value = 0x2c; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_SR))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_POP_32_S_5_20))); ++ break; ++ case LoongArchMCExpr::MEK_TLSIE_RRHIGHEST: ++ FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_PCREL; ++ GOTExpr = 
MCSymbolRefExpr::create(GOTSymbol, ++ MCSymbolRefExpr::VK_None, Ctx); ++ Value = 0x8000000c; ++ BinExpr = MCBinaryExpr::createAdd(GOTExpr, MCConstantExpr::create(Value, Ctx), Ctx); ++ Fixups.push_back(MCFixup::create(0, BinExpr, MCFixupKind(FixupKind))); ++ FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_TLS_GOT; ++ Fixups.push_back(MCFixup::create(0, LoongArchExpr, MCFixupKind(FixupKind))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_ADD))); ++ Value = 0x34; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_SR))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_POP_32_S_10_12))); ++ break; ++ case LoongArchMCExpr::MEK_TLSLE_HI: ++ FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_TLS_TPREL; ++ Fixups.push_back(MCFixup::create(0, LoongArchExpr, MCFixupKind(FixupKind))); ++ ++ Value = 0x20; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_SL))); ++ Value = 0x2c; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_SR))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_POP_32_S_5_20))); ++ break; ++ case LoongArchMCExpr::MEK_TLSLE_LO: ++ FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_TLS_TPREL; ++ Fixups.push_back(MCFixup::create(0, LoongArchExpr, MCFixupKind(FixupKind))); ++ Value = 0xfff; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_AND))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_POP_32_U_10_12))); ++ break; ++ case LoongArchMCExpr::MEK_TLSLE_HIGHER: ++ FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_TLS_TPREL; ++ Fixups.push_back(MCFixup::create(0, LoongArchExpr, MCFixupKind(FixupKind))); ++ Value = 0xc; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_SL))); ++ Value = 0x2c; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_SR))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_POP_32_S_5_20))); ++ break; ++ case LoongArchMCExpr::MEK_TLSLE_HIGHEST: ++ FixupKind = 
LoongArch::fixup_LARCH_SOP_PUSH_TLS_TPREL; ++ Fixups.push_back(MCFixup::create(0, LoongArchExpr, MCFixupKind(FixupKind))); ++ Value = 0x34; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_SR))); ++ Value = 0x0; ++ Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), ++ MCFixupKind(LoongArch::fixup_LARCH_SOP_POP_32_S_10_12))); ++ break; ++ } ++ return 0; + } ++ ++ if (Kind == MCExpr::SymbolRef) { ++ LoongArch::Fixups FixupKind = LoongArch::Fixups(0); ++ ++ switch(cast(Expr)->getKind()) { ++ default: llvm_unreachable("Unknown fixup kind!"); ++ break; ++ } ++ Fixups.push_back(MCFixup::create(0, Expr, MCFixupKind(FixupKind))); ++ return 0; ++ } ++ return 0; ++} ++ ++/// getMachineOpValue - Return binary encoding of operand. If the machine ++/// operand requires relocation, record the relocation and return zero. ++unsigned LoongArchMCCodeEmitter:: ++getMachineOpValue(const MCInst &MI, const MCOperand &MO, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const { ++ if (MO.isReg()) { ++ unsigned Reg = MO.getReg(); ++ unsigned RegNo = Ctx.getRegisterInfo()->getEncodingValue(Reg); ++ return RegNo; ++ } else if (MO.isImm()) { ++ return static_cast(MO.getImm()); ++ } else if (MO.isDFPImm()) { ++ return static_cast(bit_cast(MO.getDFPImm())); + } ++ // MO must be an Expr. ++ assert(MO.isExpr()); ++ return getExprOpValue(MI, MO.getExpr(),Fixups, STI); + } + +-MCCodeEmitter *llvm::createLoongArchMCCodeEmitter(const MCInstrInfo &MCII, +- MCContext &Ctx) { +- return new LoongArchMCCodeEmitter(Ctx, MCII); ++/// Return binary encoding of memory related operand. ++/// If the offset operand requires relocation, record the relocation. ++template ++unsigned LoongArchMCCodeEmitter::getMemEncoding(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const { ++ // Base register is encoded in bits 16-12, offset is encoded in bits 11-0. ++ assert(MI.getOperand(OpNo).isReg()); ++ unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo),Fixups, STI) << 12; ++ unsigned OffBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups, STI); ++ ++ // Apply the scale factor if there is one. ++ OffBits >>= ShiftAmount; ++ ++ return (OffBits & 0xFFF) | RegBits; ++} ++ ++/// Return binary encoding of AM* memory related operand. ++unsigned ++LoongArchMCCodeEmitter::getAMemEncoding(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const { ++ // Base register is encoded in bits 16-12, bits 11-0 are not used. ++ assert(MI.getOperand(OpNo).isReg()); ++ unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo), Fixups, STI) ++ << 12; ++ return RegBits; ++} ++ ++unsigned LoongArchMCCodeEmitter::getMemEncoding10l2(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const { ++ // Base register is encoded in bits 16-12, offset is encoded in bits 11-0. ++ assert(MI.getOperand(OpNo).isReg()); ++ unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo),Fixups, STI) << 10; ++ unsigned OffBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups, STI); ++ ++ // Apply the scale factor if there is one. 
++ OffBits >>= 2; ++ ++ return (OffBits & 0x3FF) | RegBits; ++} ++ ++unsigned LoongArchMCCodeEmitter::getMemEncoding11l1(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const { ++ // Base register is encoded in bits 16-12, offset is encoded in bits 11-0. ++ assert(MI.getOperand(OpNo).isReg()); ++ unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo),Fixups, STI) << 11; ++ unsigned OffBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups, STI); ++ ++ // Apply the scale factor if there is one. ++ OffBits >>= 1; ++ ++ return (OffBits & 0x7FF) | RegBits; ++} ++ ++unsigned LoongArchMCCodeEmitter::getMemEncoding9l3(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const { ++ // Base register is encoded in bits 16-12, offset is encoded in bits 11-0. ++ assert(MI.getOperand(OpNo).isReg()); ++ unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo),Fixups, STI) << 9; ++ unsigned OffBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups, STI); ++ ++ // Apply the scale factor if there is one. ++ OffBits >>= 3; ++ ++ return (OffBits & 0x1FF) | RegBits; ++} ++ ++/// Return binary encoding of simm14 memory related operand. Such as LL/SC instructions. ++/// If the offset operand requires relocation, record the relocation. ++template ++unsigned LoongArchMCCodeEmitter::getSimm14MemEncoding(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const { ++ // Base register is encoded in bits 18-14, offset is encoded in bits 13-0. ++ assert(MI.getOperand(OpNo).isReg()); ++ unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo),Fixups, STI) << 14; ++ unsigned OffBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups, STI); ++ ++ // Apply the scale factor if there is one. 
++ OffBits >>= ShiftAmount; ++ ++ return (OffBits & 0x3FFF) | RegBits; ++} ++ ++unsigned ++LoongArchMCCodeEmitter::getFCMPEncoding(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const { ++ const MCOperand& MO = MI.getOperand(OpNo); ++ switch((LoongArch::CondCode)MO.getImm()){ ++ case LoongArch::FCOND_T: ++ return 0x0; ++ case LoongArch::FCOND_OR: ++ return 0x8; ++ case LoongArch::FCOND_UNE: ++ return 0x4; ++ case LoongArch::FCOND_ONE: ++ return 0xC; ++ case LoongArch::FCOND_UGE: ++ return 0x2; ++ case LoongArch::FCOND_OGE: ++ return 0xA; ++ case LoongArch::FCOND_UGT: ++ return 0x6; ++ case LoongArch::FCOND_OGT: ++ return 0xE; ++ case LoongArch::FCOND_ST: ++ return 0x1; ++ case LoongArch::FCOND_GLE: ++ return 0x9; ++ case LoongArch::FCOND_GL: ++ return 0xD; ++ case LoongArch::FCOND_NLT: ++ return 0x3; ++ case LoongArch::FCOND_GE: ++ return 0xB; ++ case LoongArch::FCOND_NLE: ++ return 0x7; ++ case LoongArch::FCOND_GT: ++ return 0xF; ++ default: ++ return MO.getImm(); ++ } ++} ++ ++template ++unsigned ++LoongArchMCCodeEmitter::getUImmWithOffsetEncoding(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const { ++ assert(MI.getOperand(OpNo).isImm()); ++ unsigned Value = getMachineOpValue(MI, MI.getOperand(OpNo), Fixups, STI); ++ Value -= Offset; ++ return Value; + } + + #include "LoongArchGenMCCodeEmitter.inc" +diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.h +new file mode 100644 +index 000000000..016340151 +--- /dev/null ++++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.h +@@ -0,0 +1,146 @@ ++//===- LoongArchMCCodeEmitter.h - Convert LoongArch Code to Machine Code --*- C++ -*-===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file defines the LoongArchMCCodeEmitter class. ++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHMCCODEEMITTER_H ++#define LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHMCCODEEMITTER_H ++ ++#include "llvm/MC/MCCodeEmitter.h" ++#include "llvm/ADT/StringRef.h" ++#include ++#include ++ ++namespace llvm { ++ ++class MCContext; ++class MCExpr; ++class MCFixup; ++class MCInst; ++class MCInstrInfo; ++class MCOperand; ++class MCSubtargetInfo; ++class raw_ostream; ++ ++class LoongArchMCCodeEmitter : public MCCodeEmitter { ++ const MCInstrInfo &MCII; ++ MCContext &Ctx; ++ ++public: ++ LoongArchMCCodeEmitter(const MCInstrInfo &mcii, MCContext &Ctx_) ++ : MCII(mcii), Ctx(Ctx_) {} ++ LoongArchMCCodeEmitter(const LoongArchMCCodeEmitter &) = delete; ++ LoongArchMCCodeEmitter &operator=(const LoongArchMCCodeEmitter &) = delete; ++ ~LoongArchMCCodeEmitter() override = default; ++ ++ void EmitByte(unsigned char C, raw_ostream &OS) const; ++ ++ void EmitInstruction(uint64_t Val, unsigned Size, const MCSubtargetInfo &STI, ++ raw_ostream &OS) const; ++ ++ void encodeInstruction(const MCInst &MI, raw_ostream &OS, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const override; ++ ++ // getBinaryCodeForInstr - TableGen'erated function for getting the ++ // binary encoding for an instruction. 
++ uint64_t getBinaryCodeForInstr(const MCInst &MI, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const; ++ ++ // getJumpTargetOpValue - Return binary encoding of the jump ++ // target operand. If the machine operand requires relocation, ++ // record the relocation and return zero. ++ unsigned getJumpTargetOpValue(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const; ++ ++ // getBranchTargetOpValue - Return binary encoding of the branch ++ // target operand. If the machine operand requires relocation, ++ // record the relocation and return zero. ++ unsigned getBranchTargetOpValue(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const; ++ ++ // getMachineOpValue - Return binary encoding of operand. If the machin ++ // operand requires relocation, record the relocation and return zero. ++ unsigned getMachineOpValue(const MCInst &MI, const MCOperand &MO, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const; ++ ++ template ++ unsigned getMemEncoding(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const; ++ ++ unsigned getAMemEncoding(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const; ++ ++ unsigned getMemEncoding10l2(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const; ++ ++ unsigned getMemEncoding11l1(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const; ++ ++ unsigned getMemEncoding9l3(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const; ++ ++ template ++ unsigned getSimm14MemEncoding(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const; ++ ++ unsigned getFCMPEncoding(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const; ++ ++ /// Subtract Offset then encode as a N-bit unsigned integer. 
++ template ++ unsigned getUImmWithOffsetEncoding(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const; ++ ++ unsigned getExprOpValue(const MCInst &MI, const MCExpr *Expr, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const; ++ ++ unsigned getSImm11Lsl1Encoding(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const; ++ ++ unsigned getSImm10Lsl2Encoding(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const; ++ ++ unsigned getSImm9Lsl3Encoding(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const; ++ ++ unsigned getSImm8Lsl1Encoding(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const; ++ ++ unsigned getSImm8Lsl2Encoding(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const; ++ ++ unsigned getSImm8Lsl3Encoding(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const; ++ ++}; ++ ++} // end namespace llvm ++ ++#endif // LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHMCCODEEMITTER_H +diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.cpp +new file mode 100644 +index 000000000..1af027f15 +--- /dev/null ++++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.cpp +@@ -0,0 +1,158 @@ ++//===-- LoongArchMCExpr.cpp - LoongArch specific MC expression classes --------------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++ ++#include "LoongArchMCExpr.h" ++#include "llvm/BinaryFormat/ELF.h" ++#include "llvm/MC/MCAsmInfo.h" ++#include "llvm/MC/MCAssembler.h" ++#include "llvm/MC/MCContext.h" ++#include "llvm/MC/MCStreamer.h" ++#include "llvm/MC/MCSymbolELF.h" ++#include "llvm/MC/MCValue.h" ++#include "llvm/Support/Casting.h" ++#include "llvm/Support/ErrorHandling.h" ++#include "llvm/Support/MathExtras.h" ++#include "llvm/Support/raw_ostream.h" ++#include ++ ++using namespace llvm; ++ ++#define DEBUG_TYPE "loongarchmcexpr" ++ ++const LoongArchMCExpr *LoongArchMCExpr::create(LoongArchMCExpr::LoongArchExprKind Kind, ++ const MCExpr *Expr, MCContext &Ctx) { ++ return new (Ctx) LoongArchMCExpr(Kind, Expr); ++} ++ ++void LoongArchMCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const { ++ int64_t AbsVal; ++ if (Expr->evaluateAsAbsolute(AbsVal)) ++ OS << AbsVal; ++ else ++ Expr->print(OS, MAI, true); ++} ++ ++bool ++LoongArchMCExpr::evaluateAsRelocatableImpl(MCValue &Res, ++ const MCAsmLayout *Layout, ++ const MCFixup *Fixup) const { ++ if (!getSubExpr()->evaluateAsRelocatable(Res, Layout, Fixup)) ++ return false; ++ ++ if (Res.getRefKind() != MCSymbolRefExpr::VK_None) ++ return false; ++ ++ // evaluateAsAbsolute() and evaluateAsValue() require that we evaluate the ++ // %hi/%lo/etc. here. Fixup is a null pointer when either of these is the ++ // caller. 
++ if (Res.isAbsolute() && Fixup == nullptr) { ++ int64_t AbsVal = Res.getConstant(); ++ switch (Kind) { ++ default: ++ break; ++ case MEK_None: ++ case MEK_Special: ++ llvm_unreachable("MEK_None and MEK_Special are invalid"); ++ } ++ Res = MCValue::get(AbsVal); ++ return true; ++ } ++ ++ // We want to defer it for relocatable expressions since the constant is ++ // applied to the whole symbol value. ++ // ++ // The value of getKind() that is given to MCValue is only intended to aid ++ // debugging when inspecting MCValue objects. It shouldn't be relied upon ++ // for decision making. ++ Res = MCValue::get(Res.getSymA(), Res.getSymB(), Res.getConstant(), getKind()); ++ ++ return true; ++} ++ ++void LoongArchMCExpr::visitUsedExpr(MCStreamer &Streamer) const { ++ Streamer.visitUsedExpr(*getSubExpr()); ++} ++ ++static void fixELFSymbolsInTLSFixupsImpl(const MCExpr *Expr, MCAssembler &Asm) { ++ switch (Expr->getKind()) { ++ case MCExpr::Target: ++ fixELFSymbolsInTLSFixupsImpl(cast(Expr)->getSubExpr(), Asm); ++ break; ++ case MCExpr::Constant: ++ break; ++ case MCExpr::Binary: { ++ const MCBinaryExpr *BE = cast(Expr); ++ fixELFSymbolsInTLSFixupsImpl(BE->getLHS(), Asm); ++ fixELFSymbolsInTLSFixupsImpl(BE->getRHS(), Asm); ++ break; ++ } ++ case MCExpr::SymbolRef: { ++ // We're known to be under a TLS fixup, so any symbol should be ++ // modified. There should be only one. ++ const MCSymbolRefExpr &SymRef = *cast(Expr); ++ cast(SymRef.getSymbol()).setType(ELF::STT_TLS); ++ break; ++ } ++ case MCExpr::Unary: ++ fixELFSymbolsInTLSFixupsImpl(cast(Expr)->getSubExpr(), Asm); ++ break; ++ } ++} ++ ++void LoongArchMCExpr::fixELFSymbolsInTLSFixups(MCAssembler &Asm) const { ++ switch (getKind()) { ++ default: ++ break; ++ case MEK_None: ++ case MEK_Special: ++ llvm_unreachable("MEK_None and MEK_Special are invalid"); ++ break; ++ case MEK_CALL_HI: ++ case MEK_CALL_LO: ++ case MEK_GOT_HI: ++ case MEK_GOT_LO: ++ case MEK_GOT_RRHI: ++ case MEK_GOT_RRLO: ++ case MEK_GOT_RRHIGHER: ++ case MEK_GOT_RRHIGHEST: ++ case MEK_ABS_HI: ++ case MEK_ABS_LO: ++ case MEK_ABS_HIGHER: ++ case MEK_ABS_HIGHEST: ++ case MEK_PCREL_HI: ++ case MEK_PCREL_LO: ++ case MEK_PCREL_RRHI: ++ case MEK_PCREL_RRHIGHER: ++ case MEK_PCREL_RRHIGHEST: ++ case MEK_PCREL_RRLO: ++ case MEK_PLT: ++ // If we do have nested target-specific expressions, they will be in ++ // a consecutive chain. ++ if (const LoongArchMCExpr *E = dyn_cast(getSubExpr())) ++ E->fixELFSymbolsInTLSFixups(Asm); ++ break; ++ case MEK_TLSGD_HI: ++ case MEK_TLSGD_LO: ++ case MEK_TLSGD_RRHI: ++ case MEK_TLSGD_RRHIGHER: ++ case MEK_TLSGD_RRHIGHEST: ++ case MEK_TLSGD_RRLO: ++ case MEK_TLSLE_HI: ++ case MEK_TLSLE_HIGHER: ++ case MEK_TLSLE_HIGHEST: ++ case MEK_TLSLE_LO: ++ case MEK_TLSIE_HI: ++ case MEK_TLSIE_LO: ++ case MEK_TLSIE_RRHI: ++ case MEK_TLSIE_RRHIGHER: ++ case MEK_TLSIE_RRHIGHEST: ++ case MEK_TLSIE_RRLO: ++ fixELFSymbolsInTLSFixupsImpl(getSubExpr(), Asm); ++ break; ++ } ++} +diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.h +new file mode 100644 +index 000000000..7851d478e +--- /dev/null ++++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.h +@@ -0,0 +1,97 @@ ++//===- LoongArchMCExpr.h - LoongArch specific MC expression classes -------*- C++ -*-===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. 
++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHMCEXPR_H ++#define LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHMCEXPR_H ++ ++#include "llvm/MC/MCAsmLayout.h" ++#include "llvm/MC/MCExpr.h" ++#include "llvm/MC/MCValue.h" ++ ++namespace llvm { ++ ++class LoongArchMCExpr : public MCTargetExpr { ++public: ++ enum LoongArchExprKind { ++ MEK_None, ++ MEK_CALL_HI, ++ MEK_CALL_LO, ++ MEK_GOT_HI, ++ MEK_GOT_LO, ++ MEK_GOT_RRHI, ++ MEK_GOT_RRHIGHER, ++ MEK_GOT_RRHIGHEST, ++ MEK_GOT_RRLO, ++ MEK_ABS_HI, ++ MEK_ABS_HIGHER, ++ MEK_ABS_HIGHEST, ++ MEK_ABS_LO, ++ MEK_PCREL_HI, ++ MEK_PCREL_LO, ++ MEK_PCREL_RRHI, ++ MEK_PCREL_RRHIGHER, ++ MEK_PCREL_RRHIGHEST, ++ MEK_PCREL_RRLO, ++ MEK_TLSLE_HI, ++ MEK_TLSLE_HIGHER, ++ MEK_TLSLE_HIGHEST, ++ MEK_TLSLE_LO, ++ MEK_TLSIE_HI, ++ MEK_TLSIE_LO, ++ MEK_TLSIE_RRHI, ++ MEK_TLSIE_RRHIGHER, ++ MEK_TLSIE_RRHIGHEST, ++ MEK_TLSIE_RRLO, ++ MEK_TLSGD_HI, ++ MEK_TLSGD_LO, ++ MEK_TLSGD_RRHI, ++ MEK_TLSGD_RRHIGHER, ++ MEK_TLSGD_RRHIGHEST, ++ MEK_TLSGD_RRLO, ++ MEK_PLT, ++ MEK_Special, ++ }; ++ ++private: ++ const LoongArchExprKind Kind; ++ const MCExpr *Expr; ++ ++ explicit LoongArchMCExpr(LoongArchExprKind Kind, const MCExpr *Expr) ++ : Kind(Kind), Expr(Expr) {} ++ ++public: ++ static const LoongArchMCExpr *create(LoongArchExprKind Kind, const MCExpr *Expr, ++ MCContext &Ctx); ++ static const LoongArchMCExpr *createGpOff(LoongArchExprKind Kind, const MCExpr *Expr, ++ MCContext &Ctx); ++ ++ /// Get the kind of this expression. ++ LoongArchExprKind getKind() const { return Kind; } ++ ++ /// Get the child of this expression. ++ const MCExpr *getSubExpr() const { return Expr; } ++ ++ void printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const override; ++ bool evaluateAsRelocatableImpl(MCValue &Res, const MCAsmLayout *Layout, ++ const MCFixup *Fixup) const override; ++ void visitUsedExpr(MCStreamer &Streamer) const override; ++ ++ MCFragment *findAssociatedFragment() const override { ++ return getSubExpr()->findAssociatedFragment(); ++ } ++ ++ void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override; ++ ++ static bool classof(const MCExpr *E) { ++ return E->getKind() == MCExpr::Target; ++ } ++}; ++ ++} // end namespace llvm ++ ++#endif // LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHMCEXPR_H +diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp +index 8d71235f6..18e3224ab 100644 +--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp ++++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp +@@ -1,4 +1,4 @@ +-//===-- LoongArchMCTargetDesc.cpp - LoongArch Target Descriptions ---------===// ++//===-- LoongArchMCTargetDesc.cpp - LoongArch Target Descriptions -------------------===// + // + // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + // See https://llvm.org/LICENSE.txt for license information. 
+@@ -11,35 +11,47 @@ + //===----------------------------------------------------------------------===// + + #include "LoongArchMCTargetDesc.h" +-#include "LoongArchBaseInfo.h" +-#include "LoongArchInstPrinter.h" +-#include "LoongArchMCAsmInfo.h" ++#include "LoongArchTargetStreamer.h" ++#include "MCTargetDesc/LoongArchAsmBackend.h" ++#include "MCTargetDesc/LoongArchELFStreamer.h" ++#include "MCTargetDesc/LoongArchInstPrinter.h" ++#include "MCTargetDesc/LoongArchMCAsmInfo.h" + #include "TargetInfo/LoongArchTargetInfo.h" +-#include "llvm/MC/MCAsmInfo.h" +-#include "llvm/MC/MCDwarf.h" ++#include "llvm/ADT/Triple.h" ++#include "llvm/MC/MCCodeEmitter.h" ++#include "llvm/MC/MCELFStreamer.h" + #include "llvm/MC/MCInstrAnalysis.h" + #include "llvm/MC/MCInstrInfo.h" ++#include "llvm/MC/MCObjectWriter.h" + #include "llvm/MC/MCRegisterInfo.h" + #include "llvm/MC/MCSubtargetInfo.h" ++#include "llvm/MC/MCSymbol.h" ++#include "llvm/MC/MachineLocation.h" ++#include "llvm/Support/ErrorHandling.h" ++#include "llvm/Support/FormattedStream.h" + #include "llvm/MC/TargetRegistry.h" +-#include "llvm/Support/Compiler.h" ++ ++using namespace llvm; + + #define GET_INSTRINFO_MC_DESC +-#define ENABLE_INSTR_PREDICATE_VERIFIER + #include "LoongArchGenInstrInfo.inc" + +-#define GET_REGINFO_MC_DESC +-#include "LoongArchGenRegisterInfo.inc" +- + #define GET_SUBTARGETINFO_MC_DESC + #include "LoongArchGenSubtargetInfo.inc" + +-using namespace llvm; ++#define GET_REGINFO_MC_DESC ++#include "LoongArchGenRegisterInfo.inc" + +-static MCRegisterInfo *createLoongArchMCRegisterInfo(const Triple &TT) { +- MCRegisterInfo *X = new MCRegisterInfo(); +- InitLoongArchMCRegisterInfo(X, LoongArch::R1); +- return X; ++/// Select the LoongArch CPU for the given triple and cpu name. ++/// FIXME: Merge with the copy in LoongArchSubtarget.cpp ++StringRef LoongArch_MC::selectLoongArchCPU(const Triple &TT, StringRef CPU) { ++ if (CPU.empty() || CPU == "generic") { ++ if (TT.isLoongArch32()) ++ CPU = "loongarch32"; //FIXME ++ else ++ CPU = "la464"; ++ } ++ return CPU; + } + + static MCInstrInfo *createLoongArchMCInstrInfo() { +@@ -48,20 +60,24 @@ static MCInstrInfo *createLoongArchMCInstrInfo() { + return X; + } + +-static MCSubtargetInfo * +-createLoongArchMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) { +- if (CPU.empty()) +- CPU = TT.isArch64Bit() ? "la464" : "generic-la32"; ++static MCRegisterInfo *createLoongArchMCRegisterInfo(const Triple &TT) { ++ MCRegisterInfo *X = new MCRegisterInfo(); ++ InitLoongArchMCRegisterInfo(X, LoongArch::RA); ++ return X; ++} ++ ++static MCSubtargetInfo *createLoongArchMCSubtargetInfo(const Triple &TT, ++ StringRef CPU, StringRef FS) { ++ CPU = LoongArch_MC::selectLoongArchCPU(TT, CPU); + return createLoongArchMCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS); + } + + static MCAsmInfo *createLoongArchMCAsmInfo(const MCRegisterInfo &MRI, + const Triple &TT, + const MCTargetOptions &Options) { +- MCAsmInfo *MAI = new LoongArchMCAsmInfo(TT); ++ MCAsmInfo *MAI = new LoongArchMCAsmInfo(TT, Options); + +- // Initial state of the frame pointer is sp(r3). 
+- MCRegister SP = MRI.getDwarfRegNum(LoongArch::R3, true); ++ unsigned SP = MRI.getDwarfRegNum(LoongArch::SP, true); + MCCFIInstruction Inst = MCCFIInstruction::cfiDefCfa(nullptr, SP, 0); + MAI->addInitialFrameState(Inst); + +@@ -76,40 +92,96 @@ static MCInstPrinter *createLoongArchMCInstPrinter(const Triple &T, + return new LoongArchInstPrinter(MAI, MII, MRI); + } + ++static MCStreamer *createMCStreamer(const Triple &T, MCContext &Context, ++ std::unique_ptr &&MAB, ++ std::unique_ptr &&OW, ++ std::unique_ptr &&Emitter, ++ bool RelaxAll) { ++ MCStreamer *S; ++ S = createLoongArchELFStreamer(Context, std::move(MAB), std::move(OW), ++ std::move(Emitter), RelaxAll); ++ return S; ++} ++ ++static MCTargetStreamer *createLoongArchAsmTargetStreamer(MCStreamer &S, ++ formatted_raw_ostream &OS, ++ MCInstPrinter *InstPrint, ++ bool isVerboseAsm) { ++ return new LoongArchTargetAsmStreamer(S, OS); ++} ++ ++static MCTargetStreamer *createLoongArchNullTargetStreamer(MCStreamer &S) { ++ return new LoongArchTargetStreamer(S); ++} ++ ++static MCTargetStreamer * ++createLoongArchObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) { ++ return new LoongArchTargetELFStreamer(S, STI); ++} ++ + namespace { + + class LoongArchMCInstrAnalysis : public MCInstrAnalysis { + public: +- explicit LoongArchMCInstrAnalysis(const MCInstrInfo *Info) +- : MCInstrAnalysis(Info) {} ++ LoongArchMCInstrAnalysis(const MCInstrInfo *Info) : MCInstrAnalysis(Info) {} + + bool evaluateBranch(const MCInst &Inst, uint64_t Addr, uint64_t Size, + uint64_t &Target) const override { + unsigned NumOps = Inst.getNumOperands(); +- if (isBranch(Inst) || Inst.getOpcode() == LoongArch::BL) { ++ if (NumOps == 0) ++ return false; ++ if (Info->get(Inst.getOpcode()).isBranch() || Inst.getOpcode() == LoongArch::BL) { ++ // just not jirl + Target = Addr + Inst.getOperand(NumOps - 1).getImm(); + return true; ++ } else { ++ return false; + } +- +- return false; + } + }; ++} + +-} // end namespace +- +-static MCInstrAnalysis *createLoongArchInstrAnalysis(const MCInstrInfo *Info) { ++static MCInstrAnalysis *createLoongArchMCInstrAnalysis(const MCInstrInfo *Info) { + return new LoongArchMCInstrAnalysis(Info); + } + + extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeLoongArchTargetMC() { + for (Target *T : {&getTheLoongArch32Target(), &getTheLoongArch64Target()}) { +- TargetRegistry::RegisterMCRegInfo(*T, createLoongArchMCRegisterInfo); ++ // Register the MC asm info. ++ RegisterMCAsmInfoFn X(*T, createLoongArchMCAsmInfo); ++ ++ // Register the MC instruction info. + TargetRegistry::RegisterMCInstrInfo(*T, createLoongArchMCInstrInfo); ++ ++ // Register the MC register info. ++ TargetRegistry::RegisterMCRegInfo(*T, createLoongArchMCRegisterInfo); ++ ++ // Register the elf streamer. ++ TargetRegistry::RegisterELFStreamer(*T, createMCStreamer); ++ ++ // Register the asm target streamer. ++ TargetRegistry::RegisterAsmTargetStreamer(*T, createLoongArchAsmTargetStreamer); ++ ++ TargetRegistry::RegisterNullTargetStreamer(*T, ++ createLoongArchNullTargetStreamer); ++ ++ // Register the MC subtarget info. + TargetRegistry::RegisterMCSubtargetInfo(*T, createLoongArchMCSubtargetInfo); +- TargetRegistry::RegisterMCAsmInfo(*T, createLoongArchMCAsmInfo); +- TargetRegistry::RegisterMCCodeEmitter(*T, createLoongArchMCCodeEmitter); +- TargetRegistry::RegisterMCAsmBackend(*T, createLoongArchAsmBackend); ++ ++ // Register the MC instruction analyzer. 
++ TargetRegistry::RegisterMCInstrAnalysis(*T, createLoongArchMCInstrAnalysis); ++ ++ // Register the MCInstPrinter. + TargetRegistry::RegisterMCInstPrinter(*T, createLoongArchMCInstPrinter); +- TargetRegistry::RegisterMCInstrAnalysis(*T, createLoongArchInstrAnalysis); ++ ++ TargetRegistry::RegisterObjectTargetStreamer( ++ *T, createLoongArchObjectTargetStreamer); ++ ++ // Register the asm backend. ++ TargetRegistry::RegisterMCAsmBackend(*T, createLoongArchAsmBackend); + } ++ ++ // Register the MC Code Emitter ++ for (Target *T : {&getTheLoongArch32Target(), &getTheLoongArch64Target()}) ++ TargetRegistry::RegisterMCCodeEmitter(*T, createLoongArchMCCodeEmitter); + } +diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.h +index ab35a0096..04a5c79e6 100644 +--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.h ++++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.h +@@ -1,4 +1,4 @@ +-//===- LoongArchMCTargetDesc.h - LoongArch Target Descriptions --*- C++ -*-===// ++//===-- LoongArchMCTargetDesc.h - LoongArch Target Descriptions -----------*- C++ -*-===// + // + // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + // See https://llvm.org/LICENSE.txt for license information. +@@ -13,8 +13,8 @@ + #ifndef LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHMCTARGETDESC_H + #define LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHMCTARGETDESC_H + +-#include "llvm/MC/MCTargetOptions.h" + #include "llvm/Support/DataTypes.h" ++ + #include + + namespace llvm { +@@ -25,7 +25,15 @@ class MCInstrInfo; + class MCObjectTargetWriter; + class MCRegisterInfo; + class MCSubtargetInfo; ++class MCTargetOptions; ++class StringRef; + class Target; ++class Triple; ++class raw_ostream; ++class raw_pwrite_stream; ++ ++Target &getTheLoongArch32Target(); ++Target &getTheLoongArch64Target(); + + MCCodeEmitter *createLoongArchMCCodeEmitter(const MCInstrInfo &MCII, + MCContext &Ctx); +@@ -36,20 +44,24 @@ MCAsmBackend *createLoongArchAsmBackend(const Target &T, + const MCTargetOptions &Options); + + std::unique_ptr +-createLoongArchELFObjectWriter(uint8_t OSABI, bool Is64Bit); ++createLoongArchELFObjectWriter(const Triple &TT, bool IsLPX32); ++ ++namespace LoongArch_MC { ++StringRef selectLoongArchCPU(const Triple &TT, StringRef CPU); ++} + +-} // end namespace llvm ++} // End llvm namespace + +-// Defines symbolic names for LoongArch registers. ++// Defines symbolic names for LoongArch registers. This defines a mapping from ++// register name to register number. + #define GET_REGINFO_ENUM + #include "LoongArchGenRegisterInfo.inc" + +-// Defines symbolic names for LoongArch instructions. ++// Defines symbolic names for the LoongArch instructions. + #define GET_INSTRINFO_ENUM +-#define GET_INSTRINFO_MC_HELPER_DECLS + #include "LoongArchGenInstrInfo.inc" + + #define GET_SUBTARGETINFO_ENUM + #include "LoongArchGenSubtargetInfo.inc" + +-#endif // LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHMCTARGETDESC_H ++#endif +diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMatInt.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMatInt.cpp +deleted file mode 100644 +index 1509c436c..000000000 +--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMatInt.cpp ++++ /dev/null +@@ -1,51 +0,0 @@ +-//===- LoongArchMatInt.cpp - Immediate materialisation ---------*- C++ -*--===// +-// +-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+-// See https://llvm.org/LICENSE.txt for license information. +-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +-// +-//===----------------------------------------------------------------------===// +- +-#include "LoongArchMatInt.h" +-#include "MCTargetDesc/LoongArchMCTargetDesc.h" +-#include "llvm/Support/MathExtras.h" +- +-using namespace llvm; +- +-LoongArchMatInt::InstSeq LoongArchMatInt::generateInstSeq(int64_t Val) { +- // Val: +- // | hi32 | lo32 | +- // +-----------+------------------+------------------+-----------+ +- // | Highest12 | Higher20 | Hi20 | Lo12 | +- // +-----------+------------------+------------------+-----------+ +- // 63 52 51 32 31 12 11 0 +- // +- const int64_t Highest12 = Val >> 52 & 0xFFF; +- const int64_t Higher20 = Val >> 32 & 0xFFFFF; +- const int64_t Hi20 = Val >> 12 & 0xFFFFF; +- const int64_t Lo12 = Val & 0xFFF; +- InstSeq Insts; +- +- if (Highest12 != 0 && SignExtend64<52>(Val) == 0) { +- Insts.push_back(Inst(LoongArch::LU52I_D, SignExtend64<12>(Highest12))); +- return Insts; +- } +- +- if (Hi20 == 0) +- Insts.push_back(Inst(LoongArch::ORI, Lo12)); +- else if (SignExtend32<1>(Lo12 >> 11) == SignExtend32<20>(Hi20)) +- Insts.push_back(Inst(LoongArch::ADDI_W, SignExtend64<12>(Lo12))); +- else { +- Insts.push_back(Inst(LoongArch::LU12I_W, SignExtend64<20>(Hi20))); +- if (Lo12 != 0) +- Insts.push_back(Inst(LoongArch::ORI, Lo12)); +- } +- +- if (SignExtend32<1>(Hi20 >> 19) != SignExtend32<20>(Higher20)) +- Insts.push_back(Inst(LoongArch::LU32I_D, SignExtend64<20>(Higher20))); +- +- if (SignExtend32<1>(Higher20 >> 19) != SignExtend32<12>(Highest12)) +- Insts.push_back(Inst(LoongArch::LU52I_D, SignExtend64<12>(Highest12))); +- +- return Insts; +-} +diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchTargetStreamer.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchTargetStreamer.cpp +new file mode 100644 +index 000000000..4d0e785a3 +--- /dev/null ++++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchTargetStreamer.cpp +@@ -0,0 +1,330 @@ ++//===-- LoongArchTargetStreamer.cpp - LoongArch Target Streamer Methods -------------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file provides LoongArch specific target streamer methods. 
++// ++//===----------------------------------------------------------------------===// ++ ++#include "LoongArchABIInfo.h" ++#include "LoongArchELFStreamer.h" ++#include "LoongArchInstPrinter.h" ++#include "LoongArchMCExpr.h" ++#include "LoongArchMCTargetDesc.h" ++#include "LoongArchTargetObjectFile.h" ++#include "LoongArchTargetStreamer.h" ++#include "llvm/BinaryFormat/ELF.h" ++#include "llvm/MC/MCAssembler.h" ++#include "llvm/MC/MCContext.h" ++#include "llvm/MC/MCSectionELF.h" ++#include "llvm/MC/MCSubtargetInfo.h" ++#include "llvm/MC/MCSymbolELF.h" ++#include "llvm/Support/Casting.h" ++#include "llvm/Support/CommandLine.h" ++#include "llvm/Support/ErrorHandling.h" ++#include "llvm/Support/FormattedStream.h" ++ ++using namespace llvm; ++ ++namespace { ++static cl::opt RoundSectionSizes( ++ "loongarch-round-section-sizes", cl::init(false), ++ cl::desc("Round section sizes up to the section alignment"), cl::Hidden); ++} // end anonymous namespace ++ ++LoongArchTargetStreamer::LoongArchTargetStreamer(MCStreamer &S) ++ : MCTargetStreamer(S), ModuleDirectiveAllowed(true) { ++ GPRInfoSet = FPRInfoSet = FrameInfoSet = false; ++} ++void LoongArchTargetStreamer::emitDirectiveOptionPic0() {} ++void LoongArchTargetStreamer::emitDirectiveOptionPic2() {} ++void LoongArchTargetStreamer::emitDirectiveSetArch(StringRef Arch) { ++ forbidModuleDirective(); ++} ++void LoongArchTargetStreamer::emitDirectiveSetLoongArch32() { forbidModuleDirective(); } ++void LoongArchTargetStreamer::emitDirectiveSetloongarch64() { forbidModuleDirective(); } ++ ++void LoongArchTargetStreamer::emitR(unsigned Opcode, unsigned Reg0, SMLoc IDLoc, ++ const MCSubtargetInfo *STI) { ++ MCInst TmpInst; ++ TmpInst.setOpcode(Opcode); ++ TmpInst.addOperand(MCOperand::createReg(Reg0)); ++ TmpInst.setLoc(IDLoc); ++ getStreamer().emitInstruction(TmpInst, *STI); ++} ++ ++void LoongArchTargetStreamer::emitRXX(unsigned Opcode, unsigned Reg0, MCOperand Op1, ++ MCOperand Op2, SMLoc IDLoc, const MCSubtargetInfo *STI) { ++ MCInst TmpInst; ++ TmpInst.setOpcode(Opcode); ++ TmpInst.addOperand(MCOperand::createReg(Reg0)); ++ TmpInst.addOperand(Op1); ++ TmpInst.addOperand(Op2); ++ TmpInst.setLoc(IDLoc); ++ getStreamer().emitInstruction(TmpInst, *STI); ++} ++ ++void LoongArchTargetStreamer::emitRRXX(unsigned Opcode, unsigned Reg0, unsigned Reg1, ++ MCOperand Op2, MCOperand Op3, SMLoc IDLoc, ++ const MCSubtargetInfo *STI) { ++ MCInst TmpInst; ++ TmpInst.setOpcode(Opcode); ++ TmpInst.addOperand(MCOperand::createReg(Reg0)); ++ TmpInst.addOperand(MCOperand::createReg(Reg1)); ++ TmpInst.addOperand(Op2); ++ TmpInst.addOperand(Op3); ++ TmpInst.setLoc(IDLoc); ++ getStreamer().emitInstruction(TmpInst, *STI); ++} ++ ++void LoongArchTargetStreamer::emitRX(unsigned Opcode, unsigned Reg0, MCOperand Op1, ++ SMLoc IDLoc, const MCSubtargetInfo *STI) { ++ MCInst TmpInst; ++ TmpInst.setOpcode(Opcode); ++ TmpInst.addOperand(MCOperand::createReg(Reg0)); ++ TmpInst.addOperand(Op1); ++ TmpInst.setLoc(IDLoc); ++ getStreamer().emitInstruction(TmpInst, *STI); ++} ++ ++void LoongArchTargetStreamer::emitRI(unsigned Opcode, unsigned Reg0, int32_t Imm, ++ SMLoc IDLoc, const MCSubtargetInfo *STI) { ++ emitRX(Opcode, Reg0, MCOperand::createImm(Imm), IDLoc, STI); ++} ++ ++void LoongArchTargetStreamer::emitRR(unsigned Opcode, unsigned Reg0, unsigned Reg1, ++ SMLoc IDLoc, const MCSubtargetInfo *STI) { ++ emitRX(Opcode, Reg0, MCOperand::createReg(Reg1), IDLoc, STI); ++} ++ ++void LoongArchTargetStreamer::emitII(unsigned Opcode, int16_t Imm1, int16_t Imm2, ++ SMLoc IDLoc, const 
MCSubtargetInfo *STI) { ++ MCInst TmpInst; ++ TmpInst.setOpcode(Opcode); ++ TmpInst.addOperand(MCOperand::createImm(Imm1)); ++ TmpInst.addOperand(MCOperand::createImm(Imm2)); ++ TmpInst.setLoc(IDLoc); ++ getStreamer().emitInstruction(TmpInst, *STI); ++} ++ ++void LoongArchTargetStreamer::emitRRX(unsigned Opcode, unsigned Reg0, unsigned Reg1, ++ MCOperand Op2, SMLoc IDLoc, ++ const MCSubtargetInfo *STI) { ++ MCInst TmpInst; ++ TmpInst.setOpcode(Opcode); ++ TmpInst.addOperand(MCOperand::createReg(Reg0)); ++ TmpInst.addOperand(MCOperand::createReg(Reg1)); ++ TmpInst.addOperand(Op2); ++ TmpInst.setLoc(IDLoc); ++ getStreamer().emitInstruction(TmpInst, *STI); ++} ++ ++void LoongArchTargetStreamer::emitRRR(unsigned Opcode, unsigned Reg0, unsigned Reg1, ++ unsigned Reg2, SMLoc IDLoc, ++ const MCSubtargetInfo *STI) { ++ emitRRX(Opcode, Reg0, Reg1, MCOperand::createReg(Reg2), IDLoc, STI); ++} ++ ++void LoongArchTargetStreamer::emitRRI(unsigned Opcode, unsigned Reg0, ++ unsigned Reg1, int32_t Imm, SMLoc IDLoc, ++ const MCSubtargetInfo *STI) { ++ emitRRX(Opcode, Reg0, Reg1, MCOperand::createImm(Imm), IDLoc, STI); ++} ++ ++void LoongArchTargetStreamer::emitRRIII(unsigned Opcode, unsigned Reg0, ++ unsigned Reg1, int16_t Imm0, int16_t Imm1, ++ int16_t Imm2, SMLoc IDLoc, ++ const MCSubtargetInfo *STI) { ++ MCInst TmpInst; ++ TmpInst.setOpcode(Opcode); ++ TmpInst.addOperand(MCOperand::createReg(Reg0)); ++ TmpInst.addOperand(MCOperand::createReg(Reg1)); ++ TmpInst.addOperand(MCOperand::createImm(Imm0)); ++ TmpInst.addOperand(MCOperand::createImm(Imm1)); ++ TmpInst.addOperand(MCOperand::createImm(Imm2)); ++ TmpInst.setLoc(IDLoc); ++ getStreamer().emitInstruction(TmpInst, *STI); ++} ++ ++void LoongArchTargetStreamer::emitAdd(unsigned DstReg, unsigned SrcReg, ++ unsigned TrgReg, bool Is64Bit, ++ const MCSubtargetInfo *STI) { ++ emitRRR(Is64Bit ? LoongArch::ADD_D : LoongArch::ADD_W, DstReg, SrcReg, TrgReg, SMLoc(), ++ STI); ++} ++ ++void LoongArchTargetStreamer::emitDSLL(unsigned DstReg, unsigned SrcReg, ++ int16_t ShiftAmount, SMLoc IDLoc, ++ const MCSubtargetInfo *STI) { ++ if (ShiftAmount >= 32) { ++ emitRRI(LoongArch::SLLI_D, DstReg, SrcReg, ShiftAmount - 32, IDLoc, STI); ++ return; ++ } ++ ++ emitRRI(LoongArch::SLLI_D, DstReg, SrcReg, ShiftAmount, IDLoc, STI); ++} ++ ++void LoongArchTargetStreamer::emitNop(SMLoc IDLoc, const MCSubtargetInfo *STI) { ++ emitRRI(LoongArch::ANDI, LoongArch::ZERO, LoongArch::ZERO, 0, IDLoc, STI); ++} ++ ++LoongArchTargetAsmStreamer::LoongArchTargetAsmStreamer(MCStreamer &S, ++ formatted_raw_ostream &OS) ++ : LoongArchTargetStreamer(S), OS(OS) {} ++ ++void LoongArchTargetAsmStreamer::emitDirectiveOptionPic0() { ++ OS << "\t.option\tpic0\n"; ++} ++ ++void LoongArchTargetAsmStreamer::emitDirectiveOptionPic2() { ++ OS << "\t.option\tpic2\n"; ++} ++ ++void LoongArchTargetAsmStreamer::emitDirectiveSetArch(StringRef Arch) { ++ OS << "\t.set arch=" << Arch << "\n"; ++ LoongArchTargetStreamer::emitDirectiveSetArch(Arch); ++} ++ ++void LoongArchTargetAsmStreamer::emitDirectiveSetLoongArch32() { ++ //OS << "\t.set\tloongarch32\n"; ++ LoongArchTargetStreamer::emitDirectiveSetLoongArch32(); ++} ++ ++void LoongArchTargetAsmStreamer::emitDirectiveSetloongarch64() { ++ //OS << "\t.set\tloongarch64\n"; ++ LoongArchTargetStreamer::emitDirectiveSetloongarch64(); ++} ++ ++// This part is for ELF object output. 
++LoongArchTargetELFStreamer::LoongArchTargetELFStreamer(MCStreamer &S, ++ const MCSubtargetInfo &STI) ++ : LoongArchTargetStreamer(S), STI(STI) { ++ MCAssembler &MCA = getStreamer().getAssembler(); ++ ++ // It's possible that MCObjectFileInfo isn't fully initialized at this point ++ // due to an initialization order problem where LLVMTargetMachine creates the ++ // target streamer before TargetLoweringObjectFile calls ++ // InitializeMCObjectFileInfo. There doesn't seem to be a single place that ++ // covers all cases so this statement covers most cases and direct object ++ // emission must call setPic() once MCObjectFileInfo has been initialized. The ++ // cases we don't handle here are covered by LoongArchAsmPrinter. ++ Pic = MCA.getContext().getObjectFileInfo()->isPositionIndependent(); ++ ++ // Set the header flags that we can in the constructor. ++ // FIXME: This is a fairly terrible hack. We set the rest ++ // of these in the destructor. The problem here is two-fold: ++ // ++ // a: Some of the eflags can be set/reset by directives. ++ // b: There aren't any usage paths that initialize the ABI ++ // pointer until after we initialize either an assembler ++ // or the target machine. ++ // We can fix this by making the target streamer construct ++ // the ABI, but this is fraught with wide ranging dependency ++ // issues as well. ++ unsigned EFlags = MCA.getELFHeaderEFlags(); ++ ++ // FIXME: Fix a dependency issue by instantiating the ABI object to some ++ // default based off the triple. The triple doesn't describe the target ++ // fully, but any external user of the API that uses the MCTargetStreamer ++ // would otherwise crash on assertion failure. ++ ++ ABI = LoongArchABIInfo( ++ STI.getTargetTriple().getArch() == Triple::ArchType::loongarch32 ++ ? LoongArchABIInfo::LP32() ++ : LoongArchABIInfo::LP64()); ++ ++ EFlags |= ELF::EF_LARCH_ABI; ++ MCA.setELFHeaderEFlags(EFlags); ++} ++ ++void LoongArchTargetELFStreamer::emitLabel(MCSymbol *S) { ++ auto *Symbol = cast(S); ++ getStreamer().getAssembler().registerSymbol(*Symbol); ++ uint8_t Type = Symbol->getType(); ++ if (Type != ELF::STT_FUNC) ++ return; ++ ++} ++ ++void LoongArchTargetELFStreamer::finish() { ++ MCAssembler &MCA = getStreamer().getAssembler(); ++ const MCObjectFileInfo &OFI = *MCA.getContext().getObjectFileInfo(); ++ ++ // .bss, .text and .data are always at least 16-byte aligned. ++ MCSection &TextSection = *OFI.getTextSection(); ++ MCA.registerSection(TextSection); ++ MCSection &DataSection = *OFI.getDataSection(); ++ MCA.registerSection(DataSection); ++ MCSection &BSSSection = *OFI.getBSSSection(); ++ MCA.registerSection(BSSSection); ++ ++ TextSection.setAlignment(Align(std::max(16u, TextSection.getAlignment()))); ++ DataSection.setAlignment(Align(std::max(16u, DataSection.getAlignment()))); ++ BSSSection.setAlignment(Align(std::max(16u, BSSSection.getAlignment()))); ++ ++ if (RoundSectionSizes) { ++ // Make sections sizes a multiple of the alignment. This is useful for ++ // verifying the output of IAS against the output of other assemblers but ++ // it's not necessary to produce a correct object and increases section ++ // size. 
++ MCStreamer &OS = getStreamer(); ++ for (MCSection &S : MCA) { ++ MCSectionELF &Section = static_cast(S); ++ ++ unsigned Alignment = Section.getAlignment(); ++ if (Alignment) { ++ OS.switchSection(&Section); ++ if (Section.useCodeAlign()) ++ OS.emitCodeAlignment(Alignment, &STI, Alignment); ++ else ++ OS.emitValueToAlignment(Alignment, 0, 1, Alignment); ++ } ++ } ++ } ++ ++ // Update e_header flags. See the FIXME and comment above in ++ // the constructor for a full rundown on this. ++ unsigned EFlags = MCA.getELFHeaderEFlags(); ++ ++ // ABI ++ // LP64 does not require any ABI bits. ++ if (getABI().IsLP32()) ++ EFlags |= ELF::EF_LARCH_ABI_LP32; ++ else if (getABI().IsLPX32()) ++ EFlags |= ELF::EF_LARCH_ABI_LPX32; ++ else ++ EFlags |= ELF::EF_LARCH_ABI_LP64; ++ ++ MCA.setELFHeaderEFlags(EFlags); ++} ++ ++MCELFStreamer &LoongArchTargetELFStreamer::getStreamer() { ++ return static_cast(Streamer); ++} ++ ++void LoongArchTargetELFStreamer::emitDirectiveOptionPic0() { ++ MCAssembler &MCA = getStreamer().getAssembler(); ++ unsigned Flags = MCA.getELFHeaderEFlags(); ++ // This option overrides other PIC options like -KPIC. ++ Pic = false; ++ ///XXX:Reloc no this flags ++ //Flags &= ~ELF::EF_LOONGARCH_PIC; ++ MCA.setELFHeaderEFlags(Flags); ++} ++ ++void LoongArchTargetELFStreamer::emitDirectiveOptionPic2() { ++ MCAssembler &MCA = getStreamer().getAssembler(); ++ unsigned Flags = MCA.getELFHeaderEFlags(); ++ Pic = true; ++ // NOTE: We are following the GAS behaviour here which means the directive ++ // 'pic2' also sets the CPIC bit in the ELF header. This is different from ++ // what is stated in the SYSV ABI which consider the bits EF_LOONGARCH_PIC and ++ // EF_LOONGARCH_CPIC to be mutually exclusive. ++ ///XXX:Reloc no this flags ++ //Flags |= ELF::EF_LOONGARCH_PIC | ELF::EF_LOONGARCH_CPIC; ++ MCA.setELFHeaderEFlags(Flags); ++} +diff --git a/llvm/lib/Target/LoongArch/TargetInfo/CMakeLists.txt b/llvm/lib/Target/LoongArch/TargetInfo/CMakeLists.txt +index e14360ff5..f53ddba40 100644 +--- a/llvm/lib/Target/LoongArch/TargetInfo/CMakeLists.txt ++++ b/llvm/lib/Target/LoongArch/TargetInfo/CMakeLists.txt +@@ -2,7 +2,6 @@ add_llvm_component_library(LLVMLoongArchInfo + LoongArchTargetInfo.cpp + + LINK_COMPONENTS +- MC + Support + + ADD_TO_COMPONENT +diff --git a/llvm/lib/Target/LoongArch/TargetInfo/LoongArchTargetInfo.cpp b/llvm/lib/Target/LoongArch/TargetInfo/LoongArchTargetInfo.cpp +index 106545100..e6b845180 100644 +--- a/llvm/lib/Target/LoongArch/TargetInfo/LoongArchTargetInfo.cpp ++++ b/llvm/lib/Target/LoongArch/TargetInfo/LoongArchTargetInfo.cpp +@@ -1,4 +1,4 @@ +-//===-- LoongArchTargetInfo.cpp - LoongArch Target Implementation ---------===// ++//===-- LoongArchTargetInfo.cpp - LoongArch Target Implementation -------------------===// + // + // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + // See https://llvm.org/LICENSE.txt for license information. 
+@@ -21,10 +21,14 @@ Target &llvm::getTheLoongArch64Target() { + } + + extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeLoongArchTargetInfo() { +- RegisterTarget X( +- getTheLoongArch32Target(), "loongarch32", "32-bit LoongArch", +- "LoongArch"); +- RegisterTarget Y( +- getTheLoongArch64Target(), "loongarch64", "64-bit LoongArch", +- "LoongArch"); ++#if 0 ++ //TODO: support it in futrue ++ RegisterTarget ++ X(getTheLoongArch32Target(), "loongarch32", "LoongArch (32-bit)", "LoongArch"); ++#endif ++ RegisterTarget ++ A(getTheLoongArch64Target(), "loongarch64", "LoongArch (64-bit)", ++ "LoongArch"); + } +diff --git a/llvm/lib/Target/LoongArch/TargetInfo/LoongArchTargetInfo.h b/llvm/lib/Target/LoongArch/TargetInfo/LoongArchTargetInfo.h +index b24cf8795..7dce2497f 100644 +--- a/llvm/lib/Target/LoongArch/TargetInfo/LoongArchTargetInfo.h ++++ b/llvm/lib/Target/LoongArch/TargetInfo/LoongArchTargetInfo.h +@@ -1,4 +1,4 @@ +-//===-- LoongArchTargetInfo.h - LoongArch Target Implementation -*- C++ -*-===// ++//===-- LoongArchTargetInfo.h - LoongArch Target Implementation -----------*- C++ -*-===// + // + // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + // See https://llvm.org/LICENSE.txt for license information. +@@ -16,6 +16,6 @@ class Target; + Target &getTheLoongArch32Target(); + Target &getTheLoongArch64Target(); + +-} // end namespace llvm ++} // namespace llvm + + #endif // LLVM_LIB_TARGET_LOONGARCH_TARGETINFO_LOONGARCHTARGETINFO_H +diff --git a/llvm/test/CodeGen/LoongArch/1ri.mir b/llvm/test/CodeGen/LoongArch/1ri.mir +deleted file mode 100644 +index c7d74b987..000000000 +--- a/llvm/test/CodeGen/LoongArch/1ri.mir ++++ /dev/null +@@ -1,96 +0,0 @@ +-# RUN: llc %s -mtriple=loongarch64 -start-after=prologepilog -O0 -filetype=obj -o - \ +-# RUN: | extract-section .text \ +-# RUN: | FileCheck %s -check-prefix=CHECK-ENC +-# RUN: llc %s -mtriple=loongarch64 -start-after=prologepilog -O0 -filetype=asm -o - \ +-# RUN: | FileCheck %s -check-prefix=CHECK-ASM +- +-# ------------------------------------------------------------------------------------------------- +-# Encoding format: 1RI20 +-# ------------------------------------------------------------------------------------------------- +-# ---------------------+-----------------------------------------------------------+--------------- +-# 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00 +-# ---------------------+-----------------------------------------------------------+--------------- +-# opcode | imm20 | rd +-# ---------------------+-----------------------------------------------------------+--------------- +- +---- +-# CHECK-LABEL: test_LU12I_W: +-# CHECK-ENC: 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 1 0 0 1 0 0 +-# CHECK-ASM: lu12i.w $a0, 49 +-name: test_LU12I_W +-body: | +- bb.0: +- $r4 = LU12I_W 49 +-... +---- +-# CHECK-LABEL: test_LU32I_D: +-# CHECK-ENC: 0 0 0 1 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 1 0 0 0 0 1 0 0 +-# CHECK-ASM: lu32i.d $a0, 196 +-name: test_LU32I_D +-body: | +- bb.0: +- $r4 = LU32I_D $r4, 196 +-... +---- +-# CHECK-LABEL: test_PCADDI: +-# CHECK-ENC: 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 1 1 0 1 1 0 0 1 0 0 +-# CHECK-ASM: pcaddi $a0, 187 +-name: test_PCADDI +-body: | +- bb.0: +- $r4 = PCADDI 187 +-... +---- +-# CHECK-LABEL: test_PCALAU12I: +-# CHECK-ENC: 0 0 0 1 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 1 0 0 1 0 0 1 0 0 +-# CHECK-ASM: pcalau12i $a0, 89 +-name: test_PCALAU12I +-body: | +- bb.0: +- $r4 = PCALAU12I 89 +-... 
+---- +-# CHECK-LABEL: test_PCADDU12I: +-# CHECK-ENC: 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: pcaddu12i $a0, 37 +-name: test_PCADDU12I +-body: | +- bb.0: +- $r4 = PCADDU12I 37 +-... +---- +-# CHECK-LABEL: test_PCADDU18I: +-# CHECK-ENC: 0 0 0 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 1 0 0 0 1 0 0 +-# CHECK-ASM: pcaddu18i $a0, 26 +-name: test_PCADDU18I +-body: | +- bb.0: +- $r4 = PCADDU18I 26 +-... +- +-# ------------------------------------------------------------------------------------------------- +-# Encoding format: 1RI21 +-# ------------------------------------------------------------------------------------------------- +-# ------------------+-----------------------------------------------+--------------+--------------- +-# 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00 +-# ------------------+-----------------------------------------------+--------------+--------------- +-# opcode | imm21{15-0} | rj | imm21{20-16} +-# ------------------+-----------------------------------------------+--------------+--------------- +- +---- +-# CHECK-LABEL: test_BEQZ: +-# CHECK-ENC: 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 1 1 0 0 1 0 0 0 0 0 0 0 +-# CHECK-ASM: beqz $a0, 92 +-name: test_BEQZ +-body: | +- bb.0: +- BEQZ $r4, 92 +-... +---- +-# CHECK-LABEL: test_BNEZ: +-# CHECK-ENC: 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 1 0 0 1 0 0 0 0 0 0 0 +-# CHECK-ASM: bnez $a0, 84 +-name: test_BNEZ +-body: | +- bb.0: +- BNEZ $r4, 84 +diff --git a/llvm/test/CodeGen/LoongArch/2r.mir b/llvm/test/CodeGen/LoongArch/2r.mir +deleted file mode 100644 +index 488944526..000000000 +--- a/llvm/test/CodeGen/LoongArch/2r.mir ++++ /dev/null +@@ -1,230 +0,0 @@ +-# RUN: llc %s -mtriple=loongarch64 -start-after=prologepilog -O0 -filetype=obj -o - \ +-# RUN: | extract-section .text \ +-# RUN: | FileCheck %s -check-prefix=CHECK-ENC +-# RUN: llc %s -mtriple=loongarch64 -start-after=prologepilog -O0 -filetype=asm -o - \ +-# RUN: | FileCheck %s -check-prefix=CHECK-ASM +- +-# ------------------------------------------------------------------------------------------------- +-# Encoding format: 2R +-# ------------------------------------------------------------------------------------------------- +-# ------------------------------------------------------------------+--------------+--------------- +-# 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00 +-# ------------------------------------------------------------------+--------------+--------------- +-# opcode | rj | rd +-# ------------------------------------------------------------------+--------------+--------------- +- +---- +-# CHECK-LABEL: test_CLO_W: +-# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: clo.w $a0, $a1 +-name: test_CLO_W +-body: | +- bb.0: +- $r4 = CLO_W $r5 +-... +---- +-# CHECK-LABEL: test_CLZ_W: +-# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: clz.w $a0, $a1 +-name: test_CLZ_W +-body: | +- bb.0: +- $r4 = CLZ_W $r5 +-... +---- +-# CHECK-LABEL: test_CTO_W: +-# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: cto.w $a0, $a1 +-name: test_CTO_W +-body: | +- bb.0: +- $r4 = CTO_W $r5 +-... +---- +-# CHECK-LABEL: test_CTZ_W: +-# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: ctz.w $a0, $a1 +-name: test_CTZ_W +-body: | +- bb.0: +- $r4 = CTZ_W $r5 +-... 
+---- +-# CHECK-LABEL: test_CLO_D: +-# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: clo.d $a0, $a1 +-name: test_CLO_D +-body: | +- bb.0: +- $r4 = CLO_D $r5 +-... +---- +-# CHECK-LABEL: test_CLZ_D: +-# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: clz.d $a0, $a1 +-name: test_CLZ_D +-body: | +- bb.0: +- $r4 = CLZ_D $r5 +-... +---- +-# CHECK-LABEL: test_CTO_D: +-# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: cto.d $a0, $a1 +-name: test_CTO_D +-body: | +- bb.0: +- $r4 = CTO_D $r5 +-... +---- +-# CHECK-LABEL: test_CTZ_D: +-# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 1 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: ctz.d $a0, $a1 +-name: test_CTZ_D +-body: | +- bb.0: +- $r4 = CTZ_D $r5 +-... +---- +-# CHECK-LABEL: test_REVB_2H: +-# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: revb.2h $a0, $a1 +-name: test_REVB_2H +-body: | +- bb.0: +- $r4 = REVB_2H $r5 +-... +---- +-# CHECK-LABEL: test_REVB_4H: +-# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 1 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: revb.4h $a0, $a1 +-name: test_REVB_4H +-body: | +- bb.0: +- $r4 = REVB_4H $r5 +-... +---- +-# CHECK-LABEL: test_REVB_2W: +-# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: revb.2w $a0, $a1 +-name: test_REVB_2W +-body: | +- bb.0: +- $r4 = REVB_2W $r5 +-... +---- +-# CHECK-LABEL: test_REVB_D: +-# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: revb.d $a0, $a1 +-name: test_REVB_D +-body: | +- bb.0: +- $r4 = REVB_D $r5 +-... +---- +-# CHECK-LABEL: test_REVH_2W: +-# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: revh.2w $a0, $a1 +-name: test_REVH_2W +-body: | +- bb.0: +- $r4 = REVH_2W $r5 +-... +---- +-# CHECK-LABEL: test_REVH_D: +-# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: revh.d $a0, $a1 +-name: test_REVH_D +-body: | +- bb.0: +- $r4 = REVH_D $r5 +-... +---- +-# CHECK-LABEL: test_BITREV_4B: +-# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: bitrev.4b $a0, $a1 +-name: test_BITREV_4B +-body: | +- bb.0: +- $r4 = BITREV_4B $r5 +-... +---- +-# CHECK-LABEL: test_BITREV_8B: +-# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 1 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: bitrev.8b $a0, $a1 +-name: test_BITREV_8B +-body: | +- bb.0: +- $r4 = BITREV_8B $r5 +-... +---- +-# CHECK-LABEL: test_BITREV_W: +-# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: bitrev.w $a0, $a1 +-name: test_BITREV_W +-body: | +- bb.0: +- $r4 = BITREV_W $r5 +-... +---- +-# CHECK-LABEL: test_BITREV_D: +-# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 1 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: bitrev.d $a0, $a1 +-name: test_BITREV_D +-body: | +- bb.0: +- $r4 = BITREV_D $r5 +-... +---- +-# CHECK-LABEL: test_EXT_W_H: +-# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 1 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: ext.w.h $a0, $a1 +-name: test_EXT_W_H +-body: | +- bb.0: +- $r4 = EXT_W_H $r5 +-... +---- +-# CHECK-LABEL: test_EXT_W_B: +-# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 1 1 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: ext.w.b $a0, $a1 +-name: test_EXT_W_B +-body: | +- bb.0: +- $r4 = EXT_W_B $r5 +-... 
+---- +-# CHECK-LABEL: test_CPUCFG: +-# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 1 1 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: cpucfg $a0, $a1 +-name: test_CPUCFG +-body: | +- bb.0: +- $r4 = CPUCFG $r5 +-... +---- +-# CHECK-LABEL: test_RDTIMEL_W: +-# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: rdtimel.w $a0, $a1 +-name: test_RDTIMEL_W +-body: | +- bb.0: +- $r4, $r5 = RDTIMEL_W +-... +---- +-# CHECK-LABEL: test_RDTIMEH_W: +-# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 1 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: rdtimeh.w $a0, $a1 +-name: test_RDTIMEH_W +-body: | +- bb.0: +- $r4, $r5 = RDTIMEH_W +-... +---- +-# CHECK-LABEL: test_RDTIME_D: +-# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 1 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: rdtime.d $a0, $a1 +-name: test_RDTIME_D +-body: | +- bb.0: +- $r4, $r5 = RDTIME_D +diff --git a/llvm/test/CodeGen/LoongArch/2ri.mir b/llvm/test/CodeGen/LoongArch/2ri.mir +deleted file mode 100644 +index 263fed42c..000000000 +--- a/llvm/test/CodeGen/LoongArch/2ri.mir ++++ /dev/null +@@ -1,432 +0,0 @@ +-# RUN: llc %s -mtriple=loongarch64 -start-after=prologepilog -O0 -filetype=obj -o - \ +-# RUN: | extract-section .text \ +-# RUN: | FileCheck %s -check-prefix=CHECK-ENC +-# RUN: llc %s -mtriple=loongarch64 -start-after=prologepilog -O0 -filetype=asm -o - \ +-# RUN: | FileCheck %s -check-prefix=CHECK-ASM +- +-# ------------------------------------------------------------------------------------------------- +-# Encoding format: 2RI5 +-# ------------------------------------------------------------------------------------------------- +-# ---------------------------------------------------+--------------+--------------+--------------- +-# 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00 +-# ---------------------------------------------------+--------------+--------------+--------------- +-# opcode | imm5 | rj | rd +-# ---------------------------------------------------+--------------+--------------+--------------- +- +---- +-# CHECK-LABEL: test_SLLI_W: +-# CHECK-ENC: 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: slli.w $a0, $a1, 0 +-name: test_SLLI_W +-body: | +- bb.0: +- $r4 = SLLI_W $r5, 0 +-... +---- +-# CHECK-LABEL: test_SRLI_W: +-# CHECK-ENC: 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 1 1 1 1 1 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: srli.w $a0, $a1, 30 +-name: test_SRLI_W +-body: | +- bb.0: +- $r4 = SRLI_W $r5, 30 +-... +---- +-# CHECK-LABEL: test_SRAI_W: +-# CHECK-ENC: 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 1 1 1 0 0 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: srai.w $a0, $a1, 24 +-name: test_SRAI_W +-body: | +- bb.0: +- $r4 = SRAI_W $r5, 24 +-... +---- +-# CHECK-LABEL: test_ROTRI_W: +-# CHECK-ENC: 0 0 0 0 0 0 0 0 0 1 0 0 1 1 0 0 1 1 0 1 1 1 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: rotri.w $a0, $a1, 23 +-name: test_ROTRI_W +-body: | +- bb.0: +- $r4 = ROTRI_W $r5, 23 +-... 
+- +-# ------------------------------------------------------------------------------------------------- +-# Encoding format: 2RI6 +-# ------------------------------------------------------------------------------------------------- +-# ------------------------------------------------+-----------------+--------------+--------------- +-# 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00 +-# ------------------------------------------------+-----------------+--------------+--------------- +-# opcode | imm6 | rj | rd +-# ------------------------------------------------+-----------------+--------------+--------------- +- +---- +-# CHECK-LABEL: test_SLLI_D: +-# CHECK-ENC: 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 1 0 0 1 1 1 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: slli.d $a0, $a1, 39 +-name: test_SLLI_D +-body: | +- bb.0: +- $r4 = SLLI_D $r5, 39 +-... +---- +-# CHECK-LABEL: test_SRLI_D: +-# CHECK-ENC: 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 1 1 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: srli.d $a0, $a1, 38 +-name: test_SRLI_D +-body: | +- bb.0: +- $r4 = SRLI_D $r5, 38 +-... +---- +-# CHECK-LABEL: test_SRAI_D: +-# CHECK-ENC: 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 1 0 1 1 0 1 1 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: srai.d $a0, $a1, 27 +-name: test_SRAI_D +-body: | +- bb.0: +- $r4 = SRAI_D $r5, 27 +-... +---- +-# CHECK-LABEL: test_ROTRI_D: +-# CHECK-ENC: 0 0 0 0 0 0 0 0 0 1 0 0 1 1 0 1 0 0 0 1 1 1 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: rotri.d $a0, $a1, 7 +-name: test_ROTRI_D +-body: | +- bb.0: +- $r4 = ROTRI_D $r5, 7 +-... +- +-# ------------------------------------------------------------------------------------------------- +-# Encoding format: 2RI12 +-# ------------------------------------------------------------------------------------------------- +-# ------------------------------+-----------------------------------+--------------+--------------- +-# 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00 +-# ------------------------------+-----------------------------------+--------------+--------------- +-# opcode | imm12 | rj | rd +-# ------------------------------+-----------------------------------+--------------+--------------- +- +---- +-# CHECK-LABEL: test_SLTI: +-# CHECK-ENC: 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 1 1 0 1 0 1 1 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: slti $a0, $a1, 235 +-name: test_SLTI +-body: | +- bb.0: +- $r4 = SLTI $r5, 235 +-... +---- +-# CHECK-LABEL: test_SLTUI: +-# CHECK-ENC: 0 0 0 0 0 0 1 0 0 1 0 0 0 0 1 0 1 0 0 0 1 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: sltui $a0, $a1, 162 +-name: test_SLTUI +-body: | +- bb.0: +- $r4 = SLTUI $r5, 162 +-... +---- +-# CHECK-LABEL: test_ADDI_W: +-# CHECK-ENC: 0 0 0 0 0 0 1 0 1 0 0 0 0 0 1 1 1 1 0 1 1 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: addi.w $a0, $a1, 246 +-name: test_ADDI_W +-body: | +- bb.0: +- $r4 = ADDI_W $r5, 246 +-... +---- +-# CHECK-LABEL: test_ADDI_D: +-# CHECK-ENC: 0 0 0 0 0 0 1 0 1 1 0 0 0 0 0 1 0 0 1 0 1 1 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: addi.d $a0, $a1, 75 +-name: test_ADDI_D +-body: | +- bb.0: +- $r4 = ADDI_D $r5, 75 +-... +---- +-# CHECK-LABEL: test_LU52I_D: +-# CHECK-ENC: 0 0 0 0 0 0 1 1 0 0 0 0 0 0 1 1 0 0 0 0 1 1 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: lu52i.d $a0, $a1, 195 +-name: test_LU52I_D +-body: | +- bb.0: +- $r4 = LU52I_D $r5, 195 +-... +---- +-# CHECK-LABEL: test_ANDI: +-# CHECK-ENC: 0 0 0 0 0 0 1 1 0 1 0 0 0 0 0 1 1 0 1 0 1 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: andi $a0, $a1, 106 +-name: test_ANDI +-body: | +- bb.0: +- $r4 = ANDI $r5, 106 +-... 
+---- +-# CHECK-LABEL: test_ORI: +-# CHECK-ENC: 0 0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 1 0 1 1 1 1 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: ori $a0, $a1, 47 +-name: test_ORI +-body: | +- bb.0: +- $r4 = ORI $r5, 47 +-... +---- +-# CHECK-LABEL: test_XORI: +-# CHECK-ENC: 0 0 0 0 0 0 1 1 1 1 0 0 0 0 0 1 1 0 0 0 1 1 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: xori $a0, $a1, 99 +-name: test_XORI +-body: | +- bb.0: +- $r4 = XORI $r5, 99 +-... +---- +-# CHECK-LABEL: test_LD_B: +-# CHECK-ENC: 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 1 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: ld.b $a0, $a1, 21 +-name: test_LD_B +-body: | +- bb.0: +- $r4 = LD_B $r5, 21 +-... +---- +-# CHECK-LABEL: test_LD_H: +-# CHECK-ENC: 0 0 1 0 1 0 0 0 0 1 0 0 0 0 0 1 0 1 0 0 0 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: ld.h $a0, $a1, 80 +-name: test_LD_H +-body: | +- bb.0: +- $r4 = LD_H $r5, 80 +-... +---- +-# CHECK-LABEL: test_LD_W: +-# CHECK-ENC: 0 0 1 0 1 0 0 0 1 0 0 0 0 0 0 1 0 1 1 1 0 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: ld.w $a0, $a1, 92 +-name: test_LD_W +-body: | +- bb.0: +- $r4 = LD_W $r5, 92 +-... +---- +-# CHECK-LABEL: test_LD_BU: +-# CHECK-ENC: 0 0 1 0 1 0 1 0 0 0 0 0 0 0 1 0 0 1 0 1 1 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: ld.bu $a0, $a1, 150 +-name: test_LD_BU +-body: | +- bb.0: +- $r4 = LD_BU $r5, 150 +-... +---- +-# CHECK-LABEL: test_LD_HU: +-# CHECK-ENC: 0 0 1 0 1 0 1 0 0 1 0 0 0 0 1 1 0 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: ld.hu $a0, $a1, 198 +-name: test_LD_HU +-body: | +- bb.0: +- $r4 = LD_HU $r5, 198 +-... +---- +-# CHECK-LABEL: test_LD_WU: +-# CHECK-ENC: 0 0 1 0 1 0 1 0 1 0 0 0 0 0 0 0 0 1 1 1 1 1 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: ld.wu $a0, $a1, 31 +-name: test_LD_WU +-body: | +- bb.0: +- $r4 = LD_WU $r5, 31 +-... +---- +-# CHECK-LABEL: test_ST_B: +-# CHECK-ENC: 0 0 1 0 1 0 0 1 0 0 0 0 0 0 0 1 0 1 1 1 1 1 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: st.b $a0, $a1, 95 +-name: test_ST_B +-body: | +- bb.0: +- ST_B $r4, $r5, 95 +-... +---- +-# CHECK-LABEL: test_ST_H: +-# CHECK-ENC: 0 0 1 0 1 0 0 1 0 1 0 0 0 0 0 1 1 1 1 0 1 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: st.h $a0, $a1, 122 +-name: test_ST_H +-body: | +- bb.0: +- ST_H $r4, $r5, 122 +-... +---- +-# CHECK-LABEL: test_ST_W: +-# CHECK-ENC: 0 0 1 0 1 0 0 1 1 0 0 0 0 0 1 0 1 0 1 1 1 1 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: st.w $a0, $a1, 175 +-name: test_ST_W +-body: | +- bb.0: +- ST_W $r4, $r5, 175 +-... +---- +-# CHECK-LABEL: test_ST_D: +-# CHECK-ENC: 0 0 1 0 1 0 0 1 1 1 0 0 0 0 0 0 1 1 1 1 0 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: st.d $a0, $a1, 60 +-name: test_ST_D +-body: | +- bb.0: +- ST_D $r4, $r5, 60 +-... +- +-# ------------------------------------------------------------------------------------------------- +-# Encoding format: 2RI14 +-# ------------------------------------------------------------------------------------------------- +-# ------------------------+-----------------------------------------+--------------+--------------- +-# 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00 +-# ------------------------+-----------------------------------------+--------------+--------------- +-# opcode | imm14 | rj | rd +-# ------------------------+-----------------------------------------+--------------+--------------- +- +---- +-# CHECK-LABEL: test_LDPTR_W: +-# CHECK-ENC: 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: ldptr.w $a0, $a1, 264 +-name: test_LDPTR_W +-body: | +- bb.0: +- $r4 = LDPTR_W $r5, 264 +-... 
+---- +-# CHECK-LABEL: test_LDPTR_D: +-# CHECK-ENC: 0 0 1 0 0 1 1 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: ldptr.d $a0, $a1, 224 +-name: test_LDPTR_D +-body: | +- bb.0: +- $r4 = LDPTR_D $r5, 224 +-... +---- +-# CHECK-LABEL: test_STPTR_W: +-# CHECK-ENC: 0 0 1 0 0 1 0 1 0 0 0 0 0 0 0 1 0 1 0 1 1 1 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: stptr.w $a0, $a1, 348 +-name: test_STPTR_W +-body: | +- bb.0: +- STPTR_W $r4, $r5, 348 +-... +---- +-# CHECK-LABEL: test_STPTR_D: +-# CHECK-ENC: 0 0 1 0 0 1 1 1 0 0 0 0 0 0 1 0 0 1 0 0 0 1 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: stptr.d $a0, $a1, 580 +-name: test_STPTR_D +-body: | +- bb.0: +- STPTR_D $r4, $r5, 580 +-... +---- +-# CHECK-LABEL: test_LL_W: +-# CHECK-ENC: 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 0 0 1 1 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: ll.w $a0, $a1, 972 +-name: test_LL_W +-body: | +- bb.0: +- $r4 = LL_W $r5, 972 +-... +---- +-# CHECK-LABEL: test_LL_D: +-# CHECK-ENC: 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 1 0 1 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: ll.d $a0, $a1, 296 +-name: test_LL_D +-body: | +- bb.0: +- $r4 = LL_D $r5, 296 +-... +---- +-# CHECK-LABEL: test_SC_W: +-# CHECK-ENC: 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: sc.w $a0, $a1, 384 +-name: test_SC_W +-body: | +- bb.0: +- $r4 = SC_W $r4, $r5, 384 +-... +---- +-# CHECK-LABEL: test_SC_D: +-# CHECK-ENC: 0 0 1 0 0 0 1 1 0 0 0 0 0 0 0 1 1 0 1 0 0 1 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: sc.d $a0, $a1, 420 +-name: test_SC_D +-body: | +- bb.0: +- $r4 = SC_D $r4, $r5, 420 +-... +- +-# ------------------------------------------------------------------------------------------------- +-# Encoding format: 2RI16 +-# ------------------------------------------------------------------------------------------------- +-# ------------------+-----------------------------------------------+--------------+--------------- +-# 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00 +-# ------------------+-----------------------------------------------+--------------+--------------- +-# opcode | imm16 | rj | rd +-# ------------------+-----------------------------------------------+--------------+--------------- +- +---- +-# CHECK-LABEL: test_ADDU16I_D: +-# CHECK-ENC: 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 1 1 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: addu16i.d $a0, $a1, 23 +-name: test_ADDU16I_D +-body: | +- bb.0: +- $r4 = ADDU16I_D $r5, 23 +-... +---- +-# CHECK-LABEL: test_JIRL: +-# CHECK-ENC: 0 1 0 0 1 1 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 1 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: jirl $a0, $a1, 196 +-name: test_JIRL +-body: | +- bb.0: +- $r4 = JIRL $r5, 196 +-... +---- +-# CHECK-LABEL: test_BEQ: +-# CHECK-ENC: 0 1 0 1 1 0 0 0 0 0 0 0 0 0 1 1 0 0 0 1 0 0 0 0 1 0 0 0 0 1 0 1 +-# CHECK-ASM: beq $a0, $a1, 784 +-name: test_BEQ +-body: | +- bb.0: +- BEQ $r4, $r5, 784 +-... +---- +-# CHECK-LABEL: test_BNE: +-# CHECK-ENC: 0 1 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 1 0 0 1 0 0 0 0 1 0 1 +-# CHECK-ASM: bne $a0, $a1, 76 +-name: test_BNE +-body: | +- bb.0: +- BNE $r4, $r5, 76 +-... +---- +-# CHECK-LABEL: test_BLT: +-# CHECK-ENC: 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 0 1 1 0 0 1 0 0 0 0 1 0 1 +-# CHECK-ASM: blt $a0, $a1, 492 +-name: test_BLT +-body: | +- bb.0: +- BLT $r4, $r5, 492 +-... +---- +-# CHECK-LABEL: test_BGE: +-# CHECK-ENC: 0 1 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 1 0 0 0 0 1 0 1 +-# CHECK-ASM: bge $a0, $a1, 48 +-name: test_BGE +-body: | +- bb.0: +- BGE $r4, $r5, 48 +-... 
+---- +-# CHECK-LABEL: test_BLTU: +-# CHECK-ENC: 0 1 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 1 0 0 0 0 1 0 1 +-# CHECK-ASM: bltu $a0, $a1, 68 +-name: test_BLTU +-body: | +- bb.0: +- BLTU $r4, $r5, 68 +-... +---- +-# CHECK-LABEL: test_BGEU: +-# CHECK-ENC: 0 1 1 0 1 1 0 0 0 0 0 0 0 0 0 1 0 1 1 0 0 0 0 0 1 0 0 0 0 1 0 1 +-# CHECK-ASM: bgeu $a0, $a1, 352 +-name: test_BGEU +-body: | +- bb.0: +- BGEU $r4, $r5, 352 +diff --git a/llvm/test/CodeGen/LoongArch/3r.mir b/llvm/test/CodeGen/LoongArch/3r.mir +deleted file mode 100644 +index a1b97d563..000000000 +--- a/llvm/test/CodeGen/LoongArch/3r.mir ++++ /dev/null +@@ -1,995 +0,0 @@ +-# RUN: llc %s -mtriple=loongarch64 -start-after=prologepilog -O0 -filetype=obj -o - \ +-# RUN: | extract-section .text \ +-# RUN: | FileCheck %s -check-prefix=CHECK-ENC +-# RUN: llc %s -mtriple=loongarch64 -start-after=prologepilog -O0 -filetype=asm -o - \ +-# RUN: | FileCheck %s -check-prefix=CHECK-ASM +- +-# ------------------------------------------------------------------------------------------------- +-# Encoding format: 3R +-# ------------------------------------------------------------------------------------------------- +-# ---------------------------------------------------+--------------+--------------+--------------- +-# 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00 +-# ---------------------------------------------------+--------------+--------------+--------------- +-# opcode | rk | rj | rd +-# ---------------------------------------------------+--------------+--------------+--------------- +- +---- +-# CHECK-LABEL: test_ADD_W: +-# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: add.w $a0, $a1, $a0 +-name: test_ADD_W +-body: | +- bb.0: +- $r4 = ADD_W $r5, $r4 +-... +---- +-# CHECK-LABEL: test_ADD_D: +-# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: add.d $a0, $a1, $a0 +-name: test_ADD_D +-body: | +- bb.0: +- $r4 = ADD_D $r5, $r4 +-... +---- +-# CHECK-LABEL: test_SUB_W: +-# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: sub.w $a0, $a1, $a0 +-name: test_SUB_W +-body: | +- bb.0: +- $r4 = SUB_W $r5, $r4 +-... +---- +-# CHECK-LABEL: test_SUB_D: +-# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 1 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: sub.d $a0, $a1, $a0 +-name: test_SUB_D +-body: | +- bb.0: +- $r4 = SUB_D $r5, $r4 +-... +---- +-# CHECK-LABEL: test_SLT: +-# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: slt $a0, $a1, $a0 +-name: test_SLT +-body: | +- bb.0: +- $r4 = SLT $r5, $r4 +-... +---- +-# CHECK-LABEL: test_SLTU: +-# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 1 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: sltu $a0, $a1, $a0 +-name: test_SLTU +-body: | +- bb.0: +- $r4 = SLTU $r5, $r4 +-... +---- +-# CHECK-LABEL: test_MASKEQZ: +-# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 1 0 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: maskeqz $a0, $a1, $a0 +-name: test_MASKEQZ +-body: | +- bb.0: +- $r4 = MASKEQZ $r5, $r4 +-... +---- +-# CHECK-LABEL: test_MASKNEZ: +-# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 1 1 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: masknez $a0, $a1, $a0 +-name: test_MASKNEZ +-body: | +- bb.0: +- $r4 = MASKNEZ $r5, $r4 +-... +---- +-# CHECK-LABEL: test_NOR: +-# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: nor $a0, $a1, $a0 +-name: test_NOR +-body: | +- bb.0: +- $r4 = NOR $r5, $r4 +-... 
+---- +-# CHECK-LABEL: test_AND: +-# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 1 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: and $a0, $a1, $a0 +-name: test_AND +-body: | +- bb.0: +- $r4 = AND $r5, $r4 +-... +---- +-# CHECK-LABEL: test_OR: +-# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 1 0 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: or $a0, $a1, $a0 +-name: test_OR +-body: | +- bb.0: +- $r4 = OR $r5, $r4 +-... +---- +-# CHECK-LABEL: test_XOR: +-# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 1 1 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: xor $a0, $a1, $a0 +-name: test_XOR +-body: | +- bb.0: +- $r4 = XOR $r5, $r4 +-... +---- +-# CHECK-LABEL: test_ORN: +-# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 1 0 1 1 0 0 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: orn $a0, $a1, $a0 +-name: test_ORN +-body: | +- bb.0: +- $r4 = ORN $r5, $r4 +-... +---- +-# CHECK-LABEL: test_ANDN: +-# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 1 0 1 1 0 1 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: andn $a0, $a1, $a0 +-name: test_ANDN +-body: | +- bb.0: +- $r4 = ANDN $r5, $r4 +-... +---- +-# CHECK-LABEL: test_SLL_W: +-# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 1 0 1 1 1 0 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: sll.w $a0, $a1, $a0 +-name: test_SLL_W +-body: | +- bb.0: +- $r4 = SLL_W $r5, $r4 +-... +---- +-# CHECK-LABEL: test_SRL_W: +-# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 1 0 1 1 1 1 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: srl.w $a0, $a1, $a0 +-name: test_SRL_W +-body: | +- bb.0: +- $r4 = SRL_W $r5, $r4 +-... +---- +-# CHECK-LABEL: test_SRA_W: +-# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: sra.w $a0, $a1, $a0 +-name: test_SRA_W +-body: | +- bb.0: +- $r4 = SRA_W $r5, $r4 +-... +---- +-# CHECK-LABEL: test_SLL_D: +-# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 1 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: sll.d $a0, $a1, $a0 +-name: test_SLL_D +-body: | +- bb.0: +- $r4 = SLL_D $r5, $r4 +-... +---- +-# CHECK-LABEL: test_SRL_D: +-# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 1 0 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: srl.d $a0, $a1, $a0 +-name: test_SRL_D +-body: | +- bb.0: +- $r4 = SRL_D $r5, $r4 +-... +---- +-# CHECK-LABEL: test_SRA_D: +-# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 1 1 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: sra.d $a0, $a1, $a0 +-name: test_SRA_D +-body: | +- bb.0: +- $r4 = SRA_D $r5, $r4 +-... +---- +-# CHECK-LABEL: test_ROTR_W: +-# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 1 1 0 1 1 0 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: rotr.w $a0, $a1, $a0 +-name: test_ROTR_W +-body: | +- bb.0: +- $r4 = ROTR_W $r5, $r4 +-... +---- +-# CHECK-LABEL: test_ROTR_D: +-# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 1 1 0 1 1 1 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: rotr.d $a0, $a1, $a0 +-name: test_ROTR_D +-body: | +- bb.0: +- $r4 = ROTR_D $r5, $r4 +-... +---- +-# CHECK-LABEL: test_MUL_W: +-# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: mul.w $a0, $a1, $a0 +-name: test_MUL_W +-body: | +- bb.0: +- $r4 = MUL_W $r5, $r4 +-... +---- +-# CHECK-LABEL: test_MULH_W: +-# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 1 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: mulh.w $a0, $a1, $a0 +-name: test_MULH_W +-body: | +- bb.0: +- $r4 = MULH_W $r5, $r4 +-... +---- +-# CHECK-LABEL: test_MULH_WU: +-# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 1 0 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: mulh.wu $a0, $a1, $a0 +-name: test_MULH_WU +-body: | +- bb.0: +- $r4 = MULH_WU $r5, $r4 +-... 
+---- +-# CHECK-LABEL: test_MUL_D: +-# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 1 1 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: mul.d $a0, $a1, $a0 +-name: test_MUL_D +-body: | +- bb.0: +- $r4 = MUL_D $r5, $r4 +-... +---- +-# CHECK-LABEL: test_MULH_D: +-# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 0 0 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: mulh.d $a0, $a1, $a0 +-name: test_MULH_D +-body: | +- bb.0: +- $r4 = MULH_D $r5, $r4 +-... +---- +-# CHECK-LABEL: test_MULH_DU: +-# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 0 1 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: mulh.du $a0, $a1, $a0 +-name: test_MULH_DU +-body: | +- bb.0: +- $r4 = MULH_DU $r5, $r4 +-... +---- +-# CHECK-LABEL: test_MULW_D_W: +-# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 0 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: mulw.d.w $a0, $a1, $a0 +-name: test_MULW_D_W +-body: | +- bb.0: +- $r4 = MULW_D_W $r5, $r4 +-... +---- +-# CHECK-LABEL: test_MULW_D_WU: +-# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: mulw.d.wu $a0, $a1, $a0 +-name: test_MULW_D_WU +-body: | +- bb.0: +- $r4 = MULW_D_WU $r5, $r4 +-... +---- +-# CHECK-LABEL: test_DIV_W: +-# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: div.w $a0, $a1, $a0 +-name: test_DIV_W +-body: | +- bb.0: +- $r4 = DIV_W $r5, $r4 +-... +---- +-# CHECK-LABEL: test_MOD_W: +-# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: mod.w $a0, $a1, $a0 +-name: test_MOD_W +-body: | +- bb.0: +- $r4 = MOD_W $r5, $r4 +-... +---- +-# CHECK-LABEL: test_DIV_WU: +-# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: div.wu $a0, $a1, $a0 +-name: test_DIV_WU +-body: | +- bb.0: +- $r4 = DIV_WU $r5, $r4 +-... +---- +-# CHECK-LABEL: test_MOD_WU: +-# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 1 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: mod.wu $a0, $a1, $a0 +-name: test_MOD_WU +-body: | +- bb.0: +- $r4 = MOD_WU $r5, $r4 +-... +---- +-# CHECK-LABEL: test_DIV_D: +-# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: div.d $a0, $a1, $a0 +-name: test_DIV_D +-body: | +- bb.0: +- $r4 = DIV_D $r5, $r4 +-... +---- +-# CHECK-LABEL: test_MOD_D: +-# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 1 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: mod.d $a0, $a1, $a0 +-name: test_MOD_D +-body: | +- bb.0: +- $r4 = MOD_D $r5, $r4 +-... +---- +-# CHECK-LABEL: test_DIV_DU: +-# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 1 0 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: div.du $a0, $a1, $a0 +-name: test_DIV_DU +-body: | +- bb.0: +- $r4 = DIV_DU $r5, $r4 +-... +---- +-# CHECK-LABEL: test_MOD_DU: +-# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 1 1 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: mod.du $a0, $a1, $a0 +-name: test_MOD_DU +-body: | +- bb.0: +- $r4 = MOD_DU $r5, $r4 +-... +---- +-# CHECK-LABEL: test_CRC_W_B_W: +-# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: crc.w.b.w $a0, $a1, $a0 +-name: test_CRC_W_B_W +-body: | +- bb.0: +- $r4 = CRC_W_B_W $r5, $r4 +-... +---- +-# CHECK-LABEL: test_CRC_W_H_W: +-# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 1 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: crc.w.h.w $a0, $a1, $a0 +-name: test_CRC_W_H_W +-body: | +- bb.0: +- $r4 = CRC_W_H_W $r5, $r4 +-... 
+---- +-# CHECK-LABEL: test_CRC_W_W_W: +-# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 1 0 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: crc.w.w.w $a0, $a1, $a0 +-name: test_CRC_W_W_W +-body: | +- bb.0: +- $r4 = CRC_W_W_W $r5, $r4 +-... +---- +-# CHECK-LABEL: test_CRC_W_D_W: +-# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 1 1 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: crc.w.d.w $a0, $a1, $a0 +-name: test_CRC_W_D_W +-body: | +- bb.0: +- $r4 = CRC_W_D_W $r5, $r4 +-... +---- +-# CHECK-LABEL: test_CRCC_W_B_W: +-# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 1 0 0 1 1 0 0 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: crcc.w.b.w $a0, $a1, $a0 +-name: test_CRCC_W_B_W +-body: | +- bb.0: +- $r4 = CRCC_W_B_W $r5, $r4 +-... +---- +-# CHECK-LABEL: test_CRCC_W_H_W: +-# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 1 0 0 1 1 0 1 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: crcc.w.h.w $a0, $a1, $a0 +-name: test_CRCC_W_H_W +-body: | +- bb.0: +- $r4 = CRCC_W_H_W $r5, $r4 +-... +---- +-# CHECK-LABEL: test_CRCC_W_W_W: +-# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 1 0 0 1 1 1 0 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: crcc.w.w.w $a0, $a1, $a0 +-name: test_CRCC_W_W_W +-body: | +- bb.0: +- $r4 = CRCC_W_W_W $r5, $r4 +-... +---- +-# CHECK-LABEL: test_CRCC_W_D_W: +-# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 1 0 0 1 1 1 1 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: crcc.w.d.w $a0, $a1, $a0 +-name: test_CRCC_W_D_W +-body: | +- bb.0: +- $r4 = CRCC_W_D_W $r5, $r4 +-... +---- +-# CHECK-LABEL: test_AMSWAP_DB_W: +-# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 0 1 0 0 1 0 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 +-# CHECK-ASM: amswap_db.w $a0, $a1, $a2 +-name: test_AMSWAP_DB_W +-body: | +- bb.0: +- $r4 = AMSWAP_DB_W $r5, $r6 +-... +---- +-# CHECK-LABEL: test_AMSWAP_DB_D: +-# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 0 1 0 0 1 1 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 +-# CHECK-ASM: amswap_db.d $a0, $a1, $a2 +-name: test_AMSWAP_DB_D +-body: | +- bb.0: +- $r4 = AMSWAP_DB_D $r5, $r6 +-... +---- +-# CHECK-LABEL: test_AMADD_DB_W: +-# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 0 1 0 1 0 0 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 +-# CHECK-ASM: amadd_db.w $a0, $a1, $a2 +-name: test_AMADD_DB_W +-body: | +- bb.0: +- $r4 = AMADD_DB_W $r5, $r6 +-... +---- +-# CHECK-LABEL: test_AMADD_DB_D: +-# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 0 1 0 1 0 1 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 +-# CHECK-ASM: amadd_db.d $a0, $a1, $a2 +-name: test_AMADD_DB_D +-body: | +- bb.0: +- $r4 = AMADD_DB_D $r5, $r6 +-... +---- +-# CHECK-LABEL: test_AMAND_DB_W: +-# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 0 1 0 1 1 0 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 +-# CHECK-ASM: amand_db.w $a0, $a1, $a2 +-name: test_AMAND_DB_W +-body: | +- bb.0: +- $r4 = AMAND_DB_W $r5, $r6 +-... +---- +-# CHECK-LABEL: test_AMAND_DB_D: +-# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 0 1 0 1 1 1 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 +-# CHECK-ASM: amand_db.d $a0, $a1, $a2 +-name: test_AMAND_DB_D +-body: | +- bb.0: +- $r4 = AMAND_DB_D $r5, $r6 +-... +---- +-# CHECK-LABEL: test_AMOR_DB_W: +-# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 0 1 1 0 0 0 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 +-# CHECK-ASM: amor_db.w $a0, $a1, $a2 +-name: test_AMOR_DB_W +-body: | +- bb.0: +- $r4 = AMOR_DB_W $r5, $r6 +-... +---- +-# CHECK-LABEL: test_AMOR_DB_D: +-# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 0 1 1 0 0 1 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 +-# CHECK-ASM: amor_db.d $a0, $a1, $a2 +-name: test_AMOR_DB_D +-body: | +- bb.0: +- $r4 = AMOR_DB_D $r5, $r6 +-... +---- +-# CHECK-LABEL: test_AMXOR_DB_W: +-# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 0 1 1 0 1 0 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 +-# CHECK-ASM: amxor_db.w $a0, $a1, $a2 +-name: test_AMXOR_DB_W +-body: | +- bb.0: +- $r4 = AMXOR_DB_W $r5, $r6 +-... 
+---- +-# CHECK-LABEL: test_AMXOR_DB_D: +-# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 0 1 1 0 1 1 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 +-# CHECK-ASM: amxor_db.d $a0, $a1, $a2 +-name: test_AMXOR_DB_D +-body: | +- bb.0: +- $r4 = AMXOR_DB_D $r5, $r6 +-... +---- +-# CHECK-LABEL: test_AMMAX_DB_W: +-# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 0 1 1 1 0 0 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 +-# CHECK-ASM: ammax_db.w $a0, $a1, $a2 +-name: test_AMMAX_DB_W +-body: | +- bb.0: +- $r4 = AMMAX_DB_W $r5, $r6 +-... +---- +-# CHECK-LABEL: test_AMMAX_DB_D: +-# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 0 1 1 1 0 1 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 +-# CHECK-ASM: ammax_db.d $a0, $a1, $a2 +-name: test_AMMAX_DB_D +-body: | +- bb.0: +- $r4 = AMMAX_DB_D $r5, $r6 +-... +---- +-# CHECK-LABEL: test_AMMIN_DB_W: +-# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 0 1 1 1 1 0 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 +-# CHECK-ASM: ammin_db.w $a0, $a1, $a2 +-name: test_AMMIN_DB_W +-body: | +- bb.0: +- $r4 = AMMIN_DB_W $r5, $r6 +-... +---- +-# CHECK-LABEL: test_AMMIN_DB_D: +-# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 0 1 1 1 1 1 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 +-# CHECK-ASM: ammin_db.d $a0, $a1, $a2 +-name: test_AMMIN_DB_D +-body: | +- bb.0: +- $r4 = AMMIN_DB_D $r5, $r6 +-... +---- +-# CHECK-LABEL: test_AMMAX_DB_WU: +-# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 1 0 0 0 0 0 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 +-# CHECK-ASM: ammax_db.wu $a0, $a1, $a2 +-name: test_AMMAX_DB_WU +-body: | +- bb.0: +- $r4 = AMMAX_DB_WU $r5, $r6 +-... +---- +-# CHECK-LABEL: test_AMMAX_DB_DU: +-# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 1 0 0 0 0 1 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 +-# CHECK-ASM: ammax_db.du $a0, $a1, $a2 +-name: test_AMMAX_DB_DU +-body: | +- bb.0: +- $r4 = AMMAX_DB_DU $r5, $r6 +-... +---- +-# CHECK-LABEL: test_AMMIN_DB_WU: +-# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 1 0 0 0 1 0 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 +-# CHECK-ASM: ammin_db.wu $a0, $a1, $a2 +-name: test_AMMIN_DB_WU +-body: | +- bb.0: +- $r4 = AMMIN_DB_WU $r5, $r6 +-... +---- +-# CHECK-LABEL: test_AMMIN_DB_DU: +-# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 1 0 0 0 1 1 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 +-# CHECK-ASM: ammin_db.du $a0, $a1, $a2 +-name: test_AMMIN_DB_DU +-body: | +- bb.0: +- $r4 = AMMIN_DB_DU $r5, $r6 +-... +---- +-# CHECK-LABEL: test_AMSWAP_W: +-# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 0 0 0 0 0 0 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 +-# CHECK-ASM: amswap.w $a0, $a1, $a2 +-name: test_AMSWAP_W +-body: | +- bb.0: +- $r4 = AMSWAP_W $r5, $r6 +-... +---- +-# CHECK-LABEL: test_AMSWAP_D: +-# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 0 0 0 0 0 1 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 +-# CHECK-ASM: amswap.d $a0, $a1, $a2 +-name: test_AMSWAP_D +-body: | +- bb.0: +- $r4 = AMSWAP_D $r5, $r6 +-... +---- +-# CHECK-LABEL: test_AMADD_W: +-# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 0 0 0 0 1 0 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 +-# CHECK-ASM: amadd.w $a0, $a1, $a2 +-name: test_AMADD_W +-body: | +- bb.0: +- $r4 = AMADD_W $r5, $r6 +-... +---- +-# CHECK-LABEL: test_AMADD_D: +-# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 0 0 0 0 1 1 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 +-# CHECK-ASM: amadd.d $a0, $a1, $a2 +-name: test_AMADD_D +-body: | +- bb.0: +- $r4 = AMADD_D $r5, $r6 +-... +---- +-# CHECK-LABEL: test_AMAND_W: +-# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 0 0 0 1 0 0 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 +-# CHECK-ASM: amand.w $a0, $a1, $a2 +-name: test_AMAND_W +-body: | +- bb.0: +- $r4 = AMAND_W $r5, $r6 +-... +---- +-# CHECK-LABEL: test_AMAND_D: +-# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 0 0 0 1 0 1 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 +-# CHECK-ASM: amand.d $a0, $a1, $a2 +-name: test_AMAND_D +-body: | +- bb.0: +- $r4 = AMAND_D $r5, $r6 +-... 
+---- +-# CHECK-LABEL: test_AMOR_W: +-# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 0 0 0 1 1 0 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 +-# CHECK-ASM: amor.w $a0, $a1, $a2 +-name: test_AMOR_W +-body: | +- bb.0: +- $r4 = AMOR_W $r5, $r6 +-... +---- +-# CHECK-LABEL: test_AMOR_D: +-# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 0 0 0 1 1 1 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 +-# CHECK-ASM: amor.d $a0, $a1, $a2 +-name: test_AMOR_D +-body: | +- bb.0: +- $r4 = AMOR_D $r5, $r6 +-... +---- +-# CHECK-LABEL: test_AMXOR_W: +-# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 0 0 1 0 0 0 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 +-# CHECK-ASM: amxor.w $a0, $a1, $a2 +-name: test_AMXOR_W +-body: | +- bb.0: +- $r4 = AMXOR_W $r5, $r6 +-... +---- +-# CHECK-LABEL: test_AMXOR_D: +-# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 0 0 1 0 0 1 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 +-# CHECK-ASM: amxor.d $a0, $a1, $a2 +-name: test_AMXOR_D +-body: | +- bb.0: +- $r4 = AMXOR_D $r5, $r6 +-... +---- +-# CHECK-LABEL: test_AMMAX_W: +-# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 0 0 1 0 1 0 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 +-# CHECK-ASM: ammax.w $a0, $a1, $a2 +-name: test_AMMAX_W +-body: | +- bb.0: +- $r4 = AMMAX_W $r5, $r6 +-... +---- +-# CHECK-LABEL: test_AMMAX_D: +-# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 0 0 1 0 1 1 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 +-# CHECK-ASM: ammax.d $a0, $a1, $a2 +-name: test_AMMAX_D +-body: | +- bb.0: +- $r4 = AMMAX_D $r5, $r6 +-... +---- +-# CHECK-LABEL: test_AMMIN_W: +-# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 0 0 1 1 0 0 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 +-# CHECK-ASM: ammin.w $a0, $a1, $a2 +-name: test_AMMIN_W +-body: | +- bb.0: +- $r4 = AMMIN_W $r5, $r6 +-... +---- +-# CHECK-LABEL: test_AMMIN_D: +-# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 0 0 1 1 0 1 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 +-# CHECK-ASM: ammin.d $a0, $a1, $a2 +-name: test_AMMIN_D +-body: | +- bb.0: +- $r4 = AMMIN_D $r5, $r6 +-... +---- +-# CHECK-LABEL: test_AMMAX_WU: +-# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 0 0 1 1 1 0 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 +-# CHECK-ASM: ammax.wu $a0, $a1, $a2 +-name: test_AMMAX_WU +-body: | +- bb.0: +- $r4 = AMMAX_WU $r5, $r6 +-... +---- +-# CHECK-LABEL: test_AMMAX_DU: +-# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 0 0 1 1 1 1 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 +-# CHECK-ASM: ammax.du $a0, $a1, $a2 +-name: test_AMMAX_DU +-body: | +- bb.0: +- $r4 = AMMAX_DU $r5, $r6 +-... +---- +-# CHECK-LABEL: test_AMMIN_WU: +-# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 0 1 0 0 0 0 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 +-# CHECK-ASM: ammin.wu $a0, $a1, $a2 +-name: test_AMMIN_WU +-body: | +- bb.0: +- $r4 = AMMIN_WU $r5, $r6 +-... +---- +-# CHECK-LABEL: test_AMMIN_DU: +-# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 0 1 0 0 0 1 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 +-# CHECK-ASM: ammin.du $a0, $a1, $a2 +-name: test_AMMIN_DU +-body: | +- bb.0: +- $r4 = AMMIN_DU $r5, $r6 +-... +---- +-# CHECK-LABEL: test_LDX_B: +-# CHECK-ENC: 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: ldx.b $a0, $a1, $a2 +-name: test_LDX_B +-body: | +- bb.0: +- $r4 = LDX_B $r5, $r6 +-... +---- +-# CHECK-LABEL: test_LDX_H: +-# CHECK-ENC: 0 0 1 1 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: ldx.h $a0, $a1, $a2 +-name: test_LDX_H +-body: | +- bb.0: +- $r4 = LDX_H $r5, $r6 +-... +---- +-# CHECK-LABEL: test_LDX_W: +-# CHECK-ENC: 0 0 1 1 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: ldx.w $a0, $a1, $a2 +-name: test_LDX_W +-body: | +- bb.0: +- $r4 = LDX_W $r5, $r6 +-... 
+---- +-# CHECK-LABEL: test_LDX_D: +-# CHECK-ENC: 0 0 1 1 1 0 0 0 0 0 0 0 1 1 0 0 0 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: ldx.d $a0, $a1, $a2 +-name: test_LDX_D +-body: | +- bb.0: +- $r4 = LDX_D $r5, $r6 +-... +---- +-# CHECK-LABEL: test_LDX_BU: +-# CHECK-ENC: 0 0 1 1 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: ldx.bu $a0, $a1, $a2 +-name: test_LDX_BU +-body: | +- bb.0: +- $r4 = LDX_BU $r5, $r6 +-... +---- +-# CHECK-LABEL: test_LDX_HU: +-# CHECK-ENC: 0 0 1 1 1 0 0 0 0 0 1 0 0 1 0 0 0 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: ldx.hu $a0, $a1, $a2 +-name: test_LDX_HU +-body: | +- bb.0: +- $r4 = LDX_HU $r5, $r6 +-... +---- +-# CHECK-LABEL: test_LDX_WU: +-# CHECK-ENC: 0 0 1 1 1 0 0 0 0 0 1 0 1 0 0 0 0 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: ldx.wu $a0, $a1, $a2 +-name: test_LDX_WU +-body: | +- bb.0: +- $r4 = LDX_WU $r5, $r6 +-... +---- +-# CHECK-LABEL: test_LDGT_B: +-# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 1 1 0 0 0 0 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: ldgt.b $a0, $a1, $a2 +-name: test_LDGT_B +-body: | +- bb.0: +- $r4 = LDGT_B $r5, $r6 +-... +---- +-# CHECK-LABEL: test_LDGT_H: +-# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 1 1 0 0 0 1 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: ldgt.h $a0, $a1, $a2 +-name: test_LDGT_H +-body: | +- bb.0: +- $r4 = LDGT_H $r5, $r6 +-... +---- +-# CHECK-LABEL: test_LDGT_W: +-# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 1 1 0 0 1 0 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: ldgt.w $a0, $a1, $a2 +-name: test_LDGT_W +-body: | +- bb.0: +- $r4 = LDGT_W $r5, $r6 +-... +---- +-# CHECK-LABEL: test_LDGT_D: +-# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 1 1 0 0 1 1 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: ldgt.d $a0, $a1, $a2 +-name: test_LDGT_D +-body: | +- bb.0: +- $r4 = LDGT_D $r5, $r6 +-... +---- +-# CHECK-LABEL: test_LDLE_B: +-# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 1 1 0 1 0 0 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: ldle.b $a0, $a1, $a2 +-name: test_LDLE_B +-body: | +- bb.0: +- $r4 = LDLE_B $r5, $r6 +-... +---- +-# CHECK-LABEL: test_LDLE_H: +-# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 1 1 0 1 0 1 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: ldle.h $a0, $a1, $a2 +-name: test_LDLE_H +-body: | +- bb.0: +- $r4 = LDLE_H $r5, $r6 +-... +---- +-# CHECK-LABEL: test_LDLE_W: +-# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 1 1 0 1 1 0 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: ldle.w $a0, $a1, $a2 +-name: test_LDLE_W +-body: | +- bb.0: +- $r4 = LDLE_W $r5, $r6 +-... +---- +-# CHECK-LABEL: test_LDLE_D: +-# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 1 1 0 1 1 1 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: ldle.d $a0, $a1, $a2 +-name: test_LDLE_D +-body: | +- bb.0: +- $r4 = LDLE_D $r5, $r6 +-... +---- +-# CHECK-LABEL: test_STX_B: +-# CHECK-ENC: 0 0 1 1 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: stx.b $a0, $a1, $a2 +-name: test_STX_B +-body: | +- bb.0: +- STX_B $r4, $r5, $r6 +-... +---- +-# CHECK-LABEL: test_STX_H: +-# CHECK-ENC: 0 0 1 1 1 0 0 0 0 0 0 1 0 1 0 0 0 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: stx.h $a0, $a1, $a2 +-name: test_STX_H +-body: | +- bb.0: +- STX_H $r4, $r5, $r6 +-... +---- +-# CHECK-LABEL: test_STX_W: +-# CHECK-ENC: 0 0 1 1 1 0 0 0 0 0 0 1 1 0 0 0 0 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: stx.w $a0, $a1, $a2 +-name: test_STX_W +-body: | +- bb.0: +- STX_W $r4, $r5, $r6 +-... +---- +-# CHECK-LABEL: test_STX_D: +-# CHECK-ENC: 0 0 1 1 1 0 0 0 0 0 0 1 1 1 0 0 0 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: stx.d $a0, $a1, $a2 +-name: test_STX_D +-body: | +- bb.0: +- STX_D $r4, $r5, $r6 +-... 
+---- +-# CHECK-LABEL: test_STGT_B: +-# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 1 1 1 0 0 0 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: stgt.b $a0, $a1, $a2 +-name: test_STGT_B +-body: | +- bb.0: +- STGT_B $r4, $r5, $r6 +-... +---- +-# CHECK-LABEL: test_STGT_H: +-# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 1 1 1 0 0 1 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: stgt.h $a0, $a1, $a2 +-name: test_STGT_H +-body: | +- bb.0: +- STGT_H $r4, $r5, $r6 +-... +---- +-# CHECK-LABEL: test_STGT_W: +-# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 1 1 1 0 1 0 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: stgt.w $a0, $a1, $a2 +-name: test_STGT_W +-body: | +- bb.0: +- STGT_W $r4, $r5, $r6 +-... +---- +-# CHECK-LABEL: test_STGT_D: +-# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 1 1 1 0 1 1 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: stgt.d $a0, $a1, $a2 +-name: test_STGT_D +-body: | +- bb.0: +- STGT_D $r4, $r5, $r6 +-... +---- +-# CHECK-LABEL: test_STLE_B: +-# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 1 1 1 1 0 0 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: stle.b $a0, $a1, $a2 +-name: test_STLE_B +-body: | +- bb.0: +- STLE_B $r4, $r5, $r6 +-... +---- +-# CHECK-LABEL: test_STLE_H: +-# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 1 1 1 1 0 1 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: stle.h $a0, $a1, $a2 +-name: test_STLE_H +-body: | +- bb.0: +- STLE_H $r4, $r5, $r6 +-... +---- +-# CHECK-LABEL: test_STLE_W: +-# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 1 1 1 1 1 0 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: stle.w $a0, $a1, $a2 +-name: test_STLE_W +-body: | +- bb.0: +- STLE_W $r4, $r5, $r6 +-... +---- +-# CHECK-LABEL: test_STLE_D: +-# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 1 1 1 1 1 1 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: stle.d $a0, $a1, $a2 +-name: test_STLE_D +-body: | +- bb.0: +- STLE_D $r4, $r5, $r6 +diff --git a/llvm/test/CodeGen/LoongArch/3ri.mir b/llvm/test/CodeGen/LoongArch/3ri.mir +deleted file mode 100644 +index c86e14189..000000000 +--- a/llvm/test/CodeGen/LoongArch/3ri.mir ++++ /dev/null +@@ -1,69 +0,0 @@ +-# RUN: llc %s -mtriple=loongarch64 -start-after=prologepilog -O0 -filetype=obj -o - \ +-# RUN: | extract-section .text \ +-# RUN: | FileCheck %s -check-prefix=CHECK-ENC +-# RUN: llc %s -mtriple=loongarch64 -start-after=prologepilog -O0 -filetype=asm -o - \ +-# RUN: | FileCheck %s -check-prefix=CHECK-ASM +- +-# ------------------------------------------------------------------------------------------------- +-# Encoding format: 3RI2 +-# ------------------------------------------------------------------------------------------------- +-# ---------------------------------------------+-----+--------------+--------------+--------------- +-# 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00 +-# ---------------------------------------------+-----+--------------+--------------+--------------- +-# opcode |imm2 | rk | rj | rd +-# ---------------------------------------------+-----+--------------+--------------+--------------- +- +---- +-# CHECK-LABEL: test_ALSL_W: +-# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 1 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: alsl.w $a0, $a1, $a2, 4 +-name: test_ALSL_W +-body: | +- bb.0: +- $r4 = ALSL_W $r5, $r6, 4 +-... +---- +-# CHECK-LABEL: test_ALSL_WU: +-# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 1 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: alsl.wu $a0, $a1, $a2, 2 +-name: test_ALSL_WU +-body: | +- bb.0: +- $r4 = ALSL_WU $r5, $r6, 2 +-... 
+---- +-# CHECK-LABEL: test_ALSL_D: +-# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 1 0 1 1 0 1 1 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: alsl.d $a0, $a1, $a2, 4 +-name: test_ALSL_D +-body: | +- bb.0: +- $r4 = ALSL_D $r5, $r6, 4 +-... +---- +-# CHECK-LABEL: test_BYTEPICK_W: +-# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: bytepick.w $a0, $a1, $a2, 0 +-name: test_BYTEPICK_W +-body: | +- bb.0: +- $r4 = BYTEPICK_W $r5, $r6, 0 +-... +- +-# ------------------------------------------------------------------------------------------------- +-# Encoding format: 3RI3 +-# ------------------------------------------------------------------------------------------------- +-# ------------------------------------------+--------+--------------+--------------+--------------- +-# 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00 +-# ------------------------------------------+--------+--------------+--------------+--------------- +-# opcode | imm3 | rk | rj | rd +-# ------------------------------------------+--------+--------------+--------------+--------------- +- +---- +-# CHECK-LABEL: test_BYTEPICK_D: +-# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: bytepick.d $a0, $a1, $a2, 4 +-name: test_BYTEPICK_D +-body: | +- bb.0: +- $r4 = BYTEPICK_D $r5, $r6, 4 +diff --git a/llvm/test/CodeGen/LoongArch/align.ll b/llvm/test/CodeGen/LoongArch/align.ll +new file mode 100644 +index 000000000..c5b08dbd4 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/align.ll +@@ -0,0 +1,8 @@ ++; RUN: llc -mtriple=loongarch64 %s -o - | FileCheck %s ++ ++define void @foo() { ++;CHECK: .p2align 2 ++;CHECK: foo: ++entry: ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/atomic-operand-imm0.ll b/llvm/test/CodeGen/LoongArch/atomic-operand-imm0.ll +new file mode 100644 +index 000000000..d1d0c0bc4 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/atomic-operand-imm0.ll +@@ -0,0 +1,17 @@ ++; Test that the last immediate 0 operand of amtomic instruction is printed ++ ++; RUN: llc -march=loongarch64 -o - %s | FileCheck %s ++ ++define void @test_i32(i32* %dst, i32 %val) { ++; CHECK: ammax_db.wu $r[[REG1:[0-9]+]], $r[[REG2:[0-9]+]], $r[[REG3:[0-9]+]], 0 ++entry: ++ %a = atomicrmw umax i32* %dst, i32 %val monotonic ++ ret void ++} ++ ++define void @test_i64(i64* %dst, i64 %val) { ++; CHECK: ammax_db.du $r[[REG1:[0-9]+]], $r[[REG2:[0-9]+]], $r[[REG3:[0-9]+]], 0 ++entry: ++ %a = atomicrmw umax i64* %dst, i64 %val monotonic ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/atomic_16_8.ll b/llvm/test/CodeGen/LoongArch/atomic_16_8.ll +new file mode 100644 +index 000000000..d5c3e0dad +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/atomic_16_8.ll +@@ -0,0 +1,809 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc -mtriple=loongarch64 -o - %s | FileCheck %s ++ ++ ++define void @umax_8(i8* %ptr) { ++; CHECK-LABEL: umax_8: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r5, $zero, 100 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r4, $r4, 3 ++; CHECK-NEXT: ori $r7, $zero, 255 ++; CHECK-NEXT: sll.w $r7, $r7, $r4 ++; CHECK-NEXT: nor $r8, $zero, $r7 ++; CHECK-NEXT: sll.w $r5, $r5, $r4 ++; CHECK-NEXT: dbar 0 ++; CHECK-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r14, $r10, $r7 ++; CHECK-NEXT: and $r5, $r5, $r7 ++; CHECK-NEXT: sltu $r13, $r14, 
$r5 ++; CHECK-NEXT: masknez $r11, $r14, $r13 ++; CHECK-NEXT: maskeqz $r13, $r5, $r13 ++; CHECK-NEXT: or $r11, $r11, $r13 ++; CHECK-NEXT: and $r12, $r10, $r8 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB0_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r9, $r10, $r7 ++; CHECK-NEXT: srl.w $r9, $r9, $r4 ++; CHECK-NEXT: ext.w.b $r9, $r9 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %ret = atomicrmw umax i8* %ptr, i8 100 seq_cst ++ ret void ++} ++ ++define void @umax_16(i16* %ptr) { ++; CHECK-LABEL: umax_16: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r5, $zero, 100 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r4, $r4, 3 ++; CHECK-NEXT: lu12i.w $r7, 15 ++; CHECK-NEXT: ori $r7, $r7, 4095 ++; CHECK-NEXT: sll.w $r7, $r7, $r4 ++; CHECK-NEXT: nor $r8, $zero, $r7 ++; CHECK-NEXT: sll.w $r5, $r5, $r4 ++; CHECK-NEXT: dbar 0 ++; CHECK-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r14, $r10, $r7 ++; CHECK-NEXT: and $r5, $r5, $r7 ++; CHECK-NEXT: sltu $r13, $r14, $r5 ++; CHECK-NEXT: masknez $r11, $r14, $r13 ++; CHECK-NEXT: maskeqz $r13, $r5, $r13 ++; CHECK-NEXT: or $r11, $r11, $r13 ++; CHECK-NEXT: and $r12, $r10, $r8 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB1_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r9, $r10, $r7 ++; CHECK-NEXT: srl.w $r9, $r9, $r4 ++; CHECK-NEXT: ext.w.h $r9, $r9 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %ret = atomicrmw umax i16* %ptr, i16 100 seq_cst ++ ret void ++} ++ ++define void @max_8(i8* %ptr) { ++; CHECK-LABEL: max_8: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r5, $zero, 100 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r4, $r4, 3 ++; CHECK-NEXT: ori $r7, $zero, 255 ++; CHECK-NEXT: sll.w $r7, $r7, $r4 ++; CHECK-NEXT: nor $r8, $zero, $r7 ++; CHECK-NEXT: sll.w $r5, $r5, $r4 ++; CHECK-NEXT: dbar 0 ++; CHECK-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r14, $r10, $r7 ++; CHECK-NEXT: and $r5, $r5, $r7 ++; CHECK-NEXT: slt $r13, $r14, $r5 ++; CHECK-NEXT: masknez $r11, $r14, $r13 ++; CHECK-NEXT: maskeqz $r13, $r5, $r13 ++; CHECK-NEXT: or $r11, $r11, $r13 ++; CHECK-NEXT: and $r12, $r10, $r8 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB2_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r9, $r10, $r7 ++; CHECK-NEXT: srl.w $r9, $r9, $r4 ++; CHECK-NEXT: ext.w.b $r9, $r9 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %ret = atomicrmw max i8* %ptr, i8 100 seq_cst ++ ret void ++} ++ ++define void @max_16(i16* %ptr) { ++; CHECK-LABEL: max_16: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r5, $zero, 100 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r4, $r4, 3 ++; CHECK-NEXT: lu12i.w $r7, 15 ++; CHECK-NEXT: ori $r7, $r7, 4095 ++; CHECK-NEXT: sll.w $r7, $r7, $r4 ++; CHECK-NEXT: nor $r8, $zero, $r7 ++; CHECK-NEXT: sll.w $r5, $r5, $r4 ++; CHECK-NEXT: dbar 0 ++; CHECK-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r14, $r10, $r7 ++; CHECK-NEXT: and $r5, $r5, $r7 ++; CHECK-NEXT: slt $r13, $r14, $r5 ++; CHECK-NEXT: masknez $r11, $r14, $r13 ++; CHECK-NEXT: maskeqz $r13, $r5, $r13 ++; 
CHECK-NEXT: or $r11, $r11, $r13 ++; CHECK-NEXT: and $r12, $r10, $r8 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB3_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r9, $r10, $r7 ++; CHECK-NEXT: srl.w $r9, $r9, $r4 ++; CHECK-NEXT: ext.w.h $r9, $r9 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %ret = atomicrmw max i16* %ptr, i16 100 seq_cst ++ ret void ++} ++ ++ ++define void @umin_8(i8* %ptr) { ++; CHECK-LABEL: umin_8: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r5, $zero, 100 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r4, $r4, 3 ++; CHECK-NEXT: ori $r7, $zero, 255 ++; CHECK-NEXT: sll.w $r7, $r7, $r4 ++; CHECK-NEXT: nor $r8, $zero, $r7 ++; CHECK-NEXT: sll.w $r5, $r5, $r4 ++; CHECK-NEXT: dbar 0 ++; CHECK-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r14, $r10, $r7 ++; CHECK-NEXT: and $r5, $r5, $r7 ++; CHECK-NEXT: sltu $r13, $r14, $r5 ++; CHECK-NEXT: maskeqz $r11, $r14, $r13 ++; CHECK-NEXT: masknez $r13, $r5, $r13 ++; CHECK-NEXT: or $r11, $r11, $r13 ++; CHECK-NEXT: and $r12, $r10, $r8 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB4_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r9, $r10, $r7 ++; CHECK-NEXT: srl.w $r9, $r9, $r4 ++; CHECK-NEXT: ext.w.b $r9, $r9 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %ret = atomicrmw umin i8* %ptr, i8 100 seq_cst ++ ret void ++} ++ ++define void @umin_16(i16* %ptr) { ++; CHECK-LABEL: umin_16: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r5, $zero, 100 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r4, $r4, 3 ++; CHECK-NEXT: lu12i.w $r7, 15 ++; CHECK-NEXT: ori $r7, $r7, 4095 ++; CHECK-NEXT: sll.w $r7, $r7, $r4 ++; CHECK-NEXT: nor $r8, $zero, $r7 ++; CHECK-NEXT: sll.w $r5, $r5, $r4 ++; CHECK-NEXT: dbar 0 ++; CHECK-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r14, $r10, $r7 ++; CHECK-NEXT: and $r5, $r5, $r7 ++; CHECK-NEXT: sltu $r13, $r14, $r5 ++; CHECK-NEXT: maskeqz $r11, $r14, $r13 ++; CHECK-NEXT: masknez $r13, $r5, $r13 ++; CHECK-NEXT: or $r11, $r11, $r13 ++; CHECK-NEXT: and $r12, $r10, $r8 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB5_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r9, $r10, $r7 ++; CHECK-NEXT: srl.w $r9, $r9, $r4 ++; CHECK-NEXT: ext.w.h $r9, $r9 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %ret = atomicrmw umin i16* %ptr, i16 100 seq_cst ++ ret void ++} ++ ++define void @min_8(i8* %ptr) { ++; CHECK-LABEL: min_8: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r5, $zero, 100 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r4, $r4, 3 ++; CHECK-NEXT: ori $r7, $zero, 255 ++; CHECK-NEXT: sll.w $r7, $r7, $r4 ++; CHECK-NEXT: nor $r8, $zero, $r7 ++; CHECK-NEXT: sll.w $r5, $r5, $r4 ++; CHECK-NEXT: dbar 0 ++; CHECK-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r14, $r10, $r7 ++; CHECK-NEXT: and $r5, $r5, $r7 ++; CHECK-NEXT: slt $r13, $r14, $r5 ++; CHECK-NEXT: maskeqz $r11, $r14, $r13 ++; CHECK-NEXT: masknez $r13, $r5, $r13 ++; CHECK-NEXT: or $r11, $r11, $r13 ++; CHECK-NEXT: and $r12, $r10, $r8 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; 
CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB6_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r9, $r10, $r7 ++; CHECK-NEXT: srl.w $r9, $r9, $r4 ++; CHECK-NEXT: ext.w.b $r9, $r9 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %ret = atomicrmw min i8* %ptr, i8 100 seq_cst ++ ret void ++} ++ ++define void @min_16(i16* %ptr) { ++; CHECK-LABEL: min_16: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r5, $zero, 100 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r4, $r4, 3 ++; CHECK-NEXT: lu12i.w $r7, 15 ++; CHECK-NEXT: ori $r7, $r7, 4095 ++; CHECK-NEXT: sll.w $r7, $r7, $r4 ++; CHECK-NEXT: nor $r8, $zero, $r7 ++; CHECK-NEXT: sll.w $r5, $r5, $r4 ++; CHECK-NEXT: dbar 0 ++; CHECK-NEXT: .LBB7_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r14, $r10, $r7 ++; CHECK-NEXT: and $r5, $r5, $r7 ++; CHECK-NEXT: slt $r13, $r14, $r5 ++; CHECK-NEXT: maskeqz $r11, $r14, $r13 ++; CHECK-NEXT: masknez $r13, $r5, $r13 ++; CHECK-NEXT: or $r11, $r11, $r13 ++; CHECK-NEXT: and $r12, $r10, $r8 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB7_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r9, $r10, $r7 ++; CHECK-NEXT: srl.w $r9, $r9, $r4 ++; CHECK-NEXT: ext.w.h $r9, $r9 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %ret = atomicrmw min i16* %ptr, i16 100 seq_cst ++ ret void ++} ++ ++ ++define void @or_8(i8* %ptr) { ++; CHECK-LABEL: or_8: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r5, $zero, 100 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r4, $r4, 3 ++; CHECK-NEXT: ori $r7, $zero, 255 ++; CHECK-NEXT: sll.w $r7, $r7, $r4 ++; CHECK-NEXT: nor $r8, $zero, $r7 ++; CHECK-NEXT: sll.w $r5, $r5, $r4 ++; CHECK-NEXT: dbar 0 ++; CHECK-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: or $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r7 ++; CHECK-NEXT: and $r12, $r10, $r8 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB8_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r9, $r10, $r7 ++; CHECK-NEXT: srl.w $r9, $r9, $r4 ++; CHECK-NEXT: ext.w.b $r9, $r9 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %ret = atomicrmw or i8* %ptr, i8 100 seq_cst ++ ret void ++} ++ ++define void @or_16(i16* %ptr) { ++; CHECK-LABEL: or_16: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r5, $zero, 100 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r4, $r4, 3 ++; CHECK-NEXT: lu12i.w $r7, 15 ++; CHECK-NEXT: ori $r7, $r7, 4095 ++; CHECK-NEXT: sll.w $r7, $r7, $r4 ++; CHECK-NEXT: nor $r8, $zero, $r7 ++; CHECK-NEXT: sll.w $r5, $r5, $r4 ++; CHECK-NEXT: dbar 0 ++; CHECK-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: or $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r7 ++; CHECK-NEXT: and $r12, $r10, $r8 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB9_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r9, $r10, $r7 ++; CHECK-NEXT: srl.w $r9, $r9, $r4 ++; CHECK-NEXT: ext.w.h $r9, $r9 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %ret = atomicrmw or i16* %ptr, i16 100 seq_cst ++ ret void ++} ++ ++ ++define void @add_8(i8* %ptr) { ++; CHECK-LABEL: add_8: ++; CHECK: # %bb.0: ++; 
CHECK-NEXT: ori $r5, $zero, 100 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r4, $r4, 3 ++; CHECK-NEXT: ori $r7, $zero, 255 ++; CHECK-NEXT: sll.w $r7, $r7, $r4 ++; CHECK-NEXT: nor $r8, $zero, $r7 ++; CHECK-NEXT: sll.w $r5, $r5, $r4 ++; CHECK-NEXT: dbar 0 ++; CHECK-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: add.w $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r7 ++; CHECK-NEXT: and $r12, $r10, $r8 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB10_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r9, $r10, $r7 ++; CHECK-NEXT: srl.w $r9, $r9, $r4 ++; CHECK-NEXT: ext.w.b $r9, $r9 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %ret = atomicrmw add i8* %ptr, i8 100 seq_cst ++ ret void ++} ++ ++define void @add_16(i16* %ptr) { ++; CHECK-LABEL: add_16: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r5, $zero, 100 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r4, $r4, 3 ++; CHECK-NEXT: lu12i.w $r7, 15 ++; CHECK-NEXT: ori $r7, $r7, 4095 ++; CHECK-NEXT: sll.w $r7, $r7, $r4 ++; CHECK-NEXT: nor $r8, $zero, $r7 ++; CHECK-NEXT: sll.w $r5, $r5, $r4 ++; CHECK-NEXT: dbar 0 ++; CHECK-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: add.w $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r7 ++; CHECK-NEXT: and $r12, $r10, $r8 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB11_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r9, $r10, $r7 ++; CHECK-NEXT: srl.w $r9, $r9, $r4 ++; CHECK-NEXT: ext.w.h $r9, $r9 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %ret = atomicrmw add i16* %ptr, i16 100 seq_cst ++ ret void ++} ++ ++ ++define void @sub_8(i8* %ptr) { ++; CHECK-LABEL: sub_8: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r5, $zero, 100 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r4, $r4, 3 ++; CHECK-NEXT: ori $r7, $zero, 255 ++; CHECK-NEXT: sll.w $r7, $r7, $r4 ++; CHECK-NEXT: nor $r8, $zero, $r7 ++; CHECK-NEXT: sll.w $r5, $r5, $r4 ++; CHECK-NEXT: dbar 0 ++; CHECK-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: sub.w $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r7 ++; CHECK-NEXT: and $r12, $r10, $r8 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB12_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r9, $r10, $r7 ++; CHECK-NEXT: srl.w $r9, $r9, $r4 ++; CHECK-NEXT: ext.w.b $r9, $r9 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %ret = atomicrmw sub i8* %ptr, i8 100 seq_cst ++ ret void ++} ++ ++define void @sub_16(i16* %ptr) { ++; CHECK-LABEL: sub_16: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r5, $zero, 100 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r4, $r4, 3 ++; CHECK-NEXT: lu12i.w $r7, 15 ++; CHECK-NEXT: ori $r7, $r7, 4095 ++; CHECK-NEXT: sll.w $r7, $r7, $r4 ++; CHECK-NEXT: nor $r8, $zero, $r7 ++; CHECK-NEXT: sll.w $r5, $r5, $r4 ++; CHECK-NEXT: dbar 0 ++; CHECK-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: sub.w $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r7 ++; CHECK-NEXT: and 
$r12, $r10, $r8 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB13_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r9, $r10, $r7 ++; CHECK-NEXT: srl.w $r9, $r9, $r4 ++; CHECK-NEXT: ext.w.h $r9, $r9 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %ret = atomicrmw sub i16* %ptr, i16 100 seq_cst ++ ret void ++} ++ ++ ++define void @and_8(i8* %ptr) { ++; CHECK-LABEL: and_8: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r5, $zero, 100 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r4, $r4, 3 ++; CHECK-NEXT: ori $r7, $zero, 255 ++; CHECK-NEXT: sll.w $r7, $r7, $r4 ++; CHECK-NEXT: nor $r8, $zero, $r7 ++; CHECK-NEXT: sll.w $r5, $r5, $r4 ++; CHECK-NEXT: dbar 0 ++; CHECK-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r7 ++; CHECK-NEXT: and $r12, $r10, $r8 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB14_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r9, $r10, $r7 ++; CHECK-NEXT: srl.w $r9, $r9, $r4 ++; CHECK-NEXT: ext.w.b $r9, $r9 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %ret = atomicrmw and i8* %ptr, i8 100 seq_cst ++ ret void ++} ++ ++define void @and_16(i16* %ptr) { ++; CHECK-LABEL: and_16: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r5, $zero, 100 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r4, $r4, 3 ++; CHECK-NEXT: lu12i.w $r7, 15 ++; CHECK-NEXT: ori $r7, $r7, 4095 ++; CHECK-NEXT: sll.w $r7, $r7, $r4 ++; CHECK-NEXT: nor $r8, $zero, $r7 ++; CHECK-NEXT: sll.w $r5, $r5, $r4 ++; CHECK-NEXT: dbar 0 ++; CHECK-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r7 ++; CHECK-NEXT: and $r12, $r10, $r8 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB15_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r9, $r10, $r7 ++; CHECK-NEXT: srl.w $r9, $r9, $r4 ++; CHECK-NEXT: ext.w.h $r9, $r9 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %ret = atomicrmw and i16* %ptr, i16 100 seq_cst ++ ret void ++} ++ ++ ++define void @nand_8(i8* %ptr) { ++; CHECK-LABEL: nand_8: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r5, $zero, 100 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r4, $r4, 3 ++; CHECK-NEXT: ori $r7, $zero, 255 ++; CHECK-NEXT: sll.w $r7, $r7, $r4 ++; CHECK-NEXT: nor $r8, $zero, $r7 ++; CHECK-NEXT: sll.w $r5, $r5, $r4 ++; CHECK-NEXT: dbar 0 ++; CHECK-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r10, $r5 ++; CHECK-NEXT: nor $r11, $zero, $r11 ++; CHECK-NEXT: and $r11, $r11, $r7 ++; CHECK-NEXT: and $r12, $r10, $r8 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB16_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r9, $r10, $r7 ++; CHECK-NEXT: srl.w $r9, $r9, $r4 ++; CHECK-NEXT: ext.w.b $r9, $r9 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %ret = atomicrmw nand i8* %ptr, i8 100 seq_cst ++ ret void ++} ++ ++define void @nand_16(i16* %ptr) { ++; CHECK-LABEL: nand_16: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r5, $zero, 100 ++; CHECK-NEXT: addi.d $r6, $zero, 
-4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r4, $r4, 3 ++; CHECK-NEXT: lu12i.w $r7, 15 ++; CHECK-NEXT: ori $r7, $r7, 4095 ++; CHECK-NEXT: sll.w $r7, $r7, $r4 ++; CHECK-NEXT: nor $r8, $zero, $r7 ++; CHECK-NEXT: sll.w $r5, $r5, $r4 ++; CHECK-NEXT: dbar 0 ++; CHECK-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r10, $r5 ++; CHECK-NEXT: nor $r11, $zero, $r11 ++; CHECK-NEXT: and $r11, $r11, $r7 ++; CHECK-NEXT: and $r12, $r10, $r8 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB17_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r9, $r10, $r7 ++; CHECK-NEXT: srl.w $r9, $r9, $r4 ++; CHECK-NEXT: ext.w.h $r9, $r9 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %ret = atomicrmw nand i16* %ptr, i16 100 seq_cst ++ ret void ++} ++ ++ ++define void @xor_8(i8* %ptr) { ++; CHECK-LABEL: xor_8: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r5, $zero, 100 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r4, $r4, 3 ++; CHECK-NEXT: ori $r7, $zero, 255 ++; CHECK-NEXT: sll.w $r7, $r7, $r4 ++; CHECK-NEXT: nor $r8, $zero, $r7 ++; CHECK-NEXT: sll.w $r5, $r5, $r4 ++; CHECK-NEXT: dbar 0 ++; CHECK-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: xor $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r7 ++; CHECK-NEXT: and $r12, $r10, $r8 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB18_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r9, $r10, $r7 ++; CHECK-NEXT: srl.w $r9, $r9, $r4 ++; CHECK-NEXT: ext.w.b $r9, $r9 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %ret = atomicrmw xor i8* %ptr, i8 100 seq_cst ++ ret void ++} ++ ++define void @xor_16(i16* %ptr) { ++; CHECK-LABEL: xor_16: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r5, $zero, 100 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r4, $r4, 3 ++; CHECK-NEXT: lu12i.w $r7, 15 ++; CHECK-NEXT: ori $r7, $r7, 4095 ++; CHECK-NEXT: sll.w $r7, $r7, $r4 ++; CHECK-NEXT: nor $r8, $zero, $r7 ++; CHECK-NEXT: sll.w $r5, $r5, $r4 ++; CHECK-NEXT: dbar 0 ++; CHECK-NEXT: .LBB19_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: xor $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r7 ++; CHECK-NEXT: and $r12, $r10, $r8 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB19_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r9, $r10, $r7 ++; CHECK-NEXT: srl.w $r9, $r9, $r4 ++; CHECK-NEXT: ext.w.h $r9, $r9 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %ret = atomicrmw xor i16* %ptr, i16 100 seq_cst ++ ret void ++} ++ ++ ++define void @xchg_8(i8* %ptr) { ++; CHECK-LABEL: xchg_8: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r5, $zero, 100 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r4, $r4, 3 ++; CHECK-NEXT: ori $r7, $zero, 255 ++; CHECK-NEXT: sll.w $r7, $r7, $r4 ++; CHECK-NEXT: nor $r8, $zero, $r7 ++; CHECK-NEXT: sll.w $r5, $r5, $r4 ++; CHECK-NEXT: dbar 0 ++; CHECK-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r5, $r7 ++; CHECK-NEXT: and $r12, $r10, $r8 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w 
$r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB20_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r9, $r10, $r7 ++; CHECK-NEXT: srl.w $r9, $r9, $r4 ++; CHECK-NEXT: ext.w.b $r9, $r9 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %ret = atomicrmw xchg i8* %ptr, i8 100 seq_cst ++ ret void ++} ++ ++define void @xchg_16(i16* %ptr) { ++; CHECK-LABEL: xchg_16: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r5, $zero, 100 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r4, $r4, 3 ++; CHECK-NEXT: lu12i.w $r7, 15 ++; CHECK-NEXT: ori $r7, $r7, 4095 ++; CHECK-NEXT: sll.w $r7, $r7, $r4 ++; CHECK-NEXT: nor $r8, $zero, $r7 ++; CHECK-NEXT: sll.w $r5, $r5, $r4 ++; CHECK-NEXT: dbar 0 ++; CHECK-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r5, $r7 ++; CHECK-NEXT: and $r12, $r10, $r8 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB21_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r9, $r10, $r7 ++; CHECK-NEXT: srl.w $r9, $r9, $r4 ++; CHECK-NEXT: ext.w.h $r9, $r9 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %ret = atomicrmw xchg i16* %ptr, i16 100 seq_cst ++ ret void ++} ++ ++define void @cmpxchg_8(i8* %ptr) { ++; CHECK-LABEL: cmpxchg_8: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r5, $zero, 1 ++; CHECK-NEXT: ori $r6, $zero, 100 ++; CHECK-NEXT: addi.d $r7, $zero, -4 ++; CHECK-NEXT: and $r7, $r4, $r7 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r4, $r4, 3 ++; CHECK-NEXT: ori $r8, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r8, $r4 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: andi $r6, $r6, 255 ++; CHECK-NEXT: sll.w $r6, $r6, $r4 ++; CHECK-NEXT: andi $r5, $r5, 255 ++; CHECK-NEXT: sll.w $r5, $r5, $r4 ++; CHECK-NEXT: dbar 0 ++; CHECK-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r11, $r7, 0 ++; CHECK-NEXT: and $r12, $r11, $r8 ++; CHECK-NEXT: bne $r12, $r6, .LBB22_3 ++; CHECK-NEXT: # %bb.2: # in Loop: Header=BB22_1 Depth=1 ++; CHECK-NEXT: and $r11, $r11, $r9 ++; CHECK-NEXT: or $r11, $r11, $r5 ++; CHECK-NEXT: sc.w $r11, $r7, 0 ++; CHECK-NEXT: beq $r11, $zero, .LBB22_1 ++; CHECK-NEXT: .LBB22_3: ++; CHECK-NEXT: dbar 1792 ++; CHECK-NEXT: srl.w $r10, $r12, $r4 ++; CHECK-NEXT: ext.w.b $r10, $r10 ++; CHECK-NEXT: # %bb.4: ++; CHECK-NEXT: jr $ra ++ %ret = cmpxchg i8* %ptr, i8 100, i8 1 seq_cst seq_cst ++ ret void ++} ++ ++define void @cmpxchg_16(i16* %ptr) { ++; CHECK-LABEL: cmpxchg_16: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r5, $zero, 1 ++; CHECK-NEXT: ori $r6, $zero, 100 ++; CHECK-NEXT: addi.d $r7, $zero, -4 ++; CHECK-NEXT: and $r7, $r4, $r7 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r4, $r4, 3 ++; CHECK-NEXT: lu12i.w $r8, 15 ++; CHECK-NEXT: ori $r8, $r8, 4095 ++; CHECK-NEXT: sll.w $r9, $r8, $r4 ++; CHECK-NEXT: nor $r10, $zero, $r9 ++; CHECK-NEXT: and $r6, $r6, $r8 ++; CHECK-NEXT: sll.w $r6, $r6, $r4 ++; CHECK-NEXT: and $r5, $r5, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r4 ++; CHECK-NEXT: dbar 0 ++; CHECK-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r11, $r7, 0 ++; CHECK-NEXT: and $r12, $r11, $r9 ++; CHECK-NEXT: bne $r12, $r6, .LBB23_3 ++; CHECK-NEXT: # %bb.2: # in Loop: Header=BB23_1 Depth=1 ++; CHECK-NEXT: and $r11, $r11, $r10 ++; CHECK-NEXT: or $r11, $r11, $r5 ++; CHECK-NEXT: sc.w $r11, $r7, 0 ++; CHECK-NEXT: beq $r11, $zero, .LBB23_1 ++; CHECK-NEXT: .LBB23_3: ++; CHECK-NEXT: dbar 1792 ++; CHECK-NEXT: srl.w $r8, $r12, $r4 ++; 
CHECK-NEXT: ext.w.h $r8, $r8 ++; CHECK-NEXT: # %bb.4: ++; CHECK-NEXT: jr $ra ++ %ret = cmpxchg i16* %ptr, i16 100, i16 1 seq_cst seq_cst ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/atomic_64_32.ll b/llvm/test/CodeGen/LoongArch/atomic_64_32.ll +new file mode 100644 +index 000000000..ce400fd43 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/atomic_64_32.ll +@@ -0,0 +1,327 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc -mtriple=loongarch64 -o - %s | FileCheck %s ++ ++ ++define void @umax_32(i32* %ptr) { ++; CHECK-LABEL: umax_32: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r5, $zero, 100 ++; CHECK-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ammax_db.wu $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: jr $ra ++ %ret = atomicrmw umax i32* %ptr, i32 100 seq_cst ++ ret void ++} ++ ++define void @umax_64(i64* %ptr) { ++; CHECK-LABEL: umax_64: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $r5, $zero, 100 ++; CHECK-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ammax_db.du $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: jr $ra ++ %ret = atomicrmw umax i64* %ptr, i64 100 seq_cst ++ ret void ++} ++ ++define void @max_32(i32* %ptr) { ++; CHECK-LABEL: max_32: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r5, $zero, 100 ++; CHECK-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ammax_db.w $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: jr $ra ++ %ret = atomicrmw max i32* %ptr, i32 100 seq_cst ++ ret void ++} ++ ++define void @max_64(i64* %ptr) { ++; CHECK-LABEL: max_64: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $r5, $zero, 100 ++; CHECK-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ammax_db.d $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: jr $ra ++ %ret = atomicrmw max i64* %ptr, i64 100 seq_cst ++ ret void ++} ++ ++ ++define void @umin_32(i32* %ptr) { ++; CHECK-LABEL: umin_32: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r5, $zero, 100 ++; CHECK-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ammin_db.wu $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: jr $ra ++ %ret = atomicrmw umin i32* %ptr, i32 100 seq_cst ++ ret void ++} ++ ++define void @umin_64(i64* %ptr) { ++; CHECK-LABEL: umin_64: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $r5, $zero, 100 ++; CHECK-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ammin_db.du $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: jr $ra ++ %ret = atomicrmw umin i64* %ptr, i64 100 seq_cst ++ ret void ++} ++ ++define void @min_32(i32* %ptr) { ++; CHECK-LABEL: min_32: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r5, $zero, 100 ++; CHECK-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ammin_db.w $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: jr $ra ++ %ret = atomicrmw min i32* %ptr, i32 100 seq_cst ++ ret void ++} ++ ++define void @min_64(i64* %ptr) { ++; CHECK-LABEL: min_64: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $r5, $zero, 100 ++; CHECK-NEXT: .LBB7_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ammin_db.d $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: jr $ra ++ %ret = atomicrmw min i64* %ptr, i64 100 seq_cst ++ ret void ++} ++ ++ ++define void @or_32(i32* %ptr) { ++; CHECK-LABEL: or_32: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r5, $zero, 100 ++; CHECK-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: amor_db.w $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: jr $ra ++ %ret 
= atomicrmw or i32* %ptr, i32 100 seq_cst ++ ret void ++} ++ ++define void @or_64(i64* %ptr) { ++; CHECK-LABEL: or_64: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $r5, $zero, 100 ++; CHECK-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: amor_db.d $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: jr $ra ++ %ret = atomicrmw or i64* %ptr, i64 100 seq_cst ++ ret void ++} ++ ++ ++define void @add_32(i32* %ptr) { ++; CHECK-LABEL: add_32: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r5, $zero, 100 ++; CHECK-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: amadd_db.w $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: jr $ra ++ %ret = atomicrmw add i32* %ptr, i32 100 seq_cst ++ ret void ++} ++ ++define void @add_64(i64* %ptr) { ++; CHECK-LABEL: add_64: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $r5, $zero, 100 ++; CHECK-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: amadd_db.d $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: jr $ra ++ %ret = atomicrmw add i64* %ptr, i64 100 seq_cst ++ ret void ++} ++ ++ ++define void @sub_32(i32* %ptr) { ++; CHECK-LABEL: sub_32: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r5, $zero, 100 ++; CHECK-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: sub.w $r7, $zero, $r5 ++; CHECK-NEXT: amadd_db.w $r6, $r7, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: jr $ra ++ %ret = atomicrmw sub i32* %ptr, i32 100 seq_cst ++ ret void ++} ++ ++define void @sub_64(i64* %ptr) { ++; CHECK-LABEL: sub_64: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $r5, $zero, 100 ++; CHECK-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: sub.d $r7, $zero, $r5 ++; CHECK-NEXT: amadd_db.d $r6, $r7, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: jr $ra ++ %ret = atomicrmw sub i64* %ptr, i64 100 seq_cst ++ ret void ++} ++ ++ ++define void @and_32(i32* %ptr) { ++; CHECK-LABEL: and_32: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r5, $zero, 100 ++; CHECK-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: amand_db.w $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: jr $ra ++ %ret = atomicrmw and i32* %ptr, i32 100 seq_cst ++ ret void ++} ++ ++define void @and_64(i64* %ptr) { ++; CHECK-LABEL: and_64: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $r5, $zero, 100 ++; CHECK-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: amand_db.d $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: jr $ra ++ %ret = atomicrmw and i64* %ptr, i64 100 seq_cst ++ ret void ++} ++ ++ ++define void @nand_32(i32* %ptr) { ++; CHECK-LABEL: nand_32: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r5, $zero, 100 ++; CHECK-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r6, $r4, 0 ++; CHECK-NEXT: and $r7, $r6, $r5 ++; CHECK-NEXT: nor $r7, $zero, $r7 ++; CHECK-NEXT: sc.w $r7, $r4, 0 ++; CHECK-NEXT: beq $r7, $zero, .LBB16_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: dbar 1792 ++; CHECK-NEXT: jr $ra ++ %ret = atomicrmw nand i32* %ptr, i32 100 seq_cst ++ ret void ++} ++ ++define void @nand_64(i64* %ptr) { ++; CHECK-LABEL: nand_64: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $r5, $zero, 100 ++; CHECK-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.d $r6, $r4, 0 ++; CHECK-NEXT: and $r7, $r6, $r5 ++; CHECK-NEXT: nor $r7, $zero, $r7 ++; CHECK-NEXT: sc.d $r7, $r4, 0 ++; CHECK-NEXT: beq $r7, $zero, .LBB17_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: dbar 1792 ++; CHECK-NEXT: jr $ra ++ %ret = atomicrmw nand i64* %ptr, i64 100 seq_cst ++ ret void ++} ++ ++ ++define 
void @xor_32(i32* %ptr) { ++; CHECK-LABEL: xor_32: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r5, $zero, 100 ++; CHECK-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: amxor_db.w $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: jr $ra ++ %ret = atomicrmw xor i32* %ptr, i32 100 seq_cst ++ ret void ++} ++ ++define void @xor_64(i64* %ptr) { ++; CHECK-LABEL: xor_64: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $r5, $zero, 100 ++; CHECK-NEXT: .LBB19_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: amxor_db.d $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: jr $ra ++ %ret = atomicrmw xor i64* %ptr, i64 100 seq_cst ++ ret void ++} ++ ++ ++define void @xchg_32(i32* %ptr) { ++; CHECK-LABEL: xchg_32: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r5, $zero, 100 ++; CHECK-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: amswap_db.w $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: jr $ra ++ %ret = atomicrmw xchg i32* %ptr, i32 100 seq_cst ++ ret void ++} ++ ++define void @xchg_64(i64* %ptr) { ++; CHECK-LABEL: xchg_64: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $r5, $zero, 100 ++; CHECK-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: amswap_db.d $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: jr $ra ++ %ret = atomicrmw xchg i64* %ptr, i64 100 seq_cst ++ ret void ++} ++ ++define void @cmpxchg_32(i32* %ptr) { ++; CHECK-LABEL: cmpxchg_32: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r5, $zero, 1 ++; CHECK-NEXT: ori $r6, $zero, 100 ++; CHECK-NEXT: dbar 0 ++; CHECK-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r7, $r4, 0 ++; CHECK-NEXT: bne $r7, $r6, .LBB22_3 ++; CHECK-NEXT: # %bb.2: # in Loop: Header=BB22_1 Depth=1 ++; CHECK-NEXT: move $r8, $r5 ++; CHECK-NEXT: sc.w $r8, $r4, 0 ++; CHECK-NEXT: beq $r8, $zero, .LBB22_1 ++; CHECK-NEXT: .LBB22_3: ++; CHECK-NEXT: dbar 1792 ++; CHECK-NEXT: jr $ra ++ %ret = cmpxchg i32* %ptr, i32 100, i32 1 seq_cst seq_cst ++ ret void ++} ++ ++define void @cmpxchg_64(i64* %ptr) { ++; CHECK-LABEL: cmpxchg_64: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $r5, $zero, 1 ++; CHECK-NEXT: addi.d $r6, $zero, 100 ++; CHECK-NEXT: dbar 0 ++; CHECK-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.d $r7, $r4, 0 ++; CHECK-NEXT: bne $r7, $r6, .LBB23_3 ++; CHECK-NEXT: # %bb.2: # in Loop: Header=BB23_1 Depth=1 ++; CHECK-NEXT: move $r8, $r5 ++; CHECK-NEXT: sc.d $r8, $r4, 0 ++; CHECK-NEXT: beq $r8, $zero, .LBB23_1 ++; CHECK-NEXT: .LBB23_3: ++; CHECK-NEXT: dbar 1792 ++; CHECK-NEXT: jr $ra ++ %ret = cmpxchg i64* %ptr, i64 100, i64 1 seq_cst seq_cst ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/bss.ll b/llvm/test/CodeGen/LoongArch/bss.ll +new file mode 100644 +index 000000000..cfc30b3a7 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/bss.ll +@@ -0,0 +1,5 @@ ++; RUN: llc -march=loongarch64 -o - %s | FileCheck %s ++ ++; CHECK: .section .bss,"aw",@nobits ++; CHECK: .globl a ++@a = global i32 0, align 4 +diff --git a/llvm/test/CodeGen/LoongArch/bstrins_d.ll b/llvm/test/CodeGen/LoongArch/bstrins_d.ll +index 342e044c7..819bfdbb3 100644 +--- a/llvm/test/CodeGen/LoongArch/bstrins_d.ll ++++ b/llvm/test/CodeGen/LoongArch/bstrins_d.ll +@@ -1,207 +1,53 @@ + ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +-; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s +- +-;; Test generation of the bstrins.d instruction. +-;; There are 8 patterns that can be matched to bstrins.d. See performORCombine +-;; for details. 
+- +-;; Pattern 1 +-;; R = or (and X, mask0), (and (shl Y, lsb), mask1) +-;; => +-;; R = BSTRINS X, Y, msb, lsb +-define i64 @pat1(i64 %a, i64 %b) nounwind { +-; CHECK-LABEL: pat1: +-; CHECK: # %bb.0: +-; CHECK-NEXT: bstrins.d $a0, $a1, 39, 16 +-; CHECK-NEXT: jirl $zero, $ra, 0 +- %and1 = and i64 %a, -1099511562241 ; 0xffffff000000ffff +- %shl = shl i64 %b, 16 +- %and2 = and i64 %shl, 1099511562240 ; 0x000000ffffff0000 +- %or = or i64 %and1, %and2 +- ret i64 %or +-} +- +-define i64 @pat1_swap(i64 %a, i64 %b) nounwind { +-; CHECK-LABEL: pat1_swap: +-; CHECK: # %bb.0: +-; CHECK-NEXT: bstrins.d $a0, $a1, 39, 16 +-; CHECK-NEXT: jirl $zero, $ra, 0 +- %and1 = and i64 %a, -1099511562241 ; 0xffffff000000ffff +- %shl = shl i64 %b, 16 +- %and2 = and i64 %shl, 1099511562240 ; 0x000000ffffff0000 +- %or = or i64 %and2, %and1 +- ret i64 %or +-} +- +-;; Pattern 2 +-;; R = or (and X, mask0), (shl (and Y, mask1), lsb) +-;; => +-;; R = BSTRINS X, Y, msb, lsb +-define i64 @pat2(i64 %a, i64 %b) nounwind { +-; CHECK-LABEL: pat2: +-; CHECK: # %bb.0: +-; CHECK-NEXT: bstrins.d $a0, $a1, 39, 16 +-; CHECK-NEXT: jirl $zero, $ra, 0 +- %and1 = and i64 %a, -1099511562241 ; 0xffffff000000ffff +- %and2 = and i64 %b, 16777215 ; 0x0000000000ffffff +- %shl = shl i64 %and2, 16 +- %or = or i64 %and1, %shl +- ret i64 %or +-} +- +-define i64 @pat2_swap(i64 %a, i64 %b) nounwind { +-; CHECK-LABEL: pat2_swap: +-; CHECK: # %bb.0: +-; CHECK-NEXT: bstrins.d $a0, $a1, 39, 16 +-; CHECK-NEXT: jirl $zero, $ra, 0 +- %and1 = and i64 %a, -1099511562241 ; 0xffffff000000ffff +- %and2 = and i64 %b, 16777215 ; 0x0000000000ffffff +- %shl = shl i64 %and2, 16 +- %or = or i64 %shl, %and1 +- ret i64 %or +-} +- +-;; Pattern 3 +-;; R = or (and X, mask0), (and Y, mask1) +-;; => +-;; R = BSTRINS X, (srl (and Y, mask1), lsb), msb, lsb +-define i64 @pat3(i64 %a, i64 %b) nounwind { +-; CHECK-LABEL: pat3: +-; CHECK: # %bb.0: +-; CHECK-NEXT: andi $a1, $a1, 288 +-; CHECK-NEXT: srli.d $a1, $a1, 4 +-; CHECK-NEXT: bstrins.d $a0, $a1, 11, 4 +-; CHECK-NEXT: jirl $zero, $ra, 0 +- %and1 = and i64 %a, -4081 ; 0xfffffffffffff00f +- %and2 = and i64 %b, 288 ; 0x0000000000000120 +- %or = or i64 %and1, %and2 +- ret i64 %or +-} +- +-define i64 @pat3_swap(i64 %a, i64 %b) nounwind { +-; CHECK-LABEL: pat3_swap: +-; CHECK: # %bb.0: +-; CHECK-NEXT: andi $a1, $a1, 288 +-; CHECK-NEXT: srli.d $a1, $a1, 4 +-; CHECK-NEXT: bstrins.d $a0, $a1, 11, 4 +-; CHECK-NEXT: jirl $zero, $ra, 0 +- %and1 = and i64 %a, -4081 ; 0xfffffffffffff00f +- %and2 = and i64 %b, 288 ; 0x0000000000000120 +- %or = or i64 %and2, %and1 +- ret i64 %or +-} +- +-;; Pattern 4 +-;; R = or (and X, mask), (shl Y, shamt) +-;; => +-;; R = BSTRINS X, Y, 63, shamt +-define i64 @pat4(i64 %a, i64 %b) nounwind { +-; CHECK-LABEL: pat4: +-; CHECK: # %bb.0: +-; CHECK-NEXT: bstrins.d $a0, $a1, 63, 8 +-; CHECK-NEXT: jirl $zero, $ra, 0 +- %and = and i64 %a, 255 +- %shl = shl i64 %b, 8 ++; RUN: llc -mtriple=loongarch64 -o - %s | FileCheck %s ++ ++define void @bstrinsd_63_27(i64* nocapture %d) nounwind { ++; CHECK-LABEL: bstrinsd_63_27: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: ld.d $r5, $r4, 0 ++; CHECK-NEXT: addi.d $r6, $zero, 123 ++; CHECK-NEXT: bstrins.d $r5, $r6, 63, 27 ++; CHECK-NEXT: st.d $r5, $r4, 0 ++; CHECK-NEXT: jr $ra ++entry: ++ %tmp = load i64, i64* %d, align 8 ++ %and5 = and i64 %tmp, 134217727 ++ %or = or i64 %and5, 16508780544 ++ store i64 %or, i64* %d, align 8 ++ ret void ++} ++ ++define void @bstrinsd_33_28(i64* nocapture %d) nounwind { ++; CHECK-LABEL: bstrinsd_33_28: ++; CHECK: # %bb.0: # %entry ++; 
CHECK-NEXT: ld.d $r5, $r4, 0 ++; CHECK-NEXT: addi.d $r6, $zero, 4 ++; CHECK-NEXT: bstrins.d $r5, $r6, 33, 28 ++; CHECK-NEXT: st.d $r5, $r4, 0 ++; CHECK-NEXT: jr $ra ++entry: ++ %tmp = load i64, i64* %d, align 8 ++ %and5 = and i64 %tmp, -16911433729 ++ %or = or i64 %and5, 1073741824 ++ store i64 %or, i64* %d, align 8 ++ ret void ++} ++ ++define void @bstrinsd_49_34(i64* nocapture %d) nounwind { ++; CHECK-LABEL: bstrinsd_49_34: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: ld.d $r5, $r4, 0 ++; CHECK-NEXT: srli.d $r6, $r5, 50 ++; CHECK-NEXT: bstrins.d $r5, $r6, 49, 34 ++; CHECK-NEXT: st.d $r5, $r4, 0 ++; CHECK-NEXT: jr $ra ++entry: ++ %tmp0 = load i64, i64* %d, align 8 ++ %lshr = lshr i64 %tmp0, 50 ++ %tmp1 = load i64, i64* %d, align 8 ++ %shl = shl nuw nsw i64 %lshr, 34 ++ %and = and i64 %tmp1, -1125882726973441 + %or = or i64 %and, %shl +- ret i64 %or +-} +- +-define i64 @pat4_swap(i64 %a, i64 %b) nounwind { +-; CHECK-LABEL: pat4_swap: +-; CHECK: # %bb.0: +-; CHECK-NEXT: bstrins.d $a0, $a1, 63, 8 +-; CHECK-NEXT: jirl $zero, $ra, 0 +- %and = and i64 %a, 255 +- %shl = shl i64 %b, 8 +- %or = or i64 %shl, %and +- ret i64 %or +-} +- +-;; Pattern 5 +-;; R = or (and X, mask0), const +-;; => +-;; R = BSTRINS X, (const >> lsb), msb, lsb +-define i64 @pat5(i64 %a) nounwind { +-; CHECK-LABEL: pat5: +-; CHECK: # %bb.0: +-; CHECK-NEXT: lu12i.w $a1, 74565 +-; CHECK-NEXT: ori $a1, $a1, 1656 +-; CHECK-NEXT: bstrins.d $a0, $a1, 47, 16 +-; CHECK-NEXT: jirl $zero, $ra, 0 +- %and = and i64 %a, 18446462598732906495 ; 0xffff00000000ffff +- %or = or i64 %and, 20015998304256 ; 0x0000123456780000 +- ret i64 %or +-} +- +-;; Pattern 6: a = b | ((c & mask) << shamt) +-;; In this testcase b is 0x123456000000789a, but in fact we do not require b +-;; being a constant. As long as all positions in b to be overwritten by the +-;; incoming bits are known to be zero, the pattern could be matched. +-define i64 @pat6(i64 %c) nounwind { +-; CHECK-LABEL: pat6: +-; CHECK: # %bb.0: +-; CHECK-NEXT: lu12i.w $a1, 7 +-; CHECK-NEXT: ori $a1, $a1, 2202 +-; CHECK-NEXT: lu32i.d $a1, 284160 +-; CHECK-NEXT: lu52i.d $a1, $a1, 291 +-; CHECK-NEXT: bstrins.d $a1, $a0, 39, 16 +-; CHECK-NEXT: move $a0, $a1 +-; CHECK-NEXT: jirl $zero, $ra, 0 +- %and = and i64 %c, 16777215 ; 0x0000000000ffffff +- %shl = shl i64 %and, 16 +- %or = or i64 %shl, 1311767949471676570 ; 0x123456000000789a +- ret i64 %or +-} +- +-;; Pattern 7: a = b | ((c << shamt) & shifted_mask) +-;; Similar to pattern 6. +-define i64 @pat7(i64 %c) nounwind { +-; CHECK-LABEL: pat7: +-; CHECK: # %bb.0: +-; CHECK-NEXT: lu12i.w $a1, 7 +-; CHECK-NEXT: ori $a1, $a1, 2202 +-; CHECK-NEXT: lu32i.d $a1, 284160 +-; CHECK-NEXT: lu52i.d $a1, $a1, 291 +-; CHECK-NEXT: bstrins.d $a1, $a0, 39, 16 +-; CHECK-NEXT: move $a0, $a1 +-; CHECK-NEXT: jirl $zero, $ra, 0 +- %shl = shl i64 %c, 16 +- %and = and i64 %shl, 1099511562240 ; 0x000000ffffff0000 +- %or = or i64 %and, 1311767949471676570 ; 0x123456000000789a +- ret i64 %or +-} +- +-;; Pattern 8: a = b | (c & shifted_mask) +-;; Similar to pattern 7 but without shift to c. 
+-define i64 @pat8(i64 %c) nounwind { +-; CHECK-LABEL: pat8: +-; CHECK: # %bb.0: +-; CHECK-NEXT: srli.d $a1, $a0, 16 +-; CHECK-NEXT: lu12i.w $a0, 7 +-; CHECK-NEXT: ori $a0, $a0, 2202 +-; CHECK-NEXT: lu32i.d $a0, 284160 +-; CHECK-NEXT: lu52i.d $a0, $a0, 291 +-; CHECK-NEXT: bstrins.d $a0, $a1, 39, 16 +-; CHECK-NEXT: jirl $zero, $ra, 0 +- %and = and i64 %c, 1099511562240 ; 0x000000ffffff0000 +- %or = or i64 %and, 1311767949471676570 ; 0x123456000000789a +- ret i64 %or +-} +- +-;; Test that bstrins.d is not generated because constant OR operand +-;; doesn't fit into bits cleared by constant AND operand. +-define i64 @no_bstrins_d(i64 %a) nounwind { +-; CHECK-LABEL: no_bstrins_d: +-; CHECK: # %bb.0: +-; CHECK-NEXT: lu12i.w $a1, 354185 +-; CHECK-NEXT: lu32i.d $a1, 4660 +-; CHECK-NEXT: or $a0, $a0, $a1 +-; CHECK-NEXT: lu12i.w $a1, 354191 +-; CHECK-NEXT: ori $a1, $a1, 4095 +-; CHECK-NEXT: lu32i.d $a1, -60876 +-; CHECK-NEXT: and $a0, $a0, $a1 +-; CHECK-NEXT: jirl $zero, $ra, 0 +- %and = and i64 %a, 18446462598732906495 ; 0xffff00000000ffff +- %or = or i64 %and, 20015998341120 ; 0x0000123456789000 +- ret i64 %or ++ store i64 %or, i64* %d, align 8 ++ ret void + } +diff --git a/llvm/test/CodeGen/LoongArch/bstrins_w.ll b/llvm/test/CodeGen/LoongArch/bstrins_w.ll +index 47c4d826c..3b62a760e 100644 +--- a/llvm/test/CodeGen/LoongArch/bstrins_w.ll ++++ b/llvm/test/CodeGen/LoongArch/bstrins_w.ll +@@ -1,212 +1,28 @@ +-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +-; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s +- +-;; Test generation of the bstrins.w instruction. +-;; There are 8 patterns that can be matched to bstrins.w. See performORCombine +-;; for details. +- +-;; Pattern 1 +-;; R = or (and X, mask0), (and (shl Y, lsb), mask1) +-;; => +-;; R = BSTRINS X, Y, msb, lsb +-define i32 @pat1(i32 %a, i32 %b) nounwind { +-; CHECK-LABEL: pat1: +-; CHECK: # %bb.0: +-; CHECK-NEXT: bstrins.w $a0, $a1, 19, 8 +-; CHECK-NEXT: jirl $zero, $ra, 0 +- %and1 = and i32 %a, -1048321 ; 0xfff000ff +- %shl = shl i32 %b, 8 +- %and2 = and i32 %shl, 1048320 ; 0x000fff00 +- %or = or i32 %and1, %and2 +- ret i32 %or +-} +- +-define i32 @pat1_swap(i32 %a, i32 %b) nounwind { +-; CHECK-LABEL: pat1_swap: +-; CHECK: # %bb.0: +-; CHECK-NEXT: bstrins.w $a0, $a1, 19, 8 +-; CHECK-NEXT: jirl $zero, $ra, 0 +- %and1 = and i32 %a, -1048321 ; 0xfff000ff +- %shl = shl i32 %b, 8 +- %and2 = and i32 %shl, 1048320 ; 0x000fff00 +- %or = or i32 %and2, %and1 +- ret i32 %or +-} +- +-;; Pattern 2 +-;; R = or (and X, mask0), (shl (and Y, mask1), lsb) +-;; => +-;; R = BSTRINS X, Y, msb, lsb +-define i32 @pat2(i32 %a, i32 %b) nounwind { +-; CHECK-LABEL: pat2: +-; CHECK: # %bb.0: +-; CHECK-NEXT: bstrins.w $a0, $a1, 19, 8 +-; CHECK-NEXT: jirl $zero, $ra, 0 +- %and1 = and i32 %a, -1048321 ; 0xfff000ff +- %and2 = and i32 %b, 4095 ; 0x00000fff +- %shl = shl i32 %and2, 8 +- %or = or i32 %and1, %shl +- ret i32 %or +-} +- +-define i32 @pat2_swap(i32 %a, i32 %b) nounwind { +-; CHECK-LABEL: pat2_swap: +-; CHECK: # %bb.0: +-; CHECK-NEXT: bstrins.w $a0, $a1, 19, 8 +-; CHECK-NEXT: jirl $zero, $ra, 0 +- %and1 = and i32 %a, -1048321 ; 0xfff000ff +- %and2 = and i32 %b, 4095 ; 0x00000fff +- %shl = shl i32 %and2, 8 +- %or = or i32 %shl, %and1 +- ret i32 %or +-} +- +-;; Pattern 3 +-;; R = or (and X, mask0), (and Y, mask1) +-;; => +-;; R = BSTRINS X, (srl (and Y, mask1), lsb), msb, lsb +-define i32 @pat3(i32 %a, i32 %b) nounwind { +-; CHECK-LABEL: pat3: +-; CHECK: # %bb.0: +-; CHECK-NEXT: andi $a1, $a1, 288 +-; CHECK-NEXT: srli.w $a1, $a1, 4 
+-; CHECK-NEXT: bstrins.w $a0, $a1, 11, 4 +-; CHECK-NEXT: jirl $zero, $ra, 0 +- %and1 = and i32 %a, -4081 ; 0xfffff00f +- %and2 = and i32 %b, 288 ; 0x00000120 +- %or = or i32 %and1, %and2 +- ret i32 %or +-} +- +-define i32 @pat3_swap(i32 %a, i32 %b) nounwind { +-; CHECK-LABEL: pat3_swap: +-; CHECK: # %bb.0: +-; CHECK-NEXT: andi $a1, $a1, 288 +-; CHECK-NEXT: srli.w $a1, $a1, 4 +-; CHECK-NEXT: bstrins.w $a0, $a1, 11, 4 +-; CHECK-NEXT: jirl $zero, $ra, 0 +- %and1 = and i32 %a, -4081 ; 0xfffff00f +- %and2 = and i32 %b, 288 ; 0x00000120 +- %or = or i32 %and2, %and1 +- ret i32 %or +-} +- +-define i32 @pat3_positive_mask0(i32 %a, i32 %b) nounwind { +-; CHECK-LABEL: pat3_positive_mask0: +-; CHECK: # %bb.0: +-; CHECK-NEXT: srli.w $a1, $a1, 28 +-; CHECK-NEXT: bstrins.w $a0, $a1, 31, 28 +-; CHECK-NEXT: jirl $zero, $ra, 0 +- %and1 = and i32 %a, 268435455 ; 0x0fffffff +- %and2 = and i32 %b, 4026531840 ; 0xf0000000 +- %or = or i32 %and1, %and2 +- ret i32 %or +-} +- +-;; Pattern 4 +-;; R = or (and X, mask), (shl Y, shamt) +-;; => +-;; R = BSTRINS X, Y, 31, shamt +-define i32 @pat4(i32 %a, i32 %b) nounwind { +-; CHECK-LABEL: pat4: +-; CHECK: # %bb.0: +-; CHECK-NEXT: bstrins.w $a0, $a1, 31, 28 +-; CHECK-NEXT: jirl $zero, $ra, 0 +- %and = and i32 %a, 268435455 ; 0x0fffffff +- %shl = shl i32 %b, 28 +- %or = or i32 %and, %shl +- ret i32 %or +-} +- +-define i32 @pat4_swap(i32 %a, i32 %b) nounwind { +-; CHECK-LABEL: pat4_swap: +-; CHECK: # %bb.0: +-; CHECK-NEXT: bstrins.w $a0, $a1, 31, 28 +-; CHECK-NEXT: jirl $zero, $ra, 0 +- %and = and i32 %a, 268435455 ; 0x0fffffff +- %shl = shl i32 %b, 28 +- %or = or i32 %shl, %and +- ret i32 %or +-} +- +-;; Pattern 5 +-;; R = or (and X, mask), const +-;; => +-;; R = BSTRINS X, (const >> lsb), msb, lsb +-define i32 @pat5(i32 %a) nounwind { +-; CHECK-LABEL: pat5: +-; CHECK: # %bb.0: +-; CHECK-NEXT: lu12i.w $a1, 1 +-; CHECK-NEXT: ori $a1, $a1, 564 +-; CHECK-NEXT: bstrins.w $a0, $a1, 23, 8 +-; CHECK-NEXT: jirl $zero, $ra, 0 +- %and = and i32 %a, 4278190335 ; 0xff0000ff +- %or = or i32 %and, 1192960 ; 0x00123400 +- ret i32 %or +-} +- +-;; Pattern 6: a = b | ((c & mask) << shamt) +-;; In this testcase b is 0x10000002, but in fact we do not require b being a +-;; constant. As long as all positions in b to be overwritten by the incoming +-;; bits are known to be zero, the pattern could be matched. +-define i32 @pat6(i32 %c) nounwind { +-; CHECK-LABEL: pat6: +-; CHECK: # %bb.0: +-; CHECK-NEXT: lu12i.w $a1, 65536 +-; CHECK-NEXT: ori $a1, $a1, 2 +-; CHECK-NEXT: bstrins.w $a1, $a0, 27, 4 +-; CHECK-NEXT: move $a0, $a1 +-; CHECK-NEXT: jirl $zero, $ra, 0 +- %and = and i32 %c, 16777215 ; 0x00ffffff +- %shl = shl i32 %and, 4 +- %or = or i32 %shl, 268435458 ; 0x10000002 +- ret i32 %or +-} +- +-;; Pattern 7: a = b | ((c << shamt) & shifted_mask) +-;; Similar to pattern 6. +-define i32 @pat7(i32 %c) nounwind { +-; CHECK-LABEL: pat7: +-; CHECK: # %bb.0: +-; CHECK-NEXT: lu12i.w $a1, 65536 +-; CHECK-NEXT: ori $a1, $a1, 2 +-; CHECK-NEXT: bstrins.w $a1, $a0, 27, 4 +-; CHECK-NEXT: move $a0, $a1 +-; CHECK-NEXT: jirl $zero, $ra, 0 +- %shl = shl i32 %c, 4 +- %and = and i32 %shl, 268435440 ; 0x0ffffff0 +- %or = or i32 %and, 268435458 ; 0x10000002 +- ret i32 %or +-} +- +-;; Pattern 8: a = b | (c & shifted_mask) +-;; Similar to pattern 7 but without shift to c. 
+-define i32 @pat8(i32 %c) nounwind { +-; CHECK-LABEL: pat8: +-; CHECK: # %bb.0: +-; CHECK-NEXT: srli.w $a1, $a0, 4 +-; CHECK-NEXT: lu12i.w $a0, 65536 +-; CHECK-NEXT: ori $a0, $a0, 2 +-; CHECK-NEXT: bstrins.w $a0, $a1, 27, 4 +-; CHECK-NEXT: jirl $zero, $ra, 0 +- %and = and i32 %c, 268435440 ; 0x0ffffff0 +- %or = or i32 %and, 268435458 ; 0x10000002 +- ret i32 %or +-} +- +-;; Test that bstrins.w is not generated because constant OR operand +-;; doesn't fit into bits cleared by constant AND operand. +-define i32 @no_bstrins_w(i32 %a) nounwind { +-; CHECK-LABEL: no_bstrins_w: +-; CHECK: # %bb.0: +-; CHECK-NEXT: lu12i.w $a1, 291 +-; CHECK-NEXT: ori $a1, $a1, 1104 +-; CHECK-NEXT: or $a0, $a0, $a1 +-; CHECK-NEXT: lu12i.w $a1, -3805 +-; CHECK-NEXT: ori $a1, $a1, 1279 +-; CHECK-NEXT: and $a0, $a0, $a1 +-; CHECK-NEXT: jirl $zero, $ra, 0 +- %and = and i32 %a, 4278190335 ; 0xff0000ff +- %or = or i32 %and, 1193040 ; 0x00123450 +- ret i32 %or ++; RUN: llc -march=loongarch64 -o - %s | FileCheck %s ++ ++define void @bstrins_w(i32 %s, i32* nocapture %d) nounwind { ++; CHECK-LABEL: bstrins_w: ++; CHECK: bstrins.w $r[[REG2:[0-9]+]], $r[[REG1:[0-9]+]], 13, 5 ++entry: ++ %and = shl i32 %s, 5 ++ %shl = and i32 %and, 16352 ++ %tmp3 = load i32, i32* %d, align 4 ++ %and5 = and i32 %tmp3, -16353 ++ %or = or i32 %and5, %shl ++ store i32 %or, i32* %d, align 4 ++ ret void ++} ++ ++define i32 @no_bstrinsw(i32* nocapture %d) { ++; CHECK-LABEL: no_bstrinsw: ++; CHECK: addi.w $r[[REG2:[0-9]+]], $zero, -4 ++; CHECK: and $r[[REG1:[0-9]+]], $r[[REG1:[0-9]+]], $r[[REG2:[0-9]+]] ++; CHECK: ori $r[[REG2:[0-9]+]], $r[[REG1:[0-9]+]], 8 ++; CHECK-NOT: bstrins.w {{[[:space:]].*}} ++entry: ++ %tmp = load volatile i32, i32* %d, align 4 ++ %and = and i32 %tmp, -4 ++ %or = or i32 %and, 8 ++ store volatile i32 %or, i32* %d, align 4 ++ ret i32 %and + } +diff --git a/llvm/test/CodeGen/LoongArch/bstrpick_d.ll b/llvm/test/CodeGen/LoongArch/bstrpick_d.ll +index 51d4967dc..e1169cb21 100644 +--- a/llvm/test/CodeGen/LoongArch/bstrpick_d.ll ++++ b/llvm/test/CodeGen/LoongArch/bstrpick_d.ll +@@ -1,97 +1,64 @@ +-; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s ++; RUN: llc -march=loongarch64 -o - %s | FileCheck %s ++ ++define i64 @bstrpickd_add_zext(i32 signext %n) { ++entry: ++ %add = add i32 %n, 1 ++ %res = zext i32 %add to i64 ++ ret i64 %res ++ ++; CHECK-LABEL: bstrpickd_add_zext: ++; CHECK: bstrpick.d $r[[REG:[0-9]+]], $r[[REG:[0-9]+]], 31, 0 + +-define i64 @lshr40_and255(i64 %a) { +-; CHECK-LABEL: lshr40_and255: +-; CHECK: # %bb.0: +-; CHECK-NEXT: bstrpick.d $a0, $a0, 47, 40 +-; CHECK-NEXT: jirl $zero, $ra, 0 +- %shr = lshr i64 %a, 40 +- %and = and i64 %shr, 255 +- ret i64 %and + } + +-define i64 @ashr50_and511(i64 %a) { +-; CHECK-LABEL: ashr50_and511: +-; CHECK: # %bb.0: +-; CHECK-NEXT: bstrpick.d $a0, $a0, 58, 50 +-; CHECK-NEXT: jirl $zero, $ra, 0 +- %shr = ashr i64 %a, 50 +- %and = and i64 %shr, 511 ++define i64 @bstrpickd_and12(i64 zeroext %a) { ++entry: ++ %and = and i64 %a, 4095 + ret i64 %and +-} + +-define i64 @zext_i32_to_i64(i32 %a) { +-; CHECK-LABEL: zext_i32_to_i64: +-; CHECK: # %bb.0: +-; CHECK-NEXT: bstrpick.d $a0, $a0, 31, 0 +-; CHECK-NEXT: jirl $zero, $ra, 0 +- %res = zext i32 %a to i64 +- ret i64 %res ++; CHECK-LABEL: bstrpickd_and12: ++; CHECK: andi $r[[REG:[0-9]+]], $r[[REG:[0-9]+]], 4095 ++ + } + +-define i64 @and8191(i64 %a) { +-; CHECK-LABEL: and8191: +-; CHECK: # %bb.0: +-; CHECK-NEXT: bstrpick.d $a0, $a0, 12, 0 +-; CHECK-NEXT: jirl $zero, $ra, 0 ++define i64 @bstrpickd_and13(i64 zeroext %a) { ++entry: + %and = and 
i64 %a, 8191 + ret i64 %and ++ ++; CHECK-LABEL: bstrpickd_and13: ++; CHECK: bstrpick.d $r[[REG:[0-9]+]], $r[[REG:[0-9]+]], 12, 0 ++ + } + +-;; Check that andi but not bstrpick.d is generated. +-define i64 @and4095(i64 %a) { +-; CHECK-LABEL: and4095: +-; CHECK: # %bb.0: +-; CHECK-NEXT: andi $a0, $a0, 4095 +-; CHECK-NEXT: jirl $zero, $ra, 0 +- %and = and i64 %a, 4095 ++define i64 @bstrpickd_lsr_and8(i64 zeroext %a) { ++entry: ++ %shr = lshr i64 %a, 40 ++ %and = and i64 %shr, 255 + ret i64 %and +-} + +-;; (srl (and a, 0xff0), 4) => (BSTRPICK a, 11, 4) +-define i64 @and0xff0_lshr4(i64 %a) { +-; CHECK-LABEL: and0xff0_lshr4: +-; CHECK: # %bb.0: +-; CHECK-NEXT: bstrpick.d $a0, $a0, 11, 4 +-; CHECK-NEXT: jirl $zero, $ra, 0 +- %and = and i64 %a, 4080 +- %shr = lshr i64 %and, 4 +- ret i64 %shr +-} ++; CHECK-LABEL: bstrpickd_lsr_and8: ++; CHECK: bstrpick.d $r[[REG:[0-9]+]], $r[[REG:[0-9]+]], 47, 40 + +-;; (sra (and a, 0xff0), 5) can also be combined to (BSTRPICK a, 11, 5). +-;; This is because (sra (and a, 0xff0)) would be combined to (srl (and a, 0xff0), 5) +-;; firstly by DAGCombiner::SimplifyDemandedBits. +-define i64 @and4080_ashr5(i64 %a) { +-; CHECK-LABEL: and4080_ashr5: +-; CHECK: # %bb.0: +-; CHECK-NEXT: bstrpick.d $a0, $a0, 11, 5 +-; CHECK-NEXT: jirl $zero, $ra, 0 +- %and = and i64 %a, 4080 +- %shr = ashr i64 %and, 5 +- ret i64 %shr + } + +-;; Negative test: the second operand of AND is not a shifted mask +-define i64 @and0xf30_lshr4(i64 %a) { +-; CHECK-LABEL: and0xf30_lshr4: +-; CHECK: # %bb.0: +-; CHECK-NEXT: andi $a0, $a0, 3888 +-; CHECK-NEXT: srli.d $a0, $a0, 4 +-; CHECK-NEXT: jirl $zero, $ra, 0 +- %and = and i64 %a, 3888 +- %shr = lshr i64 %and, 4 +- ret i64 %shr ++define i64 @bstrpickd_zext(i32 signext %a) { ++entry: ++ %conv = zext i32 %a to i64 ++ ret i64 %conv ++ ++; CHECK-LABEL: bstrpickd_zext: ++; CHECK: bstrpick.d $r[[REG:[0-9]+]], $r[[REG:[0-9]+]], 31, 0 ++ + } + +-;; Negative test: Shamt < MaskIdx +-define i64 @and0xff0_lshr3(i64 %a) { +-; CHECK-LABEL: and0xff0_lshr3: +-; CHECK: # %bb.0: +-; CHECK-NEXT: andi $a0, $a0, 4080 +-; CHECK-NEXT: srli.d $a0, $a0, 3 +-; CHECK-NEXT: jirl $zero, $ra, 0 +- %and = and i64 %a, 4080 +- %shr = lshr i64 %and, 3 ++define i64 @bstrpickd_and_lsr(i64 zeroext %n) { ++entry: ++ %and = lshr i64 %n, 8 ++ %shr = and i64 %and, 4095 + ret i64 %shr ++ ++; CHECK-LABEL: bstrpickd_and_lsr: ++; CHECK: bstrpick.d $r[[REG:[0-9]+]], $r[[REG:[0-9]+]], 19, 8 ++ + } +diff --git a/llvm/test/CodeGen/LoongArch/bstrpick_w.ll b/llvm/test/CodeGen/LoongArch/bstrpick_w.ll +index 92d79019a..e60de4737 100644 +--- a/llvm/test/CodeGen/LoongArch/bstrpick_w.ll ++++ b/llvm/test/CodeGen/LoongArch/bstrpick_w.ll +@@ -1,97 +1,18 @@ +-; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s ++; RUN: llc -march=loongarch64 -o - %s | FileCheck %s + +-define i32 @lshr10_and255(i32 %a) { +-; CHECK-LABEL: lshr10_and255: +-; CHECK: # %bb.0: +-; CHECK-NEXT: bstrpick.w $a0, $a0, 17, 10 +-; CHECK-NEXT: jirl $zero, $ra, 0 +- %shr = lshr i32 %a, 10 +- %and = and i32 %shr, 255 ++define i32 @bstrpickw_and24(i32 signext %a) { ++; CHECK-LABEL: bstrpickw_and24: ++; CHECK: bstrpick.w $r[[REG:[0-9]+]], $r[[REG:[0-9]+]], 23, 0 ++entry: ++ %and = and i32 %a, 16777215 + ret i32 %and + } + +-define i32 @ashr20_and511(i32 %a) { +-; CHECK-LABEL: ashr20_and511: +-; CHECK: # %bb.0: +-; CHECK-NEXT: bstrpick.w $a0, $a0, 28, 20 +-; CHECK-NEXT: jirl $zero, $ra, 0 +- %shr = ashr i32 %a, 20 ++define i32 @bstrpickw_lshr_and(i32 %s, i32 %pos, i32 %sz) nounwind readnone { ++; CHECK-LABEL: bstrpickw_lshr_and: ++; CHECK: 
bstrpick.w $r[[REG:[0-9]+]], $r[[REG:[0-9]+]], 13, 5 ++entry: ++ %shr = lshr i32 %s, 5 + %and = and i32 %shr, 511 + ret i32 %and + } +- +-define i32 @zext_i16_to_i32(i16 %a) { +-; CHECK-LABEL: zext_i16_to_i32: +-; CHECK: # %bb.0: +-; CHECK-NEXT: bstrpick.w $a0, $a0, 15, 0 +-; CHECK-NEXT: jirl $zero, $ra, 0 +- %res = zext i16 %a to i32 +- ret i32 %res +-} +- +-define i32 @and8191(i32 %a) { +-; CHECK-LABEL: and8191: +-; CHECK: # %bb.0: +-; CHECK-NEXT: bstrpick.w $a0, $a0, 12, 0 +-; CHECK-NEXT: jirl $zero, $ra, 0 +- %and = and i32 %a, 8191 +- ret i32 %and +-} +- +-;; Check that andi but not bstrpick.d is generated. +-define i32 @and4095(i32 %a) { +-; CHECK-LABEL: and4095: +-; CHECK: # %bb.0: +-; CHECK-NEXT: andi $a0, $a0, 4095 +-; CHECK-NEXT: jirl $zero, $ra, 0 +- %and = and i32 %a, 4095 +- ret i32 %and +-} +- +-;; (srl (and a, 0xff0), 4) => (BSTRPICK a, 11, 4) +-define i32 @and0xff0_lshr4(i32 %a) { +-; CHECK-LABEL: and0xff0_lshr4: +-; CHECK: # %bb.0: +-; CHECK-NEXT: bstrpick.w $a0, $a0, 11, 4 +-; CHECK-NEXT: jirl $zero, $ra, 0 +- %and = and i32 %a, 4080 +- %shr = lshr i32 %and, 4 +- ret i32 %shr +-} +- +-;; (sra (and a, 0xff0), 5) can also be combined to (BSTRPICK a, 11, 5). +-;; This is because (sra (and a, 0xff0)) would be combined to (srl (and a, 0xff0), 5) +-;; firstly by DAGCombiner::SimplifyDemandedBits. +-define i32 @and4080_ashr5(i32 %a) { +-; CHECK-LABEL: and4080_ashr5: +-; CHECK: # %bb.0: +-; CHECK-NEXT: bstrpick.w $a0, $a0, 11, 5 +-; CHECK-NEXT: jirl $zero, $ra, 0 +- %and = and i32 %a, 4080 +- %shr = ashr i32 %and, 5 +- ret i32 %shr +-} +- +-;; Negative test: the second operand of AND is not a shifted mask +-define i32 @and0xf30_lshr4(i32 %a) { +-; CHECK-LABEL: and0xf30_lshr4: +-; CHECK: # %bb.0: +-; CHECK-NEXT: andi $a0, $a0, 3888 +-; CHECK-NEXT: srli.w $a0, $a0, 4 +-; CHECK-NEXT: jirl $zero, $ra, 0 +- %and = and i32 %a, 3888 +- %shr = lshr i32 %and, 4 +- ret i32 %shr +-} +- +-;; Negative test: Shamt < MaskIdx +-define i32 @and0xff0_lshr3(i32 %a) { +-; CHECK-LABEL: and0xff0_lshr3: +-; CHECK: # %bb.0: +-; CHECK-NEXT: andi $a0, $a0, 4080 +-; CHECK-NEXT: srli.w $a0, $a0, 3 +-; CHECK-NEXT: jirl $zero, $ra, 0 +- %and = and i32 %a, 4080 +- %shr = lshr i32 %and, 3 +- ret i32 %shr +-} +diff --git a/llvm/test/CodeGen/LoongArch/builtins-loongarch-base.ll b/llvm/test/CodeGen/LoongArch/builtins-loongarch-base.ll +new file mode 100644 +index 000000000..d6c8e72f2 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/builtins-loongarch-base.ll +@@ -0,0 +1,752 @@ ++; Test the base intrinsics. 
++; RUN: llc -march=loongarch64 -o - %s | FileCheck %s ++ ++define void @cpucfg() { ++entry: ++ %u32_r = alloca i32, align 4 ++ %u32_a = alloca i32, align 4 ++ %0 = load i32, i32* %u32_a, align 4 ++ %1 = call i32 @llvm.loongarch.cpucfg(i32 %0) ++ store i32 %1, i32* %u32_r, align 4 ++ ret void ++} ++ ++declare i32 @llvm.loongarch.cpucfg(i32) ++ ++; CHECK-LABEL: cpucfg: ++; CHECK: ld.w $r[[REG:[0-9]+]], $sp, 8 ++; CHECK: cpucfg $r[[REG:[0-9]+]], $r[[REG:[0-9]+]] ++; CHECK: st.w $r[[REG:[0-9]+]], $sp, 12 ++; CHECK: jr $ra ++; ++ ++define void @csrrd_w() { ++entry: ++ %u32_r = alloca i32, align 4 ++ %0 = call i32 @llvm.loongarch.csrrd.w(i32 1) ++ store i32 %0, i32* %u32_r, align 4 ++ ret void ++} ++ ++declare i32 @llvm.loongarch.csrrd.w(i32) ++ ++; CHECK-LABEL: csrrd_w: ++; CHECK: csrrd $r[[REG:[0-9]+]], 1 ++; CHECK: st.w $r[[REG:[0-9]+]], $sp, 12 ++; CHECK: jr $ra ++; ++ ++define void @csrrd_d() { ++entry: ++ %u64_r = alloca i64, align 8 ++ %0 = call i64 @llvm.loongarch.csrrd.d(i64 1) ++ store i64 %0, i64* %u64_r, align 8 ++ ret void ++} ++ ++declare i64 @llvm.loongarch.csrrd.d(i64) ++ ++; CHECK-LABEL: csrrd_d: ++; CHECK: csrrd $r[[REG:[0-9]+]], 1 ++; CHECK: st.d $r[[REG:[0-9]+]], $sp, 8 ++; CHECK: jr $ra ++; ++ ++define void @csrwr_w() { ++entry: ++ %u32_r = alloca i32, align 4 ++ %u32_a = alloca i32, align 4 ++ %0 = load i32, i32* %u32_a, align 4 ++ %1 = call i32 @llvm.loongarch.csrwr.w(i32 %0, i32 1) ++ store i32 %1, i32* %u32_r, align 4 ++ ret void ++} ++ ++declare i32 @llvm.loongarch.csrwr.w(i32, i32) ++ ++; CHECK-LABEL: csrwr_w: ++; CHECK: ld.w $r[[REG:[0-9]+]], $sp, 8 ++; CHECK: csrwr $r[[REG:[0-9]+]], 1 ++; CHECK: st.w $r[[REG:[0-9]+]], $sp, 12 ++; CHECK: jr $ra ++; ++ ++define void @csrwr_d() { ++entry: ++ %u64_r = alloca i64, align 8 ++ %u64_a = alloca i64, align 8 ++ %0 = load i64, i64* %u64_a, align 8 ++ %1 = call i64 @llvm.loongarch.csrwr.d(i64 %0, i64 1) ++ store i64 %1, i64* %u64_r, align 8 ++ ret void ++} ++ ++declare i64 @llvm.loongarch.csrwr.d(i64, i64) ++ ++; CHECK-LABEL: csrwr_d: ++; CHECK: ld.d $r[[REG:[0-9]+]], $sp, 0 ++; CHECK: csrwr $r[[REG:[0-9]+]], 1 ++; CHECK: st.d $r[[REG:[0-9]+]], $sp, 8 ++; CHECK: jr $ra ++; ++ ++define void @csrxchg_w() { ++entry: ++ %u32_r = alloca i32, align 4 ++ %u32_a = alloca i32, align 4 ++ %u32_b = alloca i32, align 4 ++ %0 = load i32, i32* %u32_a, align 4 ++ %1 = load i32, i32* %u32_b, align 4 ++ %2 = call i32 @llvm.loongarch.csrxchg.w(i32 %0, i32 %1, i32 1) ++ store i32 %2, i32* %u32_r, align 4 ++ ret void ++} ++ ++declare i32 @llvm.loongarch.csrxchg.w(i32, i32, i32) ++ ++; CHECK-LABEL: csrxchg_w: ++; CHECK: ld.w $r[[REG1:[0-9]+]], $sp, 4 ++; CHECK: ld.w $r[[REG2:[0-9]+]], $sp, 8 ++; CHECK: csrxchg $r[[REG1:[0-9]+]], $r[[REG2:[0-9]+]], 1 ++; CHECK: st.w $r[[REG1:[0-9]+]], $sp, 12 ++; CHECK: jr $ra ++; ++ ++define void @csrxchg_d() { ++entry: ++ %u64_r = alloca i64, align 8 ++ %u64_a = alloca i64, align 8 ++ %u64_b = alloca i64, align 8 ++ %0 = load i64, i64* %u64_a, align 8 ++ %1 = load i64, i64* %u64_b, align 8 ++ %2 = call i64 @llvm.loongarch.csrxchg.d(i64 %0, i64 %1, i64 1) ++ store i64 %2, i64* %u64_r, align 8 ++ ret void ++} ++ ++declare i64 @llvm.loongarch.csrxchg.d(i64, i64, i64) ++ ++; CHECK-LABEL: csrxchg_d: ++; CHECK: ld.d $r[[REG1:[0-9]+]], $sp, 8 ++; CHECK: ld.d $r[[REG2:[0-9]+]], $sp, 16 ++; CHECK: csrxchg $r[[REG1:[0-9]+]], $r[[REG2:[0-9]+]], 1 ++; CHECK: st.d $r[[REG1:[0-9]+]], $sp, 24 ++; CHECK: jr $ra ++; ++ ++define void @iocsrrd_b() { ++entry: ++ %u32_a = alloca i32, align 4 ++ %u8_r = alloca i8, align 1 ++ %0 = load 
i32, i32* %u32_a, align 4 ++ %1 = call i32 @llvm.loongarch.iocsrrd.b(i32 %0) ++ %conv = trunc i32 %1 to i8 ++ store i8 %conv, i8* %u8_r, align 1 ++ ret void ++} ++ ++declare i32 @llvm.loongarch.iocsrrd.b(i32) ++ ++; CHECK-LABEL: iocsrrd_b: ++; CHECK: ld.w $r[[REG:[0-9]+]], $sp, 12 ++; CHECK: iocsrrd.b $r[[REG:[0-9]+]], $r[[REG:[0-9]+]] ++; CHECK: st.b $r[[REG:[0-9]+]], $sp, 8 ++; CHECK: jr $ra ++; ++ ++define void @iocsrrd_h() { ++entry: ++ %u32_a = alloca i32, align 4 ++ %u16_r = alloca i16, align 2 ++ %0 = load i32, i32* %u32_a, align 4 ++ %1 = call i32 @llvm.loongarch.iocsrrd.h(i32 %0) ++ %conv = trunc i32 %1 to i16 ++ store i16 %conv, i16* %u16_r, align 2 ++ ret void ++} ++ ++declare i32 @llvm.loongarch.iocsrrd.h(i32) ++ ++; CHECK-LABEL: iocsrrd_h: ++; CHECK: ld.w $r[[REG:[0-9]+]], $sp, 12 ++; CHECK: iocsrrd.h $r[[REG:[0-9]+]], $r[[REG:[0-9]+]] ++; CHECK: st.h $r[[REG:[0-9]+]], $sp, 8 ++; CHECK: jr $ra ++; ++ ++define void @iocsrrd_w() { ++entry: ++ %u32_r = alloca i32, align 4 ++ %u32_a = alloca i32, align 4 ++ %0 = load i32, i32* %u32_a, align 4 ++ %1 = call i32 @llvm.loongarch.iocsrrd.w(i32 %0) ++ store i32 %1, i32* %u32_r, align 4 ++ ret void ++} ++ ++declare i32 @llvm.loongarch.iocsrrd.w(i32) ++ ++; CHECK-LABEL: iocsrrd_w: ++; CHECK: ld.w $r[[REG:[0-9]+]], $sp, 8 ++; CHECK: iocsrrd.w $r[[REG:[0-9]+]], $r[[REG:[0-9]+]] ++; CHECK: st.w $r[[REG:[0-9]+]], $sp, 12 ++; CHECK: jr $ra ++; ++ ++define void @iocsrrd_d() { ++entry: ++ %u32_a = alloca i32, align 4 ++ %u64_r = alloca i64, align 8 ++ %0 = load i32, i32* %u32_a, align 4 ++ %1 = call i64 @llvm.loongarch.iocsrrd.d(i32 %0) ++ store i64 %1, i64* %u64_r, align 8 ++ ret void ++} ++ ++declare i64 @llvm.loongarch.iocsrrd.d(i32) ++ ++; CHECK-LABEL: iocsrrd_d: ++; CHECK: ld.w $r[[REG:[0-9]+]], $sp, 12 ++; CHECK: iocsrrd.d $r[[REG:[0-9]+]], $r[[REG:[0-9]+]] ++; CHECK: st.d $r[[REG:[0-9]+]], $sp, 0 ++; CHECK: jr $ra ++; ++ ++define void @iocsrwr_b() { ++entry: ++ %u32_a = alloca i32, align 4 ++ %u8_a = alloca i8, align 1 ++ %0 = load i8, i8* %u8_a, align 1 ++ %conv = zext i8 %0 to i32 ++ %1 = load i32, i32* %u32_a, align 4 ++ call void @llvm.loongarch.iocsrwr.b(i32 %conv, i32 %1) ++ ret void ++} ++ ++declare void @llvm.loongarch.iocsrwr.b(i32, i32) ++ ++; CHECK-LABEL: iocsrwr_b: ++; CHECK: ld.w $r[[REG1:[0-9]+]], $sp, 12 ++; CHECK: ld.bu $r[[REG2:[0-9]+]], $sp, 8 ++; CHECK: iocsrwr.b $r[[REG2:[0-9]+]], $r[[REG1:[0-9]+]] ++; CHECK: jr $ra ++; ++ ++define void @iocsrwr_h() { ++entry: ++ %u32_a = alloca i32, align 4 ++ %u16_a = alloca i16, align 2 ++ %0 = load i16, i16* %u16_a, align 2 ++ %conv = zext i16 %0 to i32 ++ %1 = load i32, i32* %u32_a, align 4 ++ call void @llvm.loongarch.iocsrwr.h(i32 %conv, i32 %1) ++ ret void ++} ++ ++declare void @llvm.loongarch.iocsrwr.h(i32, i32) ++ ++; CHECK-LABEL: iocsrwr_h: ++; CHECK: ld.w $r[[REG1:[0-9]+]], $sp, 12 ++; CHECK: ld.hu $r[[REG2:[0-9]+]], $sp, 8 ++; CHECK: iocsrwr.h $r[[REG2:[0-9]+]], $r[[REG1:[0-9]+]] ++; CHECK: jr $ra ++; ++ ++define void @iocsrwr_w() { ++entry: ++ %u32_a = alloca i32, align 4 ++ %u32_b = alloca i32, align 4 ++ %0 = load i32, i32* %u32_a, align 4 ++ %1 = load i32, i32* %u32_b, align 4 ++ call void @llvm.loongarch.iocsrwr.w(i32 %0, i32 %1) ++ ret void ++} ++ ++declare void @llvm.loongarch.iocsrwr.w(i32, i32) ++ ++; CHECK-LABEL: iocsrwr_w: ++; CHECK: ld.w $r[[REG1:[0-9]+]], $sp, 8 ++; CHECK: ld.w $r[[REG2:[0-9]+]], $sp, 12 ++; CHECK: iocsrwr.w $r[[REG2:[0-9]+]], $r[[REG1:[0-9]+]] ++; CHECK: jr $ra ++; ++ ++define void @iocsrwr_d() { ++entry: ++ %u32_a = alloca i32, align 4 ++ 
%u64_a = alloca i64, align 8 ++ %0 = load i64, i64* %u64_a, align 8 ++ %1 = load i32, i32* %u32_a, align 4 ++ call void @llvm.loongarch.iocsrwr.d(i64 %0, i32 %1) ++ ret void ++} ++ ++declare void @llvm.loongarch.iocsrwr.d(i64, i32) ++ ++; CHECK-LABEL: iocsrwr_d: ++; CHECK: ld.w $r[[REG1:[0-9]+]], $sp, 12 ++; CHECK: ld.d $r[[REG2:[0-9]+]], $sp, 0 ++; CHECK: iocsrwr.d $r[[REG2:[0-9]+]], $r[[REG1:[0-9]+]] ++; CHECK: jr $ra ++; ++ ++define void @cacop_w() { ++entry: ++ %i32_a = alloca i32, align 4 ++ %0 = load i32, i32* %i32_a, align 4 ++ call void @llvm.loongarch.cacop.w(i32 1, i32 %0, i32 2) ++ ret void ++} ++ ++declare void @llvm.loongarch.cacop.w(i32, i32, i32) ++ ++; CHECK-LABEL: cacop_w: ++; CHECK: ld.w $r[[REG:[0-9]+]], $sp, 12 ++; CHECK: cacop 1, $r[[REG:[0-9]+]], 2 ++; CHECK: jr $ra ++; ++ ++define void @cacop_d() { ++entry: ++ %i64_a = alloca i64, align 8 ++ %0 = load i64, i64* %i64_a, align 8 ++ call void @llvm.loongarch.cacop.d(i32 1, i64 %0, i64 2) ++ ret void ++} ++ ++declare void @llvm.loongarch.cacop.d(i32, i64, i64) ++ ++; CHECK-LABEL: cacop_d: ++; CHECK: ld.d $r[[REG:[0-9]+]], $sp, 8 ++; CHECK: cacop 1, $r[[REG:[0-9]+]], 2 ++; CHECK: jr $ra ++; ++ ++define void @rdtime_d() { ++entry: ++ %value = alloca i64, align 8 ++ %timeid = alloca i64, align 8 ++ %0 = call { i64, i64 } asm sideeffect "rdtime.d\09$0,$1\0A\09", "=&r,=&r"() nounwind ++ %asmresult0 = extractvalue { i64, i64 } %0, 0 ++ %asmresult1 = extractvalue { i64, i64 } %0, 1 ++ store i64 %asmresult0, i64* %value, align 8 ++ store i64 %asmresult1, i64* %timeid, align 8 ++ ret void ++} ++ ++; CHECK-LABEL: rdtime_d: ++; CHECK: rdtime.d $r[[REG1:[0-9]+]], $r[[REG2:[0-9]+]] ++; CHECK: st.d $r[[REG2:[0-9]+]], $sp, 8 ++; CHECK: st.d $r[[REG1:[0-9]+]], $sp, 0 ++; CHECK: jr $ra ++; ++ ++define void @rdtimeh_w() { ++entry: ++ %value = alloca i32, align 4 ++ %timeid = alloca i32, align 4 ++ %0 = call { i32, i32 } asm sideeffect "rdtimeh.w\09$0,$1\0A\09", "=&r,=&r"() nounwind ++ %asmresult0 = extractvalue { i32, i32 } %0, 0 ++ %asmresult1 = extractvalue { i32, i32 } %0, 1 ++ store i32 %asmresult0, i32* %value, align 4 ++ store i32 %asmresult1, i32* %timeid, align 4 ++ ret void ++} ++ ++; CHECK-LABEL: rdtimeh_w: ++; CHECK: rdtimeh.w $r[[REG1:[0-9]+]], $r[[REG2:[0-9]+]] ++; CHECK: st.w $r[[REG2:[0-9]+]], $sp, 12 ++; CHECK: st.w $r[[REG1:[0-9]+]], $sp, 8 ++; CHECK: jr $ra ++; ++ ++define void @rdtimel_w() { ++entry: ++ %value = alloca i32, align 4 ++ %timeid = alloca i32, align 4 ++ %0 = call { i32, i32 } asm sideeffect "rdtimel.w\09$0,$1\0A\09", "=&r,=&r"() nounwind ++ %asmresult0 = extractvalue { i32, i32 } %0, 0 ++ %asmresult1 = extractvalue { i32, i32 } %0, 1 ++ store i32 %asmresult0, i32* %value, align 4 ++ store i32 %asmresult1, i32* %timeid, align 4 ++ ret void ++} ++ ++; CHECK-LABEL: rdtimel_w: ++; CHECK: rdtimel.w $r[[REG1:[0-9]+]], $r[[REG2:[0-9]+]] ++; CHECK: st.w $r[[REG2:[0-9]+]], $sp, 12 ++; CHECK: st.w $r[[REG1:[0-9]+]], $sp, 8 ++; CHECK: jr $ra ++; ++ ++define void @crc_w_b_w() { ++entry: ++ %i32_r = alloca i32, align 4 ++ %i32_a = alloca i32, align 4 ++ %i8_a = alloca i8, align 1 ++ %0 = load i8, i8* %i8_a, align 1 ++ %conv = sext i8 %0 to i32 ++ %1 = load i32, i32* %i32_a, align 4 ++ %2 = call i32 @llvm.loongarch.crc.w.b.w(i32 %conv, i32 %1) ++ store i32 %2, i32* %i32_r, align 4 ++ ret void ++} ++ ++declare i32 @llvm.loongarch.crc.w.b.w(i32, i32) ++ ++; CHECK-LABEL: crc_w_b_w: ++; CHECK: ld.w $r[[REG1:[0-9]+]], $sp, 8 ++; CHECK: ld.b $r[[REG2:[0-9]+]], $sp, 4 ++; CHECK: crc.w.b.w $r[[REG1:[0-9]+]], $r[[REG2:[0-9]+]], 
$r[[REG1:[0-9]+]] ++; CHECK: jr $ra ++; ++ ++define void @crc_w_h_w() { ++entry: ++ %i32_r = alloca i32, align 4 ++ %i32_a = alloca i32, align 4 ++ %i16_a = alloca i16, align 2 ++ %0 = load i16, i16* %i16_a, align 2 ++ %conv = sext i16 %0 to i32 ++ %1 = load i32, i32* %i32_a, align 4 ++ %2 = call i32 @llvm.loongarch.crc.w.h.w(i32 %conv, i32 %1) ++ store i32 %2, i32* %i32_r, align 4 ++ ret void ++} ++ ++declare i32 @llvm.loongarch.crc.w.h.w(i32, i32) ++ ++; CHECK-LABEL: crc_w_h_w: ++; CHECK: ld.w $r[[REG1:[0-9]+]], $sp, 8 ++; CHECK: ld.h $r[[REG2:[0-9]+]], $sp, 4 ++; CHECK: crc.w.h.w $r[[REG1:[0-9]+]], $r[[REG2:[0-9]+]], $r[[REG1:[0-9]+]] ++; CHECK: jr $ra ++; ++ ++define void @crc_w_w_w() { ++entry: ++ %i32_r = alloca i32, align 4 ++ %i32_a = alloca i32, align 4 ++ %i32_b = alloca i32, align 4 ++ %0 = load i32, i32* %i32_a, align 4 ++ %1 = load i32, i32* %i32_b, align 4 ++ %2 = call i32 @llvm.loongarch.crc.w.w.w(i32 %0, i32 %1) ++ store i32 %2, i32* %i32_r, align 4 ++ ret void ++} ++ ++declare i32 @llvm.loongarch.crc.w.w.w(i32, i32) ++ ++; CHECK-LABEL: crc_w_w_w: ++; CHECK: ld.w $r[[REG1:[0-9]+]], $sp, 4 ++; CHECK: ld.w $r[[REG2:[0-9]+]], $sp, 8 ++; CHECK: crc.w.w.w $r[[REG1:[0-9]+]], $r[[REG2:[0-9]+]], $r[[REG1:[0-9]+]] ++; CHECK: jr $ra ++; ++ ++define void @crc_w_d_w() { ++entry: ++ %i32_r = alloca i32, align 4 ++ %i32_a = alloca i32, align 4 ++ %i64_a = alloca i64, align 8 ++ %0 = load i64, i64* %i64_a, align 8 ++ %1 = load i32, i32* %i32_a, align 4 ++ %2 = call i32 @llvm.loongarch.crc.w.d.w(i64 %0, i32 %1) ++ store i32 %2, i32* %i32_r, align 4 ++ ret void ++} ++ ++declare i32 @llvm.loongarch.crc.w.d.w(i64, i32) ++ ++; CHECK-LABEL: crc_w_d_w: ++; CHECK: ld.w $r[[REG1:[0-9]+]], $sp, 8 ++; CHECK: ld.d $r[[REG2:[0-9]+]], $sp, 0 ++; CHECK: crc.w.d.w $r[[REG1:[0-9]+]], $r[[REG2:[0-9]+]], $r[[REG1:[0-9]+]] ++; CHECK: jr $ra ++; ++ ++define void @crcc_w_b_w() { ++entry: ++ %i32_r = alloca i32, align 4 ++ %i32_a = alloca i32, align 4 ++ %i8_a = alloca i8, align 1 ++ %0 = load i8, i8* %i8_a, align 1 ++ %conv = sext i8 %0 to i32 ++ %1 = load i32, i32* %i32_a, align 4 ++ %2 = call i32 @llvm.loongarch.crcc.w.b.w(i32 %conv, i32 %1) ++ store i32 %2, i32* %i32_r, align 4 ++ ret void ++} ++ ++declare i32 @llvm.loongarch.crcc.w.b.w(i32, i32) ++ ++; CHECK-LABEL: crcc_w_b_w: ++; CHECK: ld.w $r[[REG1:[0-9]+]], $sp, 8 ++; CHECK: ld.b $r[[REG2:[0-9]+]], $sp, 4 ++; CHECK: crcc.w.b.w $r[[REG1:[0-9]+]], $r[[REG2:[0-9]+]], $r[[REG1:[0-9]+]] ++; CHECK: jr $ra ++; ++ ++define void @crcc_w_h_w() { ++entry: ++ %i32_r = alloca i32, align 4 ++ %i32_a = alloca i32, align 4 ++ %i16_a = alloca i16, align 2 ++ %0 = load i16, i16* %i16_a, align 2 ++ %conv = sext i16 %0 to i32 ++ %1 = load i32, i32* %i32_a, align 4 ++ %2 = call i32 @llvm.loongarch.crcc.w.h.w(i32 %conv, i32 %1) ++ store i32 %2, i32* %i32_r, align 4 ++ ret void ++} ++ ++declare i32 @llvm.loongarch.crcc.w.h.w(i32, i32) ++ ++; CHECK-LABEL: crcc_w_h_w: ++; CHECK: ld.w $r[[REG1:[0-9]+]], $sp, 8 ++; CHECK: ld.h $r[[REG2:[0-9]+]], $sp, 4 ++; CHECK: crcc.w.h.w $r[[REG1:[0-9]+]], $r[[REG2:[0-9]+]], $r[[REG1:[0-9]+]] ++; CHECK: jr $ra ++; ++ ++define void @crcc_w_w_w() { ++entry: ++ %i32_r = alloca i32, align 4 ++ %i32_a = alloca i32, align 4 ++ %i32_b = alloca i32, align 4 ++ %0 = load i32, i32* %i32_a, align 4 ++ %1 = load i32, i32* %i32_b, align 4 ++ %2 = call i32 @llvm.loongarch.crcc.w.w.w(i32 %0, i32 %1) ++ store i32 %2, i32* %i32_r, align 4 ++ ret void ++} ++ ++declare i32 @llvm.loongarch.crcc.w.w.w(i32, i32) ++ ++; CHECK-LABEL: crcc_w_w_w: ++; CHECK: ld.w 
$r[[REG1:[0-9]+]], $sp, 4 ++; CHECK: ld.w $r[[REG2:[0-9]+]], $sp, 8 ++; CHECK: crcc.w.w.w $r[[REG1:[0-9]+]], $r[[REG2:[0-9]+]], $r[[REG1:[0-9]+]] ++; CHECK: jr $ra ++; ++ ++define void @crcc_w_d_w() { ++entry: ++ %i32_r = alloca i32, align 4 ++ %i32_a = alloca i32, align 4 ++ %i64_a = alloca i64, align 8 ++ %0 = load i64, i64* %i64_a, align 8 ++ %1 = load i32, i32* %i32_a, align 4 ++ %2 = call i32 @llvm.loongarch.crcc.w.d.w(i64 %0, i32 %1) ++ store i32 %2, i32* %i32_r, align 4 ++ ret void ++} ++ ++declare i32 @llvm.loongarch.crcc.w.d.w(i64, i32) ++ ++; CHECK-LABEL: crcc_w_d_w: ++; CHECK: ld.w $r[[REG1:[0-9]+]], $sp, 8 ++; CHECK: ld.d $r[[REG2:[0-9]+]], $sp, 0 ++; CHECK: crcc.w.d.w $r[[REG1:[0-9]+]], $r[[REG2:[0-9]+]], $r[[REG1:[0-9]+]] ++; CHECK: jr $ra ++; ++ ++define void @tlbclr() { ++entry: ++ call void @llvm.loongarch.tlbclr() ++ ret void ++} ++ ++declare void @llvm.loongarch.tlbclr() ++ ++; CHECK-LABEL: tlbclr: ++; CHECK: tlbclr ++; CHECK: jr $ra ++; ++ ++define void @tlbflush() { ++entry: ++ call void @llvm.loongarch.tlbflush() ++ ret void ++} ++ ++declare void @llvm.loongarch.tlbflush() ++ ++; CHECK-LABEL: tlbflush: ++; CHECK: tlbflush ++; CHECK: jr $ra ++; ++ ++define void @tlbfill() { ++entry: ++ call void @llvm.loongarch.tlbfill() ++ ret void ++} ++ ++declare void @llvm.loongarch.tlbfill() ++ ++; CHECK-LABEL: tlbfill: ++; CHECK: tlbfill ++; CHECK: jr $ra ++; ++ ++define void @tlbrd() { ++entry: ++ call void @llvm.loongarch.tlbrd() ++ ret void ++} ++ ++declare void @llvm.loongarch.tlbrd() ++ ++; CHECK-LABEL: tlbrd: ++; CHECK: tlbrd ++; CHECK: jr $ra ++; ++ ++define void @tlbwr() { ++entry: ++ call void @llvm.loongarch.tlbwr() ++ ret void ++} ++ ++declare void @llvm.loongarch.tlbwr() ++ ++; CHECK-LABEL: tlbwr: ++; CHECK: tlbwr ++; CHECK: jr $ra ++; ++ ++define void @tlbsrch() { ++entry: ++ call void @llvm.loongarch.tlbsrch() ++ ret void ++} ++ ++declare void @llvm.loongarch.tlbsrch() ++ ++; CHECK-LABEL: tlbsrch: ++; CHECK: tlbsrch ++; CHECK: jr $ra ++; ++ ++define void @syscall() { ++entry: ++ call void @llvm.loongarch.syscall(i64 1) ++ ret void ++} ++ ++declare void @llvm.loongarch.syscall(i64) ++ ++; CHECK-LABEL: syscall: ++; CHECK: syscall 1 ++; CHECK: jr $ra ++; ++ ++define void @break_builtin() { ++entry: ++ call void @llvm.loongarch.break(i64 1) ++ ret void ++} ++ ++declare void @llvm.loongarch.break(i64) ++ ++; CHECK-LABEL: break_builtin: ++; CHECK: break 1 ++; CHECK: jr $ra ++; ++ ++define void @asrtle_d() { ++entry: ++ %i64_a = alloca i64, align 8 ++ %i64_b = alloca i64, align 8 ++ %0 = load i64, i64* %i64_a, align 8 ++ %1 = load i64, i64* %i64_b, align 8 ++ call void @llvm.loongarch.asrtle.d(i64 %0, i64 %1) ++ ret void ++} ++ ++declare void @llvm.loongarch.asrtle.d(i64, i64) ++ ++; CHECK-LABEL: asrtle_d: ++; CHECK: ld.d $r[[REG1:[0-9]+]], $sp, 0 ++; CHECK: ld.d $r[[REG2:[0-9]+]], $sp, 8 ++; CHECK: asrtle.d $r[[REG2:[0-9]+]], $r[[REG1:[0-9]+]] ++; CHECK: jr $ra ++; ++ ++define void @asrtgt_d() { ++entry: ++ %i64_a = alloca i64, align 8 ++ %i64_b = alloca i64, align 8 ++ %0 = load i64, i64* %i64_a, align 8 ++ %1 = load i64, i64* %i64_b, align 8 ++ call void @llvm.loongarch.asrtgt.d(i64 %0, i64 %1) ++ ret void ++} ++ ++declare void @llvm.loongarch.asrtgt.d(i64, i64) ++ ++; CHECK-LABEL: asrtgt_d: ++; CHECK: ld.d $r[[REG1:[0-9]+]], $sp, 0 ++; CHECK: ld.d $r[[REG2:[0-9]+]], $sp, 8 ++; CHECK: asrtgt.d $r[[REG2:[0-9]+]], $r[[REG1:[0-9]+]] ++; CHECK: jr $ra ++; ++ ++define void @dbar() { ++entry: ++ call void @llvm.loongarch.dbar(i64 0) ++ ret void ++} ++ ++declare void 
@llvm.loongarch.dbar(i64) ++ ++; CHECK-LABEL: dbar: ++; CHECK: dbar 0 ++; CHECK: jr $ra ++; ++ ++define void @ibar() { ++entry: ++ call void @llvm.loongarch.ibar(i64 0) ++ ret void ++} ++ ++declare void @llvm.loongarch.ibar(i64) ++ ++; CHECK-LABEL: ibar: ++; CHECK: ibar 0 ++; CHECK: jr $ra ++; ++ ++define void @movfcsr2gr() { ++entry: ++ %u32_r = alloca i32, align 4 ++ %rd = alloca i32, align 4 ++ %0 = call i32 asm sideeffect "movfcsr2gr $0, $$fcsr0", "=&r"() ++ store i32 %0, i32* %rd, align 4 ++ %1 = load i32, i32* %rd, align 4 ++ store i32 %1, i32* %u32_r, align 4 ++ ret void ++} ++ ++; CHECK-LABEL: movfcsr2gr: ++; CHECK: movfcsr2gr $r[[REG:[0-9]+]], $fcsr[[REG:[0-9]+]] ++; CHECK: st.w $r[[REG:[0-9]+]], $sp, 8 ++; CHECK: st.w $r[[REG:[0-9]+]], $sp, 12 ++; CHECK: jr $ra ++; ++ ++define void @movgr2fcsr() { ++entry: ++ %u32_a = alloca i32, align 4 ++ %0 = load i32, i32* %u32_a, align 4 ++ call void asm sideeffect "movgr2fcsr $$fcsr0, $0", "r"(i32 %0) ++ ret void ++} ++ ++; CHECK-LABEL: movgr2fcsr: ++; CHECK: ld.w $r[[REG:[0-9]+]], $sp, 12 ++; CHECK: movgr2fcsr $fcsr[[REG:[0-9]+]], $r[[REG:[0-9]+]] ++; CHECK: jr $ra ++; +diff --git a/llvm/test/CodeGen/LoongArch/const-mult.ll b/llvm/test/CodeGen/LoongArch/const-mult.ll +new file mode 100644 +index 000000000..955e16268 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/const-mult.ll +@@ -0,0 +1,245 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc -mtriple=loongarch64-linux-gnu < %s | FileCheck %s ++ ++ ++; This test is copied from Mips except the mul2730_32 and mul2730_64 ++ ++define i32 @mul5_32(i32 signext %a) { ++; CHECK-LABEL: mul5_32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: slli.w $r4, $r4, 0 ++; CHECK-NEXT: alsl.w $r4, $r4, $r4, 2 ++; CHECK-NEXT: jr $ra ++entry: ++ %mul = mul nsw i32 %a, 5 ++ ret i32 %mul ++} ++ ++define i32 @mul27_32(i32 signext %a) { ++; CHECK-LABEL: mul27_32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: slli.w $r4, $r4, 0 ++; CHECK-NEXT: alsl.w $r5, $r4, $r4, 2 ++; CHECK-NEXT: slli.w $r4, $r4, 5 ++; CHECK-NEXT: sub.w $r4, $r4, $r5 ++; CHECK-NEXT: jr $ra ++entry: ++ %mul = mul nsw i32 %a, 27 ++ ret i32 %mul ++} ++ ++define i32 @muln2147483643_32(i32 signext %a) { ++; CHECK-LABEL: muln2147483643_32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: slli.w $r4, $r4, 0 ++; CHECK-NEXT: alsl.w $r5, $r4, $r4, 2 ++; CHECK-NEXT: slli.w $r4, $r4, 31 ++; CHECK-NEXT: add.w $r4, $r4, $r5 ++; CHECK-NEXT: jr $ra ++entry: ++ %mul = mul nsw i32 %a, -2147483643 ++ ret i32 %mul ++} ++ ++define i64 @muln9223372036854775805_64(i64 signext %a) { ++; CHECK-LABEL: muln9223372036854775805_64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: alsl.d $r5, $r4, $r4, 1 ++; CHECK-NEXT: slli.d $r4, $r4, 63 ++; CHECK-NEXT: add.d $r4, $r4, $r5 ++; CHECK-NEXT: jr $ra ++entry: ++ %mul = mul nsw i64 %a, -9223372036854775805 ++ ret i64 %mul ++} ++ ++define i128 @muln170141183460469231731687303715884105725_128(i128 signext %a) { ++; CHECK-LABEL: muln170141183460469231731687303715884105725_128: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: srli.d $r6, $r4, 63 ++; CHECK-NEXT: slli.d $r7, $r5, 1 ++; CHECK-NEXT: or $r6, $r7, $r6 ++; CHECK-NEXT: add.d $r5, $r6, $r5 ++; CHECK-NEXT: slli.d $r7, $r4, 1 ++; CHECK-NEXT: alsl.d $r6, $r4, $r4, 1 ++; CHECK-NEXT: sltu $r7, $r6, $r7 ++; CHECK-NEXT: bstrpick.d $r7, $r7, 31, 0 ++; CHECK-NEXT: add.d $r5, $r5, $r7 ++; CHECK-NEXT: slli.d $r4, $r4, 63 ++; CHECK-NEXT: add.d $r5, $r4, $r5 ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++entry: ++ %mul = mul nsw i128 %a, 
-170141183460469231731687303715884105725 ++ ret i128 %mul ++} ++ ++define i128 @mul170141183460469231731687303715884105723_128(i128 signext %a) { ++; CHECK-LABEL: mul170141183460469231731687303715884105723_128: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: srli.d $r6, $r4, 62 ++; CHECK-NEXT: slli.d $r7, $r5, 2 ++; CHECK-NEXT: or $r6, $r7, $r6 ++; CHECK-NEXT: add.d $r5, $r6, $r5 ++; CHECK-NEXT: slli.d $r6, $r4, 2 ++; CHECK-NEXT: alsl.d $r7, $r4, $r4, 2 ++; CHECK-NEXT: sltu $r6, $r7, $r6 ++; CHECK-NEXT: bstrpick.d $r6, $r6, 31, 0 ++; CHECK-NEXT: add.d $r5, $r5, $r6 ++; CHECK-NEXT: slli.d $r4, $r4, 63 ++; CHECK-NEXT: sub.d $r4, $r4, $r5 ++; CHECK-NEXT: sltu $r5, $zero, $r7 ++; CHECK-NEXT: bstrpick.d $r5, $r5, 31, 0 ++; CHECK-NEXT: sub.d $r5, $r4, $r5 ++; CHECK-NEXT: addi.d $r4, $zero, 0 ++; CHECK-NEXT: sub.d $r4, $r4, $r7 ++; CHECK-NEXT: jr $ra ++entry: ++ %mul = mul nsw i128 %a, 170141183460469231731687303715884105723 ++ ret i128 %mul ++} ++ ++define i32 @mul42949673_32(i32 %a) { ++; CHECK-LABEL: mul42949673_32: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r5, 10485 ++; CHECK-NEXT: ori $r5, $r5, 3113 ++; CHECK-NEXT: slli.w $r4, $r4, 0 ++; CHECK-NEXT: mul.w $r4, $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %b = mul i32 %a, 42949673 ++ ret i32 %b ++} ++ ++define i64 @mul42949673_64(i64 %a) { ++; CHECK-LABEL: mul42949673_64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: lu12i.w $r5, 10485 ++; CHECK-NEXT: ori $r5, $r5, 3113 ++; CHECK-NEXT: mul.d $r4, $r4, $r5 ++; CHECK-NEXT: jr $ra ++entry: ++ %b = mul i64 %a, 42949673 ++ ret i64 %b ++} ++ ++define i32 @mul22224078_32(i32 %a) { ++; CHECK-LABEL: mul22224078_32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: lu12i.w $r5, 5425 ++; CHECK-NEXT: ori $r5, $r5, 3278 ++; CHECK-NEXT: slli.w $r4, $r4, 0 ++; CHECK-NEXT: mul.w $r4, $r4, $r5 ++; CHECK-NEXT: jr $ra ++entry: ++ %b = mul i32 %a, 22224078 ++ ret i32 %b ++} ++ ++define i64 @mul22224078_64(i64 %a) { ++; CHECK-LABEL: mul22224078_64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: lu12i.w $r5, 5425 ++; CHECK-NEXT: ori $r5, $r5, 3278 ++; CHECK-NEXT: mul.d $r4, $r4, $r5 ++; CHECK-NEXT: jr $ra ++entry: ++ %b = mul i64 %a, 22224078 ++ ret i64 %b ++} ++ ++define i32 @mul22245375_32(i32 %a) { ++; CHECK-LABEL: mul22245375_32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: lu12i.w $r5, 5430 ++; CHECK-NEXT: ori $r5, $r5, 4095 ++; CHECK-NEXT: slli.w $r4, $r4, 0 ++; CHECK-NEXT: mul.w $r4, $r4, $r5 ++; CHECK-NEXT: jr $ra ++entry: ++ %b = mul i32 %a, 22245375 ++ ret i32 %b ++} ++ ++define i64 @mul22245375_64(i64 %a) { ++; CHECK-LABEL: mul22245375_64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: lu12i.w $r5, 5430 ++; CHECK-NEXT: ori $r5, $r5, 4095 ++; CHECK-NEXT: mul.d $r4, $r4, $r5 ++; CHECK-NEXT: jr $ra ++entry: ++ %b = mul i64 %a, 22245375 ++ ret i64 %b ++} ++ ++define i32 @mul25165824_32(i32 %a) { ++; CHECK-LABEL: mul25165824_32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: lu12i.w $r5, 5430 ++; CHECK-NEXT: ori $r5, $r5, 4095 ++; CHECK-NEXT: slli.w $r4, $r4, 0 ++; CHECK-NEXT: mul.w $r4, $r4, $r5 ++; CHECK-NEXT: jr $ra ++entry: ++ %b = mul i32 %a, 22245375 ++ ret i32 %b ++} ++ ++define i64 @mul25165824_64(i64 %a) { ++; CHECK-LABEL: mul25165824_64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: slli.d $r5, $r4, 23 ++; CHECK-NEXT: slli.d $r4, $r4, 24 ++; CHECK-NEXT: add.d $r4, $r4, $r5 ++; CHECK-NEXT: jr $ra ++entry: ++ %b = mul i64 %a, 25165824 ++ ret i64 %b ++} ++ ++define i32 @mul33554432_32(i32 %a) { ++; CHECK-LABEL: mul33554432_32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: lu12i.w $r5, 5430 ++; CHECK-NEXT: ori $r5, $r5, 4095 
++; CHECK-NEXT: slli.w $r4, $r4, 0 ++; CHECK-NEXT: mul.w $r4, $r4, $r5 ++; CHECK-NEXT: jr $ra ++entry: ++ %b = mul i32 %a, 22245375 ++ ret i32 %b ++} ++ ++define i64 @mul33554432_64(i64 %a) { ++; CHECK-LABEL: mul33554432_64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: slli.d $r4, $r4, 25 ++; CHECK-NEXT: jr $ra ++entry: ++ %b = mul i64 %a, 33554432 ++ ret i64 %b ++} ++ ++define i32 @mul2730_32(i32 %a) { ++; CHECK-LABEL: mul2730_32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: slli.w $r4, $r4, 0 ++; CHECK-NEXT: ori $r5, $zero, 2730 ++; CHECK-NEXT: mul.w $r4, $r4, $r5 ++; CHECK-NEXT: jr $ra ++entry: ++ %b = mul i32 %a, 2730 ++ ret i32 %b ++} ++ ++define i64 @mul2730_64(i64 %a) { ++; CHECK-LABEL: mul2730_64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: ori $r5, $zero, 2730 ++; CHECK-NEXT: mul.d $r4, $r4, $r5 ++; CHECK-NEXT: jr $ra ++entry: ++ %b = mul i64 %a, 2730 ++ ret i64 %b ++} +diff --git a/llvm/test/CodeGen/LoongArch/disable-tail-calls.ll b/llvm/test/CodeGen/LoongArch/disable-tail-calls.ll +new file mode 100644 +index 000000000..586daca23 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/disable-tail-calls.ll +@@ -0,0 +1,94 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc -march=loongarch64 -relocation-model=pic < %s \ ++; RUN: | FileCheck %s --check-prefixes=CHECK1 ++; RUN: llc -march=loongarch64 -relocation-model=pic -disable-tail-calls < %s \ ++; RUN: | FileCheck %s --check-prefixes=CHECK2 ++; RUN: llc -march=loongarch64 -relocation-model=pic -disable-tail-calls=false < %s \ ++; RUN: | FileCheck %s --check-prefixes=CHECK3 ++ ++; Function with attribute #0 = { "disable-tail-calls"="true" } ++define i32 @caller1(i32 %a) #0 { ++; CHECK1-LABEL: caller1: ++; CHECK1: # %bb.0: # %entry ++; CHECK1-NEXT: addi.d $sp, $sp, -16 ++; CHECK1-NEXT: .cfi_def_cfa_offset 16 ++; CHECK1-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill ++; CHECK1-NEXT: .cfi_offset 1, -8 ++; CHECK1-NEXT: bl callee ++; CHECK1-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload ++; CHECK1-NEXT: addi.d $sp, $sp, 16 ++; CHECK1-NEXT: jr $ra ++; ++; CHECK2-LABEL: caller1: ++; CHECK2: # %bb.0: # %entry ++; CHECK2-NEXT: addi.d $sp, $sp, -16 ++; CHECK2-NEXT: .cfi_def_cfa_offset 16 ++; CHECK2-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill ++; CHECK2-NEXT: .cfi_offset 1, -8 ++; CHECK2-NEXT: bl callee ++; CHECK2-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload ++; CHECK2-NEXT: addi.d $sp, $sp, 16 ++; CHECK2-NEXT: jr $ra ++; ++; CHECK3-LABEL: caller1: ++; CHECK3: # %bb.0: # %entry ++; CHECK3-NEXT: b callee ++entry: ++ %call = tail call i32 @callee(i32 %a) ++ ret i32 %call ++} ++ ++ ++; Function with attribute #1 = { "disable-tail-calls"="false" } ++define i32 @caller2(i32 %a) #1 { ++; CHECK1-LABEL: caller2: ++; CHECK1: # %bb.0: # %entry ++; CHECK1-NEXT: b callee ++; ++; CHECK2-LABEL: caller2: ++; CHECK2: # %bb.0: # %entry ++; CHECK2-NEXT: addi.d $sp, $sp, -16 ++; CHECK2-NEXT: .cfi_def_cfa_offset 16 ++; CHECK2-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill ++; CHECK2-NEXT: .cfi_offset 1, -8 ++; CHECK2-NEXT: bl callee ++; CHECK2-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload ++; CHECK2-NEXT: addi.d $sp, $sp, 16 ++; CHECK2-NEXT: jr $ra ++; ++; CHECK3-LABEL: caller2: ++; CHECK3: # %bb.0: # %entry ++; CHECK3-NEXT: b callee ++entry: ++ %call = tail call i32 @callee(i32 %a) ++ ret i32 %call ++} ++ ++define i32 @caller3(i32 %a) { ++; CHECK1-LABEL: caller3: ++; CHECK1: # %bb.0: # %entry ++; CHECK1-NEXT: b callee ++; ++; CHECK2-LABEL: caller3: ++; CHECK2: # %bb.0: # %entry ++; CHECK2-NEXT: addi.d $sp, $sp, -16 ++; 
CHECK2-NEXT: .cfi_def_cfa_offset 16 ++; CHECK2-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill ++; CHECK2-NEXT: .cfi_offset 1, -8 ++; CHECK2-NEXT: bl callee ++; CHECK2-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload ++; CHECK2-NEXT: addi.d $sp, $sp, 16 ++; CHECK2-NEXT: jr $ra ++; ++; CHECK3-LABEL: caller3: ++; CHECK3: # %bb.0: # %entry ++; CHECK3-NEXT: b callee ++entry: ++ %call = tail call i32 @callee(i32 %a) ++ ret i32 %call ++} ++ ++declare i32 @callee(i32) ++ ++attributes #0 = { "disable-tail-calls"="true" } ++attributes #1 = { "disable-tail-calls"="false" } +diff --git a/llvm/test/CodeGen/LoongArch/divrem.ll b/llvm/test/CodeGen/LoongArch/divrem.ll +new file mode 100644 +index 000000000..34293a83c +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/divrem.ll +@@ -0,0 +1,68 @@ ++; RUN: llc -march=loongarch64 -relocation-model=pic < %s | FileCheck %s -check-prefixes=CHECK,CHECK-TRAP ++ ++; RUN: llc -march=loongarch64 -mnocheck-zero-division -relocation-model=pic < %s | FileCheck %s -check-prefixes=CHECK,NOCHECK ++ ++; FileCheck Prefixes: ++; CHECK-TRAP - trap ++; NOCHECK - Division by zero will not be detected ++ ++define i32 @sdiv1(i32 signext %a0, i32 signext %a1) nounwind readnone { ++entry: ++; CHECK-LABEL: sdiv1: ++ ++; CHECK: div.w $r4, $r4, $r5 ++; CHECK-TRAP: bne $r5, $zero, 8 ++; CHECK-TRAP: break 7 ++ ++; NOCHECK-NOT: bne ++; NOCHECK-NOT: break ++ ++ %div = sdiv i32 %a0, %a1 ++ ret i32 %div ++} ++ ++define i32 @srem1(i32 signext %a0, i32 signext %a1) nounwind readnone { ++entry: ++; CHECK-LABEL: srem1: ++ ++; CHECK: mod.w $r4, $r4, $r5 ++; CHECK-TRAP: bne $r5, $zero, 8 ++; CHECK-TRAP: break 7 ++ ++; NOCHECK-NOT: bne ++; NOCHECK-NOT: break ++ ++ %rem = srem i32 %a0, %a1 ++ ret i32 %rem ++} ++ ++define i32 @udiv1(i32 signext %a0, i32 signext %a1) nounwind readnone { ++entry: ++; CHECK-LABEL: udiv1: ++ ++; CHECK: div.wu $r4, $r4, $r5 ++; CHECK-TRAP: bne $r5, $zero, 8 ++; CHECK-TRAP: break 7 ++ ++; NOCHECK-NOT: bne ++; NOCHECK-NOT: break ++ ++ %div = udiv i32 %a0, %a1 ++ ret i32 %div ++} ++ ++define i32 @urem1(i32 signext %a0, i32 signext %a1) nounwind readnone { ++entry: ++; CHECK-LABEL: urem1: ++ ++ ++; CHECK: mod.wu $r4, $r4, $r5 ++; CHECK-TRAP: bne $r5, $zero, 8 ++; CHECK-TRAP: break 7 ++ ++; NOCHECK-NOT: bne ++; NOCHECK-NOT: break ++ ++ %rem = urem i32 %a0, %a1 ++ ret i32 %rem ++} +diff --git a/llvm/test/CodeGen/LoongArch/double-imm.ll b/llvm/test/CodeGen/LoongArch/double-imm.ll +deleted file mode 100644 +index a7782cf85..000000000 +--- a/llvm/test/CodeGen/LoongArch/double-imm.ll ++++ /dev/null +@@ -1,89 +0,0 @@ +-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +-; RUN: llc --mtriple=loongarch32 --mattr=+d < %s | FileCheck %s --check-prefix=LA32 +-; RUN: llc --mtriple=loongarch64 --mattr=+d < %s | FileCheck %s --check-prefix=LA64 +- +-define double @f64_positive_zero() nounwind { +-; LA32-LABEL: f64_positive_zero: +-; LA32: # %bb.0: +-; LA32-NEXT: movgr2fr.w $fa0, $zero +-; LA32-NEXT: movgr2frh.w $fa0, $zero +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: f64_positive_zero: +-; LA64: # %bb.0: +-; LA64-NEXT: movgr2fr.d $fa0, $zero +-; LA64-NEXT: jirl $zero, $ra, 0 +- ret double 0.0 +-} +- +-define double @f64_negative_zero() nounwind { +-; LA32-LABEL: f64_negative_zero: +-; LA32: # %bb.0: +-; LA32-NEXT: movgr2fr.w $fa0, $zero +-; LA32-NEXT: movgr2frh.w $fa0, $zero +-; LA32-NEXT: fneg.d $fa0, $fa0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: f64_negative_zero: +-; LA64: # %bb.0: +-; LA64-NEXT: movgr2fr.d $fa0, $zero +-; LA64-NEXT: fneg.d 
$fa0, $fa0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- ret double -0.0 +-} +- +-define double @f64_constant_pi() nounwind { +-; LA32-LABEL: f64_constant_pi: +-; LA32: # %bb.0: +-; LA32-NEXT: pcalau12i $a0, .LCPI2_0 +-; LA32-NEXT: addi.w $a0, $a0, .LCPI2_0 +-; LA32-NEXT: fld.d $fa0, $a0, 0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: f64_constant_pi: +-; LA64: # %bb.0: +-; LA64-NEXT: pcalau12i $a0, .LCPI2_0 +-; LA64-NEXT: addi.d $a0, $a0, .LCPI2_0 +-; LA64-NEXT: fld.d $fa0, $a0, 0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- ret double 3.1415926535897931159979634685441851615905761718750 +-} +- +-define double @f64_add_fimm1(double %a) nounwind { +-; LA32-LABEL: f64_add_fimm1: +-; LA32: # %bb.0: +-; LA32-NEXT: addi.w $a0, $zero, 1 +-; LA32-NEXT: movgr2fr.w $fa1, $a0 +-; LA32-NEXT: ffint.s.w $fa1, $fa1 +-; LA32-NEXT: fcvt.d.s $fa1, $fa1 +-; LA32-NEXT: fadd.d $fa0, $fa0, $fa1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: f64_add_fimm1: +-; LA64: # %bb.0: +-; LA64-NEXT: addi.d $a0, $zero, 1 +-; LA64-NEXT: movgr2fr.d $fa1, $a0 +-; LA64-NEXT: ffint.d.l $fa1, $fa1 +-; LA64-NEXT: fadd.d $fa0, $fa0, $fa1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %1 = fadd double %a, 1.0 +- ret double %1 +-} +- +-define double @f64_positive_fimm1() nounwind { +-; LA32-LABEL: f64_positive_fimm1: +-; LA32: # %bb.0: +-; LA32-NEXT: addi.w $a0, $zero, 1 +-; LA32-NEXT: movgr2fr.w $fa0, $a0 +-; LA32-NEXT: ffint.s.w $fa0, $fa0 +-; LA32-NEXT: fcvt.d.s $fa0, $fa0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: f64_positive_fimm1: +-; LA64: # %bb.0: +-; LA64-NEXT: addi.d $a0, $zero, 1 +-; LA64-NEXT: movgr2fr.d $fa0, $a0 +-; LA64-NEXT: ffint.d.l $fa0, $fa0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- ret double 1.0 +-} +diff --git a/llvm/test/CodeGen/LoongArch/dup-tail.ll b/llvm/test/CodeGen/LoongArch/dup-tail.ll +new file mode 100644 +index 000000000..cad67e98c +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/dup-tail.ll +@@ -0,0 +1,45 @@ ++; RUN: llc --mtriple=loongarch64 --relocation-model=pic -o - %s | FileCheck %s ++ ++;; Duplicate returns to enable tail call optimizations. 
++declare i32 @test() ++declare i32 @test1() ++declare i32 @test2() ++declare i32 @test3() ++define i32 @duplicate_returns(i32 %a, i32 %b) nounwind { ++; CHECK-LABEL: duplicate_returns: ++; CHECK: b test2 ++; CHECK: b test ++; CHECK: b test1 ++; CHECK: b test3 ++entry: ++ %cmp = icmp eq i32 %a, 0 ++ br i1 %cmp, label %if.then, label %if.else ++ ++if.then: ; preds = %entry ++ %call = tail call i32 @test() ++ br label %return ++ ++if.else: ; preds = %entry ++ %cmp1 = icmp eq i32 %b, 0 ++ br i1 %cmp1, label %if.then2, label %if.else4 ++ ++if.then2: ; preds = %if.else ++ %call3 = tail call i32 @test1() ++ br label %return ++ ++if.else4: ; preds = %if.else ++ %cmp5 = icmp sgt i32 %a, %b ++ br i1 %cmp5, label %if.then6, label %if.else8 ++ ++if.then6: ; preds = %if.else4 ++ %call7 = tail call i32 @test2() ++ br label %return ++ ++if.else8: ; preds = %if.else4 ++ %call9 = tail call i32 @test3() ++ br label %return ++ ++return: ; preds = %if.else8, %if.then6, %if.then2, %if.then ++ %retval = phi i32 [ %call, %if.then ], [ %call3, %if.then2 ], [ %call7, %if.then6 ], [ %call9, %if.else8 ] ++ ret i32 %retval ++} +diff --git a/llvm/test/CodeGen/LoongArch/eliminateFI.ll b/llvm/test/CodeGen/LoongArch/eliminateFI.ll +new file mode 100644 +index 000000000..0272c95bd +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/eliminateFI.ll +@@ -0,0 +1,106 @@ ++; Check whether LoongArchSERegisterInfo::eliminateFI works well ++; RUN: llc -march=loongarch64 -o - %s | FileCheck %s ++ ++define signext i32 @ldptr_w_unaligned() { ++; CHECK-LABEL: ldptr_w_unaligned: ++; CHECK: # %bb.0: # %entry ++entry: ++ %array = alloca [6000 x i8], align 1 ++ %arrayidx = getelementptr inbounds [6000 x i8], [6000 x i8]* %array, i64 0, i64 5001 ++ %0 = bitcast i8* %arrayidx to i32* ++; the offset MUST be 0 ++; CHECK: ldptr.w $r{{[0-9]+}}, $r{{[0-9]+}}, 0 ++ %1 = load i32, i32* %0, align 1 ++ ret i32 %1 ++} ++ ++define signext i32 @ldptr_w_aligned() { ++; CHECK-LABEL: ldptr_w_aligned: ++; CHECK: # %bb.0: # %entry ++entry: ++ %array = alloca [6000 x i8], align 1 ++ %arrayidx = getelementptr inbounds [6000 x i8], [6000 x i8]* %array, i64 0, i64 5000 ++ %0 = bitcast i8* %arrayidx to i32* ++; the offset may not be 0, but MUST be 4-bytes aligned ++; CHECK: ldptr.w $r{{[0-9]+}}, $r{{[0-9]+}}, {{[0-9]+}} ++ %1 = load i32, i32* %0, align 1 ++ ret i32 %1 ++} ++ ++define signext i64 @ldptr_d_unaligned() { ++; CHECK-LABEL: ldptr_d_unaligned: ++; CHECK: # %bb.0: # %entry ++entry: ++ %array = alloca [6000 x i8], align 1 ++ %arrayidx = getelementptr inbounds [6000 x i8], [6000 x i8]* %array, i64 0, i64 5001 ++ %0 = bitcast i8* %arrayidx to i64* ++; the offset MUST be 0 ++; CHECK: ldptr.d $r{{[0-9]+}}, $r{{[0-9]+}}, 0 ++ %1 = load i64, i64* %0, align 1 ++ ret i64 %1 ++} ++ ++define signext i64 @ldptr_d_aligned() { ++; CHECK-LABEL: ldptr_d_aligned: ++; CHECK: # %bb.0: # %entry ++entry: ++ %array = alloca [6000 x i8], align 1 ++ %arrayidx = getelementptr inbounds [6000 x i8], [6000 x i8]* %array, i64 0, i64 5000 ++ %0 = bitcast i8* %arrayidx to i64* ++; the offset may not be 0, but MUST be 4-bytes aligned ++; CHECK: ldptr.d $r{{[0-9]+}}, $r{{[0-9]+}}, {{[0-9]+}} ++ %1 = load i64, i64* %0, align 1 ++ ret i64 %1 ++} ++ ++define void @stptr_w_unaligned(i32 signext %val) { ++; CHECK-LABEL: stptr_w_unaligned: ++; CHECK: # %bb.0: # %entry ++entry: ++ %array = alloca [6000 x i8], align 1 ++ %arrayidx = getelementptr inbounds [6000 x i8], [6000 x i8]* %array, i64 0, i64 5001 ++ %0 = bitcast i8* %arrayidx to i32* ++; the offset MUST be 0 ++; CHECK: stptr.w 
$r{{[0-9]+}}, $r{{[0-9]+}}, 0 ++ store i32 %val, i32* %0, align 1 ++ ret void ++} ++ ++define void @stptr_w_aligned(i32 signext %val) { ++; CHECK-LABEL: stptr_w_aligned: ++; CHECK: # %bb.0: # %entry ++entry: ++ %array = alloca [6000 x i8], align 1 ++ %arrayidx = getelementptr inbounds [6000 x i8], [6000 x i8]* %array, i64 0, i64 5000 ++ %0 = bitcast i8* %arrayidx to i32* ++; the offset may not be 0, but MUST be 4-bytes aligned ++; CHECK: stptr.w $r{{[0-9]+}}, $r{{[0-9]+}}, {{[0-9]+}} ++ store i32 %val, i32* %0, align 1 ++ ret void ++} ++ ++define void @stptr_d_unaligned(i64 %val) { ++; CHECK-LABEL: stptr_d_unaligned: ++; CHECK: # %bb.0: # %entry ++entry: ++ %array = alloca [6000 x i8], align 1 ++ %arrayidx = getelementptr inbounds [6000 x i8], [6000 x i8]* %array, i64 0, i64 5001 ++ %0 = bitcast i8* %arrayidx to i64* ++; the offset MUST be 0 ++; CHECK: stptr.d $r{{[0-9]+}}, $r{{[0-9]+}}, 0 ++ store i64 %val, i64* %0, align 1 ++ ret void ++} ++ ++define void @stptr_d_aligned(i64 %val) { ++; CHECK-LABEL: stptr_d_aligned: ++; CHECK: # %bb.0: # %entry ++entry: ++ %array = alloca [6000 x i8], align 1 ++ %arrayidx = getelementptr inbounds [6000 x i8], [6000 x i8]* %array, i64 0, i64 5000 ++ %0 = bitcast i8* %arrayidx to i64* ++; the offset may not be 0, but MUST be 4-bytes aligned ++; CHECK: stptr.d $r{{[0-9]+}}, $r{{[0-9]+}}, {{[0-9]+}} ++ store i64 %val, i64* %0, align 1 ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/emergency-spill-slot.ll b/llvm/test/CodeGen/LoongArch/emergency-spill-slot.ll +new file mode 100644 +index 000000000..80fa7a855 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/emergency-spill-slot.ll +@@ -0,0 +1,103 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 -O0 < %s | FileCheck %s ++ ++@var = external global i32 ++ ++define void @func() { ++; CHECK-LABEL: func: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $sp, $sp, -2048 ++; CHECK-NEXT: addi.d $sp, $sp, -2048 ++; CHECK-NEXT: addi.d $sp, $sp, -16 ++; CHECK-NEXT: .cfi_def_cfa_offset 4112 ++; CHECK-NEXT: lu12i.w $r5, var ++; CHECK-NEXT: ori $r5, $r5, var ++; CHECK-NEXT: lu32i.d $r5, var ++; CHECK-NEXT: lu52i.d $r5, $r5, var ++; CHECK-NEXT: ld.w $r20, $r5, 0 ++; CHECK-NEXT: ld.w $r19, $r5, 0 ++; CHECK-NEXT: ld.w $r18, $r5, 0 ++; CHECK-NEXT: ld.w $r17, $r5, 0 ++; CHECK-NEXT: ld.w $r16, $r5, 0 ++; CHECK-NEXT: ld.w $r15, $r5, 0 ++; CHECK-NEXT: ld.w $r14, $r5, 0 ++; CHECK-NEXT: ld.w $r13, $r5, 0 ++; CHECK-NEXT: ld.w $r12, $r5, 0 ++; CHECK-NEXT: ld.w $r11, $r5, 0 ++; CHECK-NEXT: ld.w $r10, $r5, 0 ++; CHECK-NEXT: ld.w $r9, $r5, 0 ++; CHECK-NEXT: ld.w $r8, $r5, 0 ++; CHECK-NEXT: ld.w $r7, $r5, 0 ++; CHECK-NEXT: ld.w $r6, $r5, 0 ++; CHECK-NEXT: ld.w $r4, $r5, 0 ++; CHECK-NEXT: st.d $r23, $sp, 0 ++; CHECK-NEXT: lu12i.w $r23, 1 ++; CHECK-NEXT: ori $r23, $r23, 12 ++; CHECK-NEXT: add.d $r23, $sp, $r23 ++; CHECK-NEXT: st.w $r20, $r23, 0 ++; CHECK-NEXT: ld.d $r23, $sp, 0 ++; CHECK-NEXT: st.w $r20, $r5, 0 ++; CHECK-NEXT: st.w $r19, $r5, 0 ++; CHECK-NEXT: st.w $r18, $r5, 0 ++; CHECK-NEXT: st.w $r17, $r5, 0 ++; CHECK-NEXT: st.w $r16, $r5, 0 ++; CHECK-NEXT: st.w $r15, $r5, 0 ++; CHECK-NEXT: st.w $r14, $r5, 0 ++; CHECK-NEXT: st.w $r13, $r5, 0 ++; CHECK-NEXT: st.w $r12, $r5, 0 ++; CHECK-NEXT: st.w $r11, $r5, 0 ++; CHECK-NEXT: st.w $r10, $r5, 0 ++; CHECK-NEXT: st.w $r9, $r5, 0 ++; CHECK-NEXT: st.w $r8, $r5, 0 ++; CHECK-NEXT: st.w $r7, $r5, 0 ++; CHECK-NEXT: st.w $r6, $r5, 0 ++; CHECK-NEXT: st.w $r4, $r5, 0 ++; CHECK-NEXT: lu12i.w $r4, 1 ++; CHECK-NEXT: ori $r4, $r4, 16 
++; CHECK-NEXT: add.d $sp, $sp, $r4 ++; CHECK-NEXT: jr $ra ++ %space = alloca i32, align 4 ++ %stackspace = alloca[1024 x i32], align 4 ++ ++ ;; Load values to increase register pressure. ++ %v0 = load volatile i32, i32* @var ++ %v1 = load volatile i32, i32* @var ++ %v2 = load volatile i32, i32* @var ++ %v3 = load volatile i32, i32* @var ++ %v4 = load volatile i32, i32* @var ++ %v5 = load volatile i32, i32* @var ++ %v6 = load volatile i32, i32* @var ++ %v7 = load volatile i32, i32* @var ++ %v8 = load volatile i32, i32* @var ++ %v9 = load volatile i32, i32* @var ++ %v10 = load volatile i32, i32* @var ++ %v11 = load volatile i32, i32* @var ++ %v12 = load volatile i32, i32* @var ++ %v13 = load volatile i32, i32* @var ++ %v14 = load volatile i32, i32* @var ++ %v15 = load volatile i32, i32* @var ++ ++ ;; Computing a stack-relative values needs an additional register. ++ ;; We should get an emergency spill/reload for this. ++ store volatile i32 %v0, i32* %space ++ ++ ;; store values so they are used. ++ store volatile i32 %v0, i32* @var ++ store volatile i32 %v1, i32* @var ++ store volatile i32 %v2, i32* @var ++ store volatile i32 %v3, i32* @var ++ store volatile i32 %v4, i32* @var ++ store volatile i32 %v5, i32* @var ++ store volatile i32 %v6, i32* @var ++ store volatile i32 %v7, i32* @var ++ store volatile i32 %v8, i32* @var ++ store volatile i32 %v9, i32* @var ++ store volatile i32 %v10, i32* @var ++ store volatile i32 %v11, i32* @var ++ store volatile i32 %v12, i32* @var ++ store volatile i32 %v13, i32* @var ++ store volatile i32 %v14, i32* @var ++ store volatile i32 %v15, i32* @var ++ ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/fcopysign.ll b/llvm/test/CodeGen/LoongArch/fcopysign.ll +new file mode 100644 +index 000000000..c16413715 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/fcopysign.ll +@@ -0,0 +1,17 @@ ++; RUN: llc -march=loongarch64 -o - %s | FileCheck %s ++ ++define float @fcopysign_s(float %a, float %b) { ++; CHECK-LABEL: fcopysign_s: ++; CHECK: fcopysign.s $f0, $f0, $f1 ++ %ret = call float @llvm.copysign.f32(float %a, float %b) ++ ret float %ret ++} ++declare float @llvm.copysign.f32(float %a, float %b) ++ ++define double @fcopysign_d(double %a, double %b) { ++; CHECK-LABEL: fcopysign_d: ++; CHECK: fcopysign.d $f0, $f0, $f1 ++ %ret = call double @llvm.copysign.f64(double %a, double %b) ++ ret double %ret ++} ++declare double @llvm.copysign.f64(double %a, double %b) +diff --git a/llvm/test/CodeGen/LoongArch/float-imm.ll b/llvm/test/CodeGen/LoongArch/float-imm.ll +deleted file mode 100644 +index a6b542c29..000000000 +--- a/llvm/test/CodeGen/LoongArch/float-imm.ll ++++ /dev/null +@@ -1,85 +0,0 @@ +-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +-; RUN: llc --mtriple=loongarch32 --mattr=+f,-d < %s | FileCheck %s --check-prefix=LA32 +-; RUN: llc --mtriple=loongarch64 --mattr=+f,-d < %s | FileCheck %s --check-prefix=LA64 +- +-define float @f32_positive_zero() nounwind { +-; LA32-LABEL: f32_positive_zero: +-; LA32: # %bb.0: +-; LA32-NEXT: movgr2fr.w $fa0, $zero +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: f32_positive_zero: +-; LA64: # %bb.0: +-; LA64-NEXT: movgr2fr.w $fa0, $zero +-; LA64-NEXT: jirl $zero, $ra, 0 +- ret float 0.0 +-} +- +-define float @f32_negative_zero() nounwind { +-; LA32-LABEL: f32_negative_zero: +-; LA32: # %bb.0: +-; LA32-NEXT: movgr2fr.w $fa0, $zero +-; LA32-NEXT: fneg.s $fa0, $fa0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: f32_negative_zero: +-; LA64: # %bb.0: +-; LA64-NEXT: movgr2fr.w $fa0, 
$zero +-; LA64-NEXT: fneg.s $fa0, $fa0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- ret float -0.0 +-} +- +-define float @f32_constant_pi() nounwind { +-; LA32-LABEL: f32_constant_pi: +-; LA32: # %bb.0: +-; LA32-NEXT: pcalau12i $a0, .LCPI2_0 +-; LA32-NEXT: addi.w $a0, $a0, .LCPI2_0 +-; LA32-NEXT: fld.s $fa0, $a0, 0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: f32_constant_pi: +-; LA64: # %bb.0: +-; LA64-NEXT: pcalau12i $a0, .LCPI2_0 +-; LA64-NEXT: addi.d $a0, $a0, .LCPI2_0 +-; LA64-NEXT: fld.s $fa0, $a0, 0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- ret float 3.14159274101257324218750 +-} +- +-define float @f32_add_fimm1(float %a) nounwind { +-; LA32-LABEL: f32_add_fimm1: +-; LA32: # %bb.0: +-; LA32-NEXT: addi.w $a0, $zero, 1 +-; LA32-NEXT: movgr2fr.w $fa1, $a0 +-; LA32-NEXT: ffint.s.w $fa1, $fa1 +-; LA32-NEXT: fadd.s $fa0, $fa0, $fa1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: f32_add_fimm1: +-; LA64: # %bb.0: +-; LA64-NEXT: addi.w $a0, $zero, 1 +-; LA64-NEXT: movgr2fr.w $fa1, $a0 +-; LA64-NEXT: ffint.s.w $fa1, $fa1 +-; LA64-NEXT: fadd.s $fa0, $fa0, $fa1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %1 = fadd float %a, 1.0 +- ret float %1 +-} +- +-define float @f32_positive_fimm1() nounwind { +-; LA32-LABEL: f32_positive_fimm1: +-; LA32: # %bb.0: +-; LA32-NEXT: addi.w $a0, $zero, 1 +-; LA32-NEXT: movgr2fr.w $fa0, $a0 +-; LA32-NEXT: ffint.s.w $fa0, $fa0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: f32_positive_fimm1: +-; LA64: # %bb.0: +-; LA64-NEXT: addi.w $a0, $zero, 1 +-; LA64-NEXT: movgr2fr.w $fa0, $a0 +-; LA64-NEXT: ffint.s.w $fa0, $fa0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- ret float 1.0 +-} +diff --git a/llvm/test/CodeGen/LoongArch/frame-info.ll b/llvm/test/CodeGen/LoongArch/frame-info.ll +new file mode 100644 +index 000000000..eb4fc69fa +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/frame-info.ll +@@ -0,0 +1,132 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc -relocation-model=pic -mtriple=loongarch64 -frame-pointer=all < %s | FileCheck %s ++ ++define void @trivial() { ++; CHECK-LABEL: trivial: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $sp, $sp, -16 ++; CHECK-NEXT: .cfi_def_cfa_offset 16 ++; CHECK-NEXT: st.d $r22, $sp, 8 # 8-byte Folded Spill ++; CHECK-NEXT: .cfi_offset 22, -8 ++; CHECK-NEXT: addi.d $r22, $sp, 16 ++; CHECK-NEXT: .cfi_def_cfa 22, 0 ++; CHECK-NEXT: ld.d $r22, $sp, 8 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 16 ++; CHECK-NEXT: jr $ra ++ ret void ++} ++ ++define void @stack_alloc(i32 signext %size) { ++; CHECK-LABEL: stack_alloc: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: addi.d $sp, $sp, -32 ++; CHECK-NEXT: .cfi_def_cfa_offset 32 ++; CHECK-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r22, $sp, 16 # 8-byte Folded Spill ++; CHECK-NEXT: .cfi_offset 1, -8 ++; CHECK-NEXT: .cfi_offset 22, -16 ++; CHECK-NEXT: addi.d $r22, $sp, 32 ++; CHECK-NEXT: .cfi_def_cfa 22, 0 ++; CHECK-NEXT: addi.w $r5, $zero, -16 ++; CHECK-NEXT: lu32i.d $r5, 1 ++; CHECK-NEXT: bstrpick.d $r4, $r4, 31, 0 ++; CHECK-NEXT: addi.d $r4, $r4, 15 ++; CHECK-NEXT: and $r4, $r4, $r5 ++; CHECK-NEXT: sub.d $r4, $sp, $r4 ++; CHECK-NEXT: move $sp, $r4 ++; CHECK-NEXT: bl callee_with_args ++; CHECK-NEXT: addi.d $sp, $r22, -32 ++; CHECK-NEXT: ld.d $r22, $sp, 16 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 32 ++; CHECK-NEXT: jr $ra ++entry: ++ %0 = alloca i8, i32 %size, align 16 ++ call void @callee_with_args(i8* nonnull %0) ++ ret void ++} ++ ++define void 
@branch_and_tail_call(i1 %a) { ++; CHECK-LABEL: branch_and_tail_call: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r4, $r4, 0 ++; CHECK-NEXT: andi $r4, $r4, 1 ++; CHECK-NEXT: beqz $r4, .LBB2_2 ++; CHECK-NEXT: # %bb.1: # %blue_pill ++; CHECK-NEXT: b callee1 ++; CHECK-NEXT: .LBB2_2: # %red_pill ++; CHECK-NEXT: addi.d $sp, $sp, -16 ++; CHECK-NEXT: .cfi_def_cfa_offset 16 ++; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r22, $sp, 0 # 8-byte Folded Spill ++; CHECK-NEXT: .cfi_offset 1, -8 ++; CHECK-NEXT: .cfi_offset 22, -16 ++; CHECK-NEXT: addi.d $r22, $sp, 16 ++; CHECK-NEXT: .cfi_def_cfa 22, 0 ++; CHECK-NEXT: bl callee2 ++; CHECK-NEXT: ld.d $r22, $sp, 0 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 16 ++; CHECK-NEXT: jr $ra ++ br i1 %a, label %blue_pill, label %red_pill ++blue_pill: ++ tail call void @callee1() ++ ret void ++red_pill: ++ call void @callee2() ++ ret void ++} ++ ++define void @big_frame() { ++; CHECK-LABEL: big_frame: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: addi.d $sp, $sp, -2032 ++; CHECK-NEXT: .cfi_def_cfa_offset 2032 ++; CHECK-NEXT: st.d $ra, $sp, 2024 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r22, $sp, 2016 # 8-byte Folded Spill ++; CHECK-NEXT: .cfi_offset 1, -8 ++; CHECK-NEXT: .cfi_offset 22, -16 ++; CHECK-NEXT: addi.d $r22, $sp, 2032 ++; CHECK-NEXT: .cfi_def_cfa 22, 0 ++; CHECK-NEXT: addi.d $sp, $sp, -48 ++; CHECK-NEXT: lu12i.w $r4, -1 ++; CHECK-NEXT: ori $r4, $r4, 2016 ++; CHECK-NEXT: add.d $r4, $r22, $r4 ++; CHECK-NEXT: addi.d $r4, $r4, 0 ++; CHECK-NEXT: bl callee_with_args ++; CHECK-NEXT: addi.d $sp, $sp, 48 ++; CHECK-NEXT: ld.d $r22, $sp, 2016 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $ra, $sp, 2024 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 2032 ++; CHECK-NEXT: jr $ra ++entry: ++ %0 = alloca i8, i32 2048, align 16 ++ call void @callee_with_args(i8* nonnull %0) ++ ret void ++} ++ ++define void @varargs_frame(i32 %i, ...) 
{ ++; CHECK-LABEL: varargs_frame: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $sp, $sp, -80 ++; CHECK-NEXT: .cfi_def_cfa_offset 80 ++; CHECK-NEXT: st.d $r22, $sp, 8 # 8-byte Folded Spill ++; CHECK-NEXT: .cfi_offset 22, -72 ++; CHECK-NEXT: addi.d $r22, $sp, 16 ++; CHECK-NEXT: .cfi_def_cfa 22, 64 ++; CHECK-NEXT: st.d $r11, $r22, 56 ++; CHECK-NEXT: st.d $r10, $r22, 48 ++; CHECK-NEXT: st.d $r9, $r22, 40 ++; CHECK-NEXT: st.d $r8, $r22, 32 ++; CHECK-NEXT: st.d $r7, $r22, 24 ++; CHECK-NEXT: st.d $r6, $r22, 16 ++; CHECK-NEXT: st.d $r5, $r22, 8 ++; CHECK-NEXT: ld.d $r22, $sp, 8 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 80 ++; CHECK-NEXT: jr $ra ++ ret void ++} ++ ++declare void @callee1() ++declare void @callee2() ++declare void @callee_with_args(i8*) +diff --git a/llvm/test/CodeGen/LoongArch/frame.ll b/llvm/test/CodeGen/LoongArch/frame.ll +deleted file mode 100644 +index e0aa7db13..000000000 +--- a/llvm/test/CodeGen/LoongArch/frame.ll ++++ /dev/null +@@ -1,29 +0,0 @@ +-; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s +- +-%struct.key_t = type { i32, [16 x i8] } +- +-define i32 @test() nounwind { +-; CHECK-LABEL: test: +-; CHECK: # %bb.0: +-; CHECK-NEXT: addi.d $sp, $sp, -32 +-; CHECK-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill +-; CHECK-NEXT: st.w $zero, $sp, 16 +-; CHECK-NEXT: st.d $zero, $sp, 8 +-; CHECK-NEXT: st.d $zero, $sp, 0 +-; CHECK-NEXT: addi.d $a0, $sp, 0 +-; CHECK-NEXT: ori $a0, $a0, 4 +-; CHECK-NEXT: bl test1 +-; CHECK-NEXT: move $a0, $zero +-; CHECK-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload +-; CHECK-NEXT: addi.d $sp, $sp, 32 +-; CHECK-NEXT: jirl $zero, $ra, 0 +- %key = alloca %struct.key_t, align 4 +- call void @llvm.memset.p0i8.i64(ptr %key, i8 0, i64 20, i1 false) +- %1 = getelementptr inbounds %struct.key_t, ptr %key, i64 0, i32 1, i64 0 +- call void @test1(ptr %1) +- ret i32 0 +-} +- +-declare void @llvm.memset.p0i8.i64(ptr, i8, i64, i1) +- +-declare void @test1(ptr) +diff --git a/llvm/test/CodeGen/LoongArch/fsel.ll b/llvm/test/CodeGen/LoongArch/fsel.ll +new file mode 100644 +index 000000000..f41ee08c0 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/fsel.ll +@@ -0,0 +1,47 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc -march=loongarch64 -o - %s | FileCheck %s ++ ++ ++define double @olt_f64(double %a, double %b) { ++; CHECK-LABEL: olt_f64: ++; CHECK: # %bb.0: ++; CHECK-NEXT: fcmp.clt.d $fcc0, $f0, $f1 ++; CHECK-NEXT: fsel $f0, $f1, $f0, $fcc0 ++; CHECK-NEXT: jr $ra ++ %cond = fcmp olt double %a, %b ++ %ret = select i1 %cond, double %a, double %b ++ ret double %ret ++} ++ ++define double @ogt_f64(double %a, double %b) { ++; CHECK-LABEL: ogt_f64: ++; CHECK: # %bb.0: ++; CHECK-NEXT: fcmp.cule.d $fcc0, $f0, $f1 ++; CHECK-NEXT: fsel $f0, $f0, $f1, $fcc0 ++; CHECK-NEXT: jr $ra ++ %cond = fcmp ogt double %a, %b ++ %ret = select i1 %cond, double %a, double %b ++ ret double %ret ++} ++ ++define float @olt_f32(float %a, float %b) { ++; CHECK-LABEL: olt_f32: ++; CHECK: # %bb.0: ++; CHECK-NEXT: fcmp.clt.s $fcc0, $f0, $f1 ++; CHECK-NEXT: fsel $f0, $f1, $f0, $fcc0 ++; CHECK-NEXT: jr $ra ++ %cond = fcmp olt float %a, %b ++ %ret = select i1 %cond, float %a, float %b ++ ret float %ret ++} ++ ++define float @ogt_f32(float %a, float %b) { ++; CHECK-LABEL: ogt_f32: ++; CHECK: # %bb.0: ++; CHECK-NEXT: fcmp.cule.s $fcc0, $f0, $f1 ++; CHECK-NEXT: fsel $f0, $f0, $f1, $fcc0 ++; CHECK-NEXT: jr $ra ++ %cond = fcmp ogt float %a, %b ++ %ret = select i1 %cond, float %a, float %b ++ ret float %ret ++} +diff --git 
a/llvm/test/CodeGen/LoongArch/imm.ll b/llvm/test/CodeGen/LoongArch/imm.ll +deleted file mode 100644 +index fb0dcf21f..000000000 +--- a/llvm/test/CodeGen/LoongArch/imm.ll ++++ /dev/null +@@ -1,165 +0,0 @@ +-; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s +- +-define i64 @imm0() { +-; CHECK-LABEL: imm0: +-; CHECK: # %bb.0: +-; CHECK-NEXT: move $a0, $zero +-; CHECK-NEXT: jirl $zero, $ra, 0 +- ret i64 0 +-} +- +-define i64 @imm7ff0000000000000() { +-; CHECK-LABEL: imm7ff0000000000000: +-; CHECK: # %bb.0: +-; CHECK-NEXT: lu52i.d $a0, $zero, 2047 +-; CHECK-NEXT: jirl $zero, $ra, 0 +- ret i64 9218868437227405312 +-} +- +-define i64 @imm0000000000000fff() { +-; CHECK-LABEL: imm0000000000000fff: +-; CHECK: # %bb.0: +-; CHECK-NEXT: ori $a0, $zero, 4095 +-; CHECK-NEXT: jirl $zero, $ra, 0 +- ret i64 4095 +-} +- +-define i64 @imm0007ffff00000800() { +-; CHECK-LABEL: imm0007ffff00000800: +-; CHECK: # %bb.0: +-; CHECK-NEXT: ori $a0, $zero, 2048 +-; CHECK-NEXT: lu32i.d $a0, 524287 +-; CHECK-NEXT: jirl $zero, $ra, 0 +- ret i64 2251795518720000 +-} +- +-define i64 @immfff0000000000fff() { +-; CHECK-LABEL: immfff0000000000fff: +-; CHECK: # %bb.0: +-; CHECK-NEXT: ori $a0, $zero, 4095 +-; CHECK-NEXT: lu52i.d $a0, $a0, -1 +-; CHECK-NEXT: jirl $zero, $ra, 0 +- ret i64 -4503599627366401 +-} +- +-define i64 @imm0008000000000fff() { +-; CHECK-LABEL: imm0008000000000fff: +-; CHECK: # %bb.0: +-; CHECK-NEXT: ori $a0, $zero, 4095 +-; CHECK-NEXT: lu32i.d $a0, -524288 +-; CHECK-NEXT: lu52i.d $a0, $a0, 0 +-; CHECK-NEXT: jirl $zero, $ra, 0 +- ret i64 2251799813689343 +-} +- +-define i64 @immfffffffffffff800() { +-; CHECK-LABEL: immfffffffffffff800: +-; CHECK: # %bb.0: +-; CHECK-NEXT: addi.w $a0, $zero, -2048 +-; CHECK-NEXT: jirl $zero, $ra, 0 +- ret i64 -2048 +-} +- +-define i64 @imm00000000fffff800() { +-; CHECK-LABEL: imm00000000fffff800: +-; CHECK: # %bb.0: +-; CHECK-NEXT: addi.w $a0, $zero, -2048 +-; CHECK-NEXT: lu32i.d $a0, 0 +-; CHECK-NEXT: jirl $zero, $ra, 0 +- ret i64 4294965248 +-} +- +-define i64 @imm000ffffffffff800() { +-; CHECK-LABEL: imm000ffffffffff800: +-; CHECK: # %bb.0: +-; CHECK-NEXT: addi.w $a0, $zero, -2048 +-; CHECK-NEXT: lu52i.d $a0, $a0, 0 +-; CHECK-NEXT: jirl $zero, $ra, 0 +- ret i64 4503599627368448 +-} +- +-define i64 @imm00080000fffff800() { +-; CHECK-LABEL: imm00080000fffff800: +-; CHECK: # %bb.0: +-; CHECK-NEXT: addi.w $a0, $zero, -2048 +-; CHECK-NEXT: lu32i.d $a0, -524288 +-; CHECK-NEXT: lu52i.d $a0, $a0, 0 +-; CHECK-NEXT: jirl $zero, $ra, 0 +- ret i64 2251804108650496 +-} +- +-define i64 @imm000000007ffff000() { +-; CHECK-LABEL: imm000000007ffff000: +-; CHECK: # %bb.0: +-; CHECK-NEXT: lu12i.w $a0, 524287 +-; CHECK-NEXT: jirl $zero, $ra, 0 +- ret i64 2147479552 +-} +- +-define i64 @imm0000000080000000() { +-; CHECK-LABEL: imm0000000080000000: +-; CHECK: # %bb.0: +-; CHECK-NEXT: lu12i.w $a0, -524288 +-; CHECK-NEXT: lu32i.d $a0, 0 +-; CHECK-NEXT: jirl $zero, $ra, 0 +- ret i64 2147483648 +-} +- +-define i64 @imm000ffffffffff000() { +-; CHECK-LABEL: imm000ffffffffff000: +-; CHECK: # %bb.0: +-; CHECK-NEXT: lu12i.w $a0, -1 +-; CHECK-NEXT: lu52i.d $a0, $a0, 0 +-; CHECK-NEXT: jirl $zero, $ra, 0 +- ret i64 4503599627366400 +-} +- +-define i64 @imm7ff0000080000000() { +-; CHECK-LABEL: imm7ff0000080000000: +-; CHECK: # %bb.0: +-; CHECK-NEXT: lu12i.w $a0, -524288 +-; CHECK-NEXT: lu32i.d $a0, 0 +-; CHECK-NEXT: lu52i.d $a0, $a0, 2047 +-; CHECK-NEXT: jirl $zero, $ra, 0 +- ret i64 9218868439374888960 +-} +- +-define i64 @immffffffff80000800() { +-; CHECK-LABEL: immffffffff80000800: +-; CHECK: # %bb.0: 
+-; CHECK-NEXT: lu12i.w $a0, -524288 +-; CHECK-NEXT: ori $a0, $a0, 2048 +-; CHECK-NEXT: jirl $zero, $ra, 0 +- ret i64 -2147481600 +-} +- +-define i64 @immffffffff7ffff800() { +-; CHECK-LABEL: immffffffff7ffff800: +-; CHECK: # %bb.0: +-; CHECK-NEXT: lu12i.w $a0, 524287 +-; CHECK-NEXT: ori $a0, $a0, 2048 +-; CHECK-NEXT: lu32i.d $a0, -1 +-; CHECK-NEXT: jirl $zero, $ra, 0 +- ret i64 -2147485696 +-} +- +-define i64 @imm7fffffff800007ff() { +-; CHECK-LABEL: imm7fffffff800007ff: +-; CHECK: # %bb.0: +-; CHECK-NEXT: lu12i.w $a0, -524288 +-; CHECK-NEXT: ori $a0, $a0, 2047 +-; CHECK-NEXT: lu52i.d $a0, $a0, 2047 +-; CHECK-NEXT: jirl $zero, $ra, 0 +- ret i64 9223372034707294207 +-} +- +-define i64 @imm0008000080000800() { +-; CHECK-LABEL: imm0008000080000800: +-; CHECK: # %bb.0: +-; CHECK-NEXT: lu12i.w $a0, -524288 +-; CHECK-NEXT: ori $a0, $a0, 2048 +-; CHECK-NEXT: lu32i.d $a0, -524288 +-; CHECK-NEXT: lu52i.d $a0, $a0, 0 +-; CHECK-NEXT: jirl $zero, $ra, 0 +- ret i64 2251801961170944 +-} +diff --git a/llvm/test/CodeGen/LoongArch/immediate.ll b/llvm/test/CodeGen/LoongArch/immediate.ll +new file mode 100644 +index 000000000..1de3ef0fc +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/immediate.ll +@@ -0,0 +1,2542 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc -mtriple=loongarch64 < %s | FileCheck %s ++define i64 @li0000000000000000() { ++; CHECK-LABEL: li0000000000000000: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $r4, $zero, 0 ++; CHECK-NEXT: jr $ra ++ ret i64 0 ++} ++ ++define i64 @li00000000000007ff() { ++; CHECK-LABEL: li00000000000007ff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $r4, $zero, 2047 ++; CHECK-NEXT: jr $ra ++ ret i64 2047 ++} ++ ++define i64 @li0000000000000800() { ++; CHECK-LABEL: li0000000000000800: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r4, $zero, 2048 ++; CHECK-NEXT: jr $ra ++ ret i64 2048 ++} ++ ++define i64 @li0000000000000fff() { ++; CHECK-LABEL: li0000000000000fff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r4, $zero, 4095 ++; CHECK-NEXT: jr $ra ++ ret i64 4095 ++} ++ ++define i64 @li000000007ffff000() { ++; CHECK-LABEL: li000000007ffff000: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, 524287 ++; CHECK-NEXT: jr $ra ++ ret i64 2147479552 ++} ++ ++define i64 @li000000007ffff7ff() { ++; CHECK-LABEL: li000000007ffff7ff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, 524287 ++; CHECK-NEXT: ori $r4, $r4, 2047 ++; CHECK-NEXT: jr $ra ++ ret i64 2147481599 ++} ++ ++define i64 @li000000007ffff800() { ++; CHECK-LABEL: li000000007ffff800: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, 524287 ++; CHECK-NEXT: ori $r4, $r4, 2048 ++; CHECK-NEXT: jr $ra ++ ret i64 2147481600 ++} ++ ++define i64 @li000000007fffffff() { ++; CHECK-LABEL: li000000007fffffff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, 524287 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: jr $ra ++ ret i64 2147483647 ++} ++ ++define i64 @li0000000080000000() { ++; CHECK-LABEL: li0000000080000000: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -524288 ++; CHECK-NEXT: lu32i.d $r4, 0 ++; CHECK-NEXT: jr $ra ++ ret i64 2147483648 ++} ++ ++define i64 @li00000000800007ff() { ++; CHECK-LABEL: li00000000800007ff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -524288 ++; CHECK-NEXT: ori $r4, $r4, 2047 ++; CHECK-NEXT: lu32i.d $r4, 0 ++; CHECK-NEXT: jr $ra ++ ret i64 2147485695 ++} ++ ++define i64 @li0000000080000800() { ++; CHECK-LABEL: li0000000080000800: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -524288 ++; CHECK-NEXT: ori $r4, $r4, 2048 ++; CHECK-NEXT: lu32i.d $r4, 0 
++; CHECK-NEXT: jr $ra ++ ret i64 2147485696 ++} ++ ++define i64 @li0000000080000fff() { ++; CHECK-LABEL: li0000000080000fff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -524288 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: lu32i.d $r4, 0 ++; CHECK-NEXT: jr $ra ++ ret i64 2147487743 ++} ++ ++define i64 @li00000000fffff000() { ++; CHECK-LABEL: li00000000fffff000: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -1 ++; CHECK-NEXT: lu32i.d $r4, 0 ++; CHECK-NEXT: jr $ra ++ ret i64 4294963200 ++} ++ ++define i64 @li00000000fffff7ff() { ++; CHECK-LABEL: li00000000fffff7ff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -1 ++; CHECK-NEXT: ori $r4, $r4, 2047 ++; CHECK-NEXT: lu32i.d $r4, 0 ++; CHECK-NEXT: jr $ra ++ ret i64 4294965247 ++} ++ ++define i64 @li00000000fffff800() { ++; CHECK-LABEL: li00000000fffff800: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.w $r4, $zero, -2048 ++; CHECK-NEXT: lu32i.d $r4, 0 ++; CHECK-NEXT: jr $ra ++ ret i64 4294965248 ++} ++ ++define i64 @li00000000ffffffff() { ++; CHECK-LABEL: li00000000ffffffff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.w $r4, $zero, -1 ++; CHECK-NEXT: lu32i.d $r4, 0 ++; CHECK-NEXT: jr $ra ++ ret i64 4294967295 ++} ++ ++define i64 @li0007ffff00000000() { ++; CHECK-LABEL: li0007ffff00000000: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r4, $zero, 0 ++; CHECK-NEXT: lu32i.d $r4, 524287 ++; CHECK-NEXT: jr $ra ++ ret i64 2251795518717952 ++} ++ ++define i64 @li0007ffff000007ff() { ++; CHECK-LABEL: li0007ffff000007ff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r4, $zero, 2047 ++; CHECK-NEXT: lu32i.d $r4, 524287 ++; CHECK-NEXT: jr $ra ++ ret i64 2251795518719999 ++} ++ ++define i64 @li0007ffff00000800() { ++; CHECK-LABEL: li0007ffff00000800: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r4, $zero, 2048 ++; CHECK-NEXT: lu32i.d $r4, 524287 ++; CHECK-NEXT: jr $ra ++ ret i64 2251795518720000 ++} ++ ++define i64 @li0007ffff00000fff() { ++; CHECK-LABEL: li0007ffff00000fff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r4, $zero, 4095 ++; CHECK-NEXT: lu32i.d $r4, 524287 ++; CHECK-NEXT: jr $ra ++ ret i64 2251795518722047 ++} ++ ++define i64 @li0007ffff7ffff000() { ++; CHECK-LABEL: li0007ffff7ffff000: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, 524287 ++; CHECK-NEXT: lu32i.d $r4, 524287 ++; CHECK-NEXT: jr $ra ++ ret i64 2251797666197504 ++} ++ ++define i64 @li0007ffff7ffff7ff() { ++; CHECK-LABEL: li0007ffff7ffff7ff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, 524287 ++; CHECK-NEXT: ori $r4, $r4, 2047 ++; CHECK-NEXT: lu32i.d $r4, 524287 ++; CHECK-NEXT: jr $ra ++ ret i64 2251797666199551 ++} ++ ++define i64 @li0007ffff7ffff800() { ++; CHECK-LABEL: li0007ffff7ffff800: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, 524287 ++; CHECK-NEXT: ori $r4, $r4, 2048 ++; CHECK-NEXT: lu32i.d $r4, 524287 ++; CHECK-NEXT: jr $ra ++ ret i64 2251797666199552 ++} ++ ++define i64 @li0007ffff7fffffff() { ++; CHECK-LABEL: li0007ffff7fffffff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, 524287 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: lu32i.d $r4, 524287 ++; CHECK-NEXT: jr $ra ++ ret i64 2251797666201599 ++} ++ ++define i64 @li0007ffff80000000() { ++; CHECK-LABEL: li0007ffff80000000: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -524288 ++; CHECK-NEXT: lu32i.d $r4, 524287 ++; CHECK-NEXT: jr $ra ++ ret i64 2251797666201600 ++} ++ ++define i64 @li0007ffff800007ff() { ++; CHECK-LABEL: li0007ffff800007ff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -524288 ++; CHECK-NEXT: ori $r4, $r4, 2047 ++; CHECK-NEXT: lu32i.d $r4, 524287 ++; CHECK-NEXT: jr $ra ++ ret i64 
2251797666203647 ++} ++ ++define i64 @li0007ffff80000800() { ++; CHECK-LABEL: li0007ffff80000800: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -524288 ++; CHECK-NEXT: ori $r4, $r4, 2048 ++; CHECK-NEXT: lu32i.d $r4, 524287 ++; CHECK-NEXT: jr $ra ++ ret i64 2251797666203648 ++} ++ ++define i64 @li0007ffff80000fff() { ++; CHECK-LABEL: li0007ffff80000fff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -524288 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: lu32i.d $r4, 524287 ++; CHECK-NEXT: jr $ra ++ ret i64 2251797666205695 ++} ++ ++define i64 @li0007fffffffff000() { ++; CHECK-LABEL: li0007fffffffff000: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -1 ++; CHECK-NEXT: lu32i.d $r4, 524287 ++; CHECK-NEXT: jr $ra ++ ret i64 2251799813681152 ++} ++ ++define i64 @li0007fffffffff7ff() { ++; CHECK-LABEL: li0007fffffffff7ff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -1 ++; CHECK-NEXT: ori $r4, $r4, 2047 ++; CHECK-NEXT: lu32i.d $r4, 524287 ++; CHECK-NEXT: jr $ra ++ ret i64 2251799813683199 ++} ++ ++define i64 @li0007fffffffff800() { ++; CHECK-LABEL: li0007fffffffff800: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.w $r4, $zero, -2048 ++; CHECK-NEXT: lu32i.d $r4, 524287 ++; CHECK-NEXT: jr $ra ++ ret i64 2251799813683200 ++} ++ ++define i64 @li0007ffffffffffff() { ++; CHECK-LABEL: li0007ffffffffffff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.w $r4, $zero, -1 ++; CHECK-NEXT: lu32i.d $r4, 524287 ++; CHECK-NEXT: jr $ra ++ ret i64 2251799813685247 ++} ++ ++define i64 @li0008000000000000() { ++; CHECK-LABEL: li0008000000000000: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r4, $zero, 0 ++; CHECK-NEXT: lu32i.d $r4, -524288 ++; CHECK-NEXT: lu52i.d $r4, $r4, 0 ++; CHECK-NEXT: jr $ra ++ ret i64 2251799813685248 ++} ++ ++define i64 @li00080000000007ff() { ++; CHECK-LABEL: li00080000000007ff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r4, $zero, 2047 ++; CHECK-NEXT: lu32i.d $r4, -524288 ++; CHECK-NEXT: lu52i.d $r4, $r4, 0 ++; CHECK-NEXT: jr $ra ++ ret i64 2251799813687295 ++} ++ ++define i64 @li0008000000000800() { ++; CHECK-LABEL: li0008000000000800: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r4, $zero, 2048 ++; CHECK-NEXT: lu32i.d $r4, -524288 ++; CHECK-NEXT: lu52i.d $r4, $r4, 0 ++; CHECK-NEXT: jr $ra ++ ret i64 2251799813687296 ++} ++ ++define i64 @li0008000000000fff() { ++; CHECK-LABEL: li0008000000000fff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r4, $zero, 4095 ++; CHECK-NEXT: lu32i.d $r4, -524288 ++; CHECK-NEXT: lu52i.d $r4, $r4, 0 ++; CHECK-NEXT: jr $ra ++ ret i64 2251799813689343 ++} ++ ++define i64 @li000800007ffff000() { ++; CHECK-LABEL: li000800007ffff000: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, 524287 ++; CHECK-NEXT: lu32i.d $r4, -524288 ++; CHECK-NEXT: lu52i.d $r4, $r4, 0 ++; CHECK-NEXT: jr $ra ++ ret i64 2251801961164800 ++} ++ ++define i64 @li000800007ffff7ff() { ++; CHECK-LABEL: li000800007ffff7ff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, 524287 ++; CHECK-NEXT: ori $r4, $r4, 2047 ++; CHECK-NEXT: lu32i.d $r4, -524288 ++; CHECK-NEXT: lu52i.d $r4, $r4, 0 ++; CHECK-NEXT: jr $ra ++ ret i64 2251801961166847 ++} ++ ++define i64 @li000800007ffff800() { ++; CHECK-LABEL: li000800007ffff800: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, 524287 ++; CHECK-NEXT: ori $r4, $r4, 2048 ++; CHECK-NEXT: lu32i.d $r4, -524288 ++; CHECK-NEXT: lu52i.d $r4, $r4, 0 ++; CHECK-NEXT: jr $ra ++ ret i64 2251801961166848 ++} ++ ++define i64 @li000800007fffffff() { ++; CHECK-LABEL: li000800007fffffff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, 524287 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: 
lu32i.d $r4, -524288 ++; CHECK-NEXT: lu52i.d $r4, $r4, 0 ++; CHECK-NEXT: jr $ra ++ ret i64 2251801961168895 ++} ++ ++define i64 @li0008000080000000() { ++; CHECK-LABEL: li0008000080000000: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -524288 ++; CHECK-NEXT: lu32i.d $r4, -524288 ++; CHECK-NEXT: lu52i.d $r4, $r4, 0 ++; CHECK-NEXT: jr $ra ++ ret i64 2251801961168896 ++} ++ ++define i64 @li00080000800007ff() { ++; CHECK-LABEL: li00080000800007ff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -524288 ++; CHECK-NEXT: ori $r4, $r4, 2047 ++; CHECK-NEXT: lu32i.d $r4, -524288 ++; CHECK-NEXT: lu52i.d $r4, $r4, 0 ++; CHECK-NEXT: jr $ra ++ ret i64 2251801961170943 ++} ++ ++define i64 @li0008000080000800() { ++; CHECK-LABEL: li0008000080000800: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -524288 ++; CHECK-NEXT: ori $r4, $r4, 2048 ++; CHECK-NEXT: lu32i.d $r4, -524288 ++; CHECK-NEXT: lu52i.d $r4, $r4, 0 ++; CHECK-NEXT: jr $ra ++ ret i64 2251801961170944 ++} ++ ++define i64 @li0008000080000fff() { ++; CHECK-LABEL: li0008000080000fff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -524288 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: lu32i.d $r4, -524288 ++; CHECK-NEXT: lu52i.d $r4, $r4, 0 ++; CHECK-NEXT: jr $ra ++ ret i64 2251801961172991 ++} ++ ++define i64 @li00080000fffff000() { ++; CHECK-LABEL: li00080000fffff000: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -1 ++; CHECK-NEXT: lu32i.d $r4, -524288 ++; CHECK-NEXT: lu52i.d $r4, $r4, 0 ++; CHECK-NEXT: jr $ra ++ ret i64 2251804108648448 ++} ++ ++define i64 @li00080000fffff7ff() { ++; CHECK-LABEL: li00080000fffff7ff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -1 ++; CHECK-NEXT: ori $r4, $r4, 2047 ++; CHECK-NEXT: lu32i.d $r4, -524288 ++; CHECK-NEXT: lu52i.d $r4, $r4, 0 ++; CHECK-NEXT: jr $ra ++ ret i64 2251804108650495 ++} ++ ++define i64 @li00080000fffff800() { ++; CHECK-LABEL: li00080000fffff800: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.w $r4, $zero, -2048 ++; CHECK-NEXT: lu32i.d $r4, -524288 ++; CHECK-NEXT: lu52i.d $r4, $r4, 0 ++; CHECK-NEXT: jr $ra ++ ret i64 2251804108650496 ++} ++ ++define i64 @li00080000ffffffff() { ++; CHECK-LABEL: li00080000ffffffff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.w $r4, $zero, -1 ++; CHECK-NEXT: lu32i.d $r4, -524288 ++; CHECK-NEXT: lu52i.d $r4, $r4, 0 ++; CHECK-NEXT: jr $ra ++ ret i64 2251804108652543 ++} ++ ++define i64 @li000fffff00000000() { ++; CHECK-LABEL: li000fffff00000000: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r4, $zero, 0 ++; CHECK-NEXT: lu32i.d $r4, -1 ++; CHECK-NEXT: lu52i.d $r4, $r4, 0 ++; CHECK-NEXT: jr $ra ++ ret i64 4503595332403200 ++} ++ ++define i64 @li000fffff000007ff() { ++; CHECK-LABEL: li000fffff000007ff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r4, $zero, 2047 ++; CHECK-NEXT: lu32i.d $r4, -1 ++; CHECK-NEXT: lu52i.d $r4, $r4, 0 ++; CHECK-NEXT: jr $ra ++ ret i64 4503595332405247 ++} ++ ++define i64 @li000fffff00000800() { ++; CHECK-LABEL: li000fffff00000800: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r4, $zero, 2048 ++; CHECK-NEXT: lu32i.d $r4, -1 ++; CHECK-NEXT: lu52i.d $r4, $r4, 0 ++; CHECK-NEXT: jr $ra ++ ret i64 4503595332405248 ++} ++ ++define i64 @li000fffff00000fff() { ++; CHECK-LABEL: li000fffff00000fff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r4, $zero, 4095 ++; CHECK-NEXT: lu32i.d $r4, -1 ++; CHECK-NEXT: lu52i.d $r4, $r4, 0 ++; CHECK-NEXT: jr $ra ++ ret i64 4503595332407295 ++} ++ ++define i64 @li000fffff7ffff000() { ++; CHECK-LABEL: li000fffff7ffff000: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, 524287 ++; CHECK-NEXT: lu32i.d $r4, -1 ++; CHECK-NEXT: 
lu52i.d $r4, $r4, 0 ++; CHECK-NEXT: jr $ra ++ ret i64 4503597479882752 ++} ++ ++define i64 @li000fffff7ffff7ff() { ++; CHECK-LABEL: li000fffff7ffff7ff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, 524287 ++; CHECK-NEXT: ori $r4, $r4, 2047 ++; CHECK-NEXT: lu32i.d $r4, -1 ++; CHECK-NEXT: lu52i.d $r4, $r4, 0 ++; CHECK-NEXT: jr $ra ++ ret i64 4503597479884799 ++} ++ ++define i64 @li000fffff7ffff800() { ++; CHECK-LABEL: li000fffff7ffff800: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, 524287 ++; CHECK-NEXT: ori $r4, $r4, 2048 ++; CHECK-NEXT: lu32i.d $r4, -1 ++; CHECK-NEXT: lu52i.d $r4, $r4, 0 ++; CHECK-NEXT: jr $ra ++ ret i64 4503597479884800 ++} ++ ++define i64 @li000fffff7fffffff() { ++; CHECK-LABEL: li000fffff7fffffff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, 524287 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: lu32i.d $r4, -1 ++; CHECK-NEXT: lu52i.d $r4, $r4, 0 ++; CHECK-NEXT: jr $ra ++ ret i64 4503597479886847 ++} ++ ++define i64 @li000fffff80000000() { ++; CHECK-LABEL: li000fffff80000000: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -524288 ++; CHECK-NEXT: lu52i.d $r4, $r4, 0 ++; CHECK-NEXT: jr $ra ++ ret i64 4503597479886848 ++} ++ ++define i64 @li000fffff800007ff() { ++; CHECK-LABEL: li000fffff800007ff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -524288 ++; CHECK-NEXT: ori $r4, $r4, 2047 ++; CHECK-NEXT: lu52i.d $r4, $r4, 0 ++; CHECK-NEXT: jr $ra ++ ret i64 4503597479888895 ++} ++ ++define i64 @li000fffff80000800() { ++; CHECK-LABEL: li000fffff80000800: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -524288 ++; CHECK-NEXT: ori $r4, $r4, 2048 ++; CHECK-NEXT: lu52i.d $r4, $r4, 0 ++; CHECK-NEXT: jr $ra ++ ret i64 4503597479888896 ++} ++ ++define i64 @li000fffff80000fff() { ++; CHECK-LABEL: li000fffff80000fff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -524288 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: lu52i.d $r4, $r4, 0 ++; CHECK-NEXT: jr $ra ++ ret i64 4503597479890943 ++} ++ ++define i64 @li000ffffffffff000() { ++; CHECK-LABEL: li000ffffffffff000: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -1 ++; CHECK-NEXT: lu52i.d $r4, $r4, 0 ++; CHECK-NEXT: jr $ra ++ ret i64 4503599627366400 ++} ++ ++define i64 @li000ffffffffff7ff() { ++; CHECK-LABEL: li000ffffffffff7ff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -1 ++; CHECK-NEXT: ori $r4, $r4, 2047 ++; CHECK-NEXT: lu52i.d $r4, $r4, 0 ++; CHECK-NEXT: jr $ra ++ ret i64 4503599627368447 ++} ++ ++define i64 @li000ffffffffff800() { ++; CHECK-LABEL: li000ffffffffff800: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.w $r4, $zero, -2048 ++; CHECK-NEXT: lu52i.d $r4, $r4, 0 ++; CHECK-NEXT: jr $ra ++ ret i64 4503599627368448 ++} ++ ++define i64 @li000fffffffffffff() { ++; CHECK-LABEL: li000fffffffffffff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.w $r4, $zero, -1 ++; CHECK-NEXT: lu52i.d $r4, $r4, 0 ++; CHECK-NEXT: jr $ra ++ ret i64 4503599627370495 ++} ++ ++define i64 @li7ff0000000000000() { ++; CHECK-LABEL: li7ff0000000000000: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu52i.d $r4, $zero, 2047 ++; CHECK-NEXT: jr $ra ++ ret i64 9218868437227405312 ++} ++ ++define i64 @li7ff00000000007ff() { ++; CHECK-LABEL: li7ff00000000007ff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r4, $zero, 2047 ++; CHECK-NEXT: lu52i.d $r4, $r4, 2047 ++; CHECK-NEXT: jr $ra ++ ret i64 9218868437227407359 ++} ++ ++define i64 @li7ff0000000000800() { ++; CHECK-LABEL: li7ff0000000000800: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r4, $zero, 2048 ++; CHECK-NEXT: lu52i.d $r4, $r4, 2047 ++; CHECK-NEXT: jr $ra ++ ret i64 9218868437227407360 ++} ++ ++define 
i64 @li7ff0000000000fff() { ++; CHECK-LABEL: li7ff0000000000fff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r4, $zero, 4095 ++; CHECK-NEXT: lu52i.d $r4, $r4, 2047 ++; CHECK-NEXT: jr $ra ++ ret i64 9218868437227409407 ++} ++ ++define i64 @li7ff000007ffff000() { ++; CHECK-LABEL: li7ff000007ffff000: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, 524287 ++; CHECK-NEXT: lu52i.d $r4, $r4, 2047 ++; CHECK-NEXT: jr $ra ++ ret i64 9218868439374884864 ++} ++ ++define i64 @li7ff000007ffff7ff() { ++; CHECK-LABEL: li7ff000007ffff7ff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, 524287 ++; CHECK-NEXT: ori $r4, $r4, 2047 ++; CHECK-NEXT: lu52i.d $r4, $r4, 2047 ++; CHECK-NEXT: jr $ra ++ ret i64 9218868439374886911 ++} ++ ++define i64 @li7ff000007ffff800() { ++; CHECK-LABEL: li7ff000007ffff800: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, 524287 ++; CHECK-NEXT: ori $r4, $r4, 2048 ++; CHECK-NEXT: lu52i.d $r4, $r4, 2047 ++; CHECK-NEXT: jr $ra ++ ret i64 9218868439374886912 ++} ++ ++define i64 @li7ff000007fffffff() { ++; CHECK-LABEL: li7ff000007fffffff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, 524287 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: lu52i.d $r4, $r4, 2047 ++; CHECK-NEXT: jr $ra ++ ret i64 9218868439374888959 ++} ++ ++define i64 @li7ff0000080000000() { ++; CHECK-LABEL: li7ff0000080000000: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -524288 ++; CHECK-NEXT: lu32i.d $r4, 0 ++; CHECK-NEXT: lu52i.d $r4, $r4, 2047 ++; CHECK-NEXT: jr $ra ++ ret i64 9218868439374888960 ++} ++ ++define i64 @li7ff00000800007ff() { ++; CHECK-LABEL: li7ff00000800007ff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -524288 ++; CHECK-NEXT: ori $r4, $r4, 2047 ++; CHECK-NEXT: lu32i.d $r4, 0 ++; CHECK-NEXT: lu52i.d $r4, $r4, 2047 ++; CHECK-NEXT: jr $ra ++ ret i64 9218868439374891007 ++} ++ ++define i64 @li7ff0000080000800() { ++; CHECK-LABEL: li7ff0000080000800: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -524288 ++; CHECK-NEXT: ori $r4, $r4, 2048 ++; CHECK-NEXT: lu32i.d $r4, 0 ++; CHECK-NEXT: lu52i.d $r4, $r4, 2047 ++; CHECK-NEXT: jr $ra ++ ret i64 9218868439374891008 ++} ++ ++define i64 @li7ff0000080000fff() { ++; CHECK-LABEL: li7ff0000080000fff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -524288 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: lu32i.d $r4, 0 ++; CHECK-NEXT: lu52i.d $r4, $r4, 2047 ++; CHECK-NEXT: jr $ra ++ ret i64 9218868439374893055 ++} ++ ++define i64 @li7ff00000fffff000() { ++; CHECK-LABEL: li7ff00000fffff000: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -1 ++; CHECK-NEXT: lu32i.d $r4, 0 ++; CHECK-NEXT: lu52i.d $r4, $r4, 2047 ++; CHECK-NEXT: jr $ra ++ ret i64 9218868441522368512 ++} ++ ++define i64 @li7ff00000fffff7ff() { ++; CHECK-LABEL: li7ff00000fffff7ff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -1 ++; CHECK-NEXT: ori $r4, $r4, 2047 ++; CHECK-NEXT: lu32i.d $r4, 0 ++; CHECK-NEXT: lu52i.d $r4, $r4, 2047 ++; CHECK-NEXT: jr $ra ++ ret i64 9218868441522370559 ++} ++ ++define i64 @li7ff00000fffff800() { ++; CHECK-LABEL: li7ff00000fffff800: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.w $r4, $zero, -2048 ++; CHECK-NEXT: lu32i.d $r4, 0 ++; CHECK-NEXT: lu52i.d $r4, $r4, 2047 ++; CHECK-NEXT: jr $ra ++ ret i64 9218868441522370560 ++} ++ ++define i64 @li7ff00000ffffffff() { ++; CHECK-LABEL: li7ff00000ffffffff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.w $r4, $zero, -1 ++; CHECK-NEXT: lu32i.d $r4, 0 ++; CHECK-NEXT: lu52i.d $r4, $r4, 2047 ++; CHECK-NEXT: jr $ra ++ ret i64 9218868441522372607 ++} ++ ++define i64 @li7ff7ffff00000000() { ++; CHECK-LABEL: li7ff7ffff00000000: 
++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r4, $zero, 0 ++; CHECK-NEXT: lu32i.d $r4, 524287 ++; CHECK-NEXT: lu52i.d $r4, $r4, 2047 ++; CHECK-NEXT: jr $ra ++ ret i64 9221120232746123264 ++} ++ ++define i64 @li7ff7ffff000007ff() { ++; CHECK-LABEL: li7ff7ffff000007ff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r4, $zero, 2047 ++; CHECK-NEXT: lu32i.d $r4, 524287 ++; CHECK-NEXT: lu52i.d $r4, $r4, 2047 ++; CHECK-NEXT: jr $ra ++ ret i64 9221120232746125311 ++} ++ ++define i64 @li7ff7ffff00000800() { ++; CHECK-LABEL: li7ff7ffff00000800: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r4, $zero, 2048 ++; CHECK-NEXT: lu32i.d $r4, 524287 ++; CHECK-NEXT: lu52i.d $r4, $r4, 2047 ++; CHECK-NEXT: jr $ra ++ ret i64 9221120232746125312 ++} ++ ++define i64 @li7ff7ffff00000fff() { ++; CHECK-LABEL: li7ff7ffff00000fff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r4, $zero, 4095 ++; CHECK-NEXT: lu32i.d $r4, 524287 ++; CHECK-NEXT: lu52i.d $r4, $r4, 2047 ++; CHECK-NEXT: jr $ra ++ ret i64 9221120232746127359 ++} ++ ++define i64 @li7ff7ffff7ffff000() { ++; CHECK-LABEL: li7ff7ffff7ffff000: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, 524287 ++; CHECK-NEXT: lu32i.d $r4, 524287 ++; CHECK-NEXT: lu52i.d $r4, $r4, 2047 ++; CHECK-NEXT: jr $ra ++ ret i64 9221120234893602816 ++} ++ ++define i64 @li7ff7ffff7ffff7ff() { ++; CHECK-LABEL: li7ff7ffff7ffff7ff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, 524287 ++; CHECK-NEXT: ori $r4, $r4, 2047 ++; CHECK-NEXT: lu32i.d $r4, 524287 ++; CHECK-NEXT: lu52i.d $r4, $r4, 2047 ++; CHECK-NEXT: jr $ra ++ ret i64 9221120234893604863 ++} ++ ++define i64 @li7ff7ffff7ffff800() { ++; CHECK-LABEL: li7ff7ffff7ffff800: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, 524287 ++; CHECK-NEXT: ori $r4, $r4, 2048 ++; CHECK-NEXT: lu32i.d $r4, 524287 ++; CHECK-NEXT: lu52i.d $r4, $r4, 2047 ++; CHECK-NEXT: jr $ra ++ ret i64 9221120234893604864 ++} ++ ++define i64 @li7ff7ffff7fffffff() { ++; CHECK-LABEL: li7ff7ffff7fffffff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, 524287 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: lu32i.d $r4, 524287 ++; CHECK-NEXT: lu52i.d $r4, $r4, 2047 ++; CHECK-NEXT: jr $ra ++ ret i64 9221120234893606911 ++} ++ ++define i64 @li7ff7ffff80000000() { ++; CHECK-LABEL: li7ff7ffff80000000: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -524288 ++; CHECK-NEXT: lu32i.d $r4, 524287 ++; CHECK-NEXT: lu52i.d $r4, $r4, 2047 ++; CHECK-NEXT: jr $ra ++ ret i64 9221120234893606912 ++} ++ ++define i64 @li7ff7ffff800007ff() { ++; CHECK-LABEL: li7ff7ffff800007ff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -524288 ++; CHECK-NEXT: ori $r4, $r4, 2047 ++; CHECK-NEXT: lu32i.d $r4, 524287 ++; CHECK-NEXT: lu52i.d $r4, $r4, 2047 ++; CHECK-NEXT: jr $ra ++ ret i64 9221120234893608959 ++} ++ ++define i64 @li7ff7ffff80000800() { ++; CHECK-LABEL: li7ff7ffff80000800: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -524288 ++; CHECK-NEXT: ori $r4, $r4, 2048 ++; CHECK-NEXT: lu32i.d $r4, 524287 ++; CHECK-NEXT: lu52i.d $r4, $r4, 2047 ++; CHECK-NEXT: jr $ra ++ ret i64 9221120234893608960 ++} ++ ++define i64 @li7ff7ffff80000fff() { ++; CHECK-LABEL: li7ff7ffff80000fff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -524288 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: lu32i.d $r4, 524287 ++; CHECK-NEXT: lu52i.d $r4, $r4, 2047 ++; CHECK-NEXT: jr $ra ++ ret i64 9221120234893611007 ++} ++ ++define i64 @li7ff7fffffffff000() { ++; CHECK-LABEL: li7ff7fffffffff000: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -1 ++; CHECK-NEXT: lu32i.d $r4, 524287 ++; CHECK-NEXT: lu52i.d $r4, $r4, 2047 ++; CHECK-NEXT: jr 
$ra ++ ret i64 9221120237041086464 ++} ++ ++define i64 @li7ff7fffffffff7ff() { ++; CHECK-LABEL: li7ff7fffffffff7ff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -1 ++; CHECK-NEXT: ori $r4, $r4, 2047 ++; CHECK-NEXT: lu32i.d $r4, 524287 ++; CHECK-NEXT: lu52i.d $r4, $r4, 2047 ++; CHECK-NEXT: jr $ra ++ ret i64 9221120237041088511 ++} ++ ++define i64 @li7ff7fffffffff800() { ++; CHECK-LABEL: li7ff7fffffffff800: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.w $r4, $zero, -2048 ++; CHECK-NEXT: lu32i.d $r4, 524287 ++; CHECK-NEXT: lu52i.d $r4, $r4, 2047 ++; CHECK-NEXT: jr $ra ++ ret i64 9221120237041088512 ++} ++ ++define i64 @li7ff7ffffffffffff() { ++; CHECK-LABEL: li7ff7ffffffffffff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.w $r4, $zero, -1 ++; CHECK-NEXT: lu32i.d $r4, 524287 ++; CHECK-NEXT: lu52i.d $r4, $r4, 2047 ++; CHECK-NEXT: jr $ra ++ ret i64 9221120237041090559 ++} ++ ++define i64 @li7ff8000000000000() { ++; CHECK-LABEL: li7ff8000000000000: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r4, $zero, 0 ++; CHECK-NEXT: lu32i.d $r4, -524288 ++; CHECK-NEXT: lu52i.d $r4, $r4, 2047 ++; CHECK-NEXT: jr $ra ++ ret i64 9221120237041090560 ++} ++ ++define i64 @li7ff80000000007ff() { ++; CHECK-LABEL: li7ff80000000007ff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r4, $zero, 2047 ++; CHECK-NEXT: lu32i.d $r4, -524288 ++; CHECK-NEXT: lu52i.d $r4, $r4, 2047 ++; CHECK-NEXT: jr $ra ++ ret i64 9221120237041092607 ++} ++ ++define i64 @li7ff8000000000800() { ++; CHECK-LABEL: li7ff8000000000800: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r4, $zero, 2048 ++; CHECK-NEXT: lu32i.d $r4, -524288 ++; CHECK-NEXT: lu52i.d $r4, $r4, 2047 ++; CHECK-NEXT: jr $ra ++ ret i64 9221120237041092608 ++} ++ ++define i64 @li7ff8000000000fff() { ++; CHECK-LABEL: li7ff8000000000fff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r4, $zero, 4095 ++; CHECK-NEXT: lu32i.d $r4, -524288 ++; CHECK-NEXT: lu52i.d $r4, $r4, 2047 ++; CHECK-NEXT: jr $ra ++ ret i64 9221120237041094655 ++} ++ ++define i64 @li7ff800007ffff000() { ++; CHECK-LABEL: li7ff800007ffff000: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, 524287 ++; CHECK-NEXT: lu32i.d $r4, -524288 ++; CHECK-NEXT: lu52i.d $r4, $r4, 2047 ++; CHECK-NEXT: jr $ra ++ ret i64 9221120239188570112 ++} ++ ++define i64 @li7ff800007ffff7ff() { ++; CHECK-LABEL: li7ff800007ffff7ff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, 524287 ++; CHECK-NEXT: ori $r4, $r4, 2047 ++; CHECK-NEXT: lu32i.d $r4, -524288 ++; CHECK-NEXT: lu52i.d $r4, $r4, 2047 ++; CHECK-NEXT: jr $ra ++ ret i64 9221120239188572159 ++} ++ ++define i64 @li7ff800007ffff800() { ++; CHECK-LABEL: li7ff800007ffff800: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, 524287 ++; CHECK-NEXT: ori $r4, $r4, 2048 ++; CHECK-NEXT: lu32i.d $r4, -524288 ++; CHECK-NEXT: lu52i.d $r4, $r4, 2047 ++; CHECK-NEXT: jr $ra ++ ret i64 9221120239188572160 ++} ++ ++define i64 @li7ff800007fffffff() { ++; CHECK-LABEL: li7ff800007fffffff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, 524287 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: lu32i.d $r4, -524288 ++; CHECK-NEXT: lu52i.d $r4, $r4, 2047 ++; CHECK-NEXT: jr $ra ++ ret i64 9221120239188574207 ++} ++ ++define i64 @li7ff8000080000000() { ++; CHECK-LABEL: li7ff8000080000000: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -524288 ++; CHECK-NEXT: lu32i.d $r4, -524288 ++; CHECK-NEXT: lu52i.d $r4, $r4, 2047 ++; CHECK-NEXT: jr $ra ++ ret i64 9221120239188574208 ++} ++ ++define i64 @li7ff80000800007ff() { ++; CHECK-LABEL: li7ff80000800007ff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -524288 ++; CHECK-NEXT: ori $r4, $r4, 2047 
++; CHECK-NEXT: lu32i.d $r4, -524288 ++; CHECK-NEXT: lu52i.d $r4, $r4, 2047 ++; CHECK-NEXT: jr $ra ++ ret i64 9221120239188576255 ++} ++ ++define i64 @li7ff8000080000800() { ++; CHECK-LABEL: li7ff8000080000800: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -524288 ++; CHECK-NEXT: ori $r4, $r4, 2048 ++; CHECK-NEXT: lu32i.d $r4, -524288 ++; CHECK-NEXT: lu52i.d $r4, $r4, 2047 ++; CHECK-NEXT: jr $ra ++ ret i64 9221120239188576256 ++} ++ ++define i64 @li7ff8000080000fff() { ++; CHECK-LABEL: li7ff8000080000fff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -524288 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: lu32i.d $r4, -524288 ++; CHECK-NEXT: lu52i.d $r4, $r4, 2047 ++; CHECK-NEXT: jr $ra ++ ret i64 9221120239188578303 ++} ++ ++define i64 @li7ff80000fffff000() { ++; CHECK-LABEL: li7ff80000fffff000: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -1 ++; CHECK-NEXT: lu32i.d $r4, -524288 ++; CHECK-NEXT: lu52i.d $r4, $r4, 2047 ++; CHECK-NEXT: jr $ra ++ ret i64 9221120241336053760 ++} ++ ++define i64 @li7ff80000fffff7ff() { ++; CHECK-LABEL: li7ff80000fffff7ff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -1 ++; CHECK-NEXT: ori $r4, $r4, 2047 ++; CHECK-NEXT: lu32i.d $r4, -524288 ++; CHECK-NEXT: lu52i.d $r4, $r4, 2047 ++; CHECK-NEXT: jr $ra ++ ret i64 9221120241336055807 ++} ++ ++define i64 @li7ff80000fffff800() { ++; CHECK-LABEL: li7ff80000fffff800: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.w $r4, $zero, -2048 ++; CHECK-NEXT: lu32i.d $r4, -524288 ++; CHECK-NEXT: lu52i.d $r4, $r4, 2047 ++; CHECK-NEXT: jr $ra ++ ret i64 9221120241336055808 ++} ++ ++define i64 @li7ff80000ffffffff() { ++; CHECK-LABEL: li7ff80000ffffffff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.w $r4, $zero, -1 ++; CHECK-NEXT: lu32i.d $r4, -524288 ++; CHECK-NEXT: lu52i.d $r4, $r4, 2047 ++; CHECK-NEXT: jr $ra ++ ret i64 9221120241336057855 ++} ++ ++define i64 @li7fffffff00000000() { ++; CHECK-LABEL: li7fffffff00000000: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r4, $zero, 0 ++; CHECK-NEXT: lu32i.d $r4, -1 ++; CHECK-NEXT: lu52i.d $r4, $r4, 2047 ++; CHECK-NEXT: jr $ra ++ ret i64 9223372032559808512 ++} ++ ++define i64 @li7fffffff000007ff() { ++; CHECK-LABEL: li7fffffff000007ff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r4, $zero, 2047 ++; CHECK-NEXT: lu32i.d $r4, -1 ++; CHECK-NEXT: lu52i.d $r4, $r4, 2047 ++; CHECK-NEXT: jr $ra ++ ret i64 9223372032559810559 ++} ++ ++define i64 @li7fffffff00000800() { ++; CHECK-LABEL: li7fffffff00000800: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r4, $zero, 2048 ++; CHECK-NEXT: lu32i.d $r4, -1 ++; CHECK-NEXT: lu52i.d $r4, $r4, 2047 ++; CHECK-NEXT: jr $ra ++ ret i64 9223372032559810560 ++} ++ ++define i64 @li7fffffff00000fff() { ++; CHECK-LABEL: li7fffffff00000fff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r4, $zero, 4095 ++; CHECK-NEXT: lu32i.d $r4, -1 ++; CHECK-NEXT: lu52i.d $r4, $r4, 2047 ++; CHECK-NEXT: jr $ra ++ ret i64 9223372032559812607 ++} ++ ++define i64 @li7fffffff7ffff000() { ++; CHECK-LABEL: li7fffffff7ffff000: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, 524287 ++; CHECK-NEXT: lu32i.d $r4, -1 ++; CHECK-NEXT: lu52i.d $r4, $r4, 2047 ++; CHECK-NEXT: jr $ra ++ ret i64 9223372034707288064 ++} ++ ++define i64 @li7fffffff7ffff7ff() { ++; CHECK-LABEL: li7fffffff7ffff7ff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, 524287 ++; CHECK-NEXT: ori $r4, $r4, 2047 ++; CHECK-NEXT: lu32i.d $r4, -1 ++; CHECK-NEXT: lu52i.d $r4, $r4, 2047 ++; CHECK-NEXT: jr $ra ++ ret i64 9223372034707290111 ++} ++ ++define i64 @li7fffffff7ffff800() { ++; CHECK-LABEL: li7fffffff7ffff800: ++; CHECK: # %bb.0: ++; 
CHECK-NEXT: lu12i.w $r4, 524287 ++; CHECK-NEXT: ori $r4, $r4, 2048 ++; CHECK-NEXT: lu32i.d $r4, -1 ++; CHECK-NEXT: lu52i.d $r4, $r4, 2047 ++; CHECK-NEXT: jr $ra ++ ret i64 9223372034707290112 ++} ++ ++define i64 @li7fffffff7fffffff() { ++; CHECK-LABEL: li7fffffff7fffffff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, 524287 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: lu32i.d $r4, -1 ++; CHECK-NEXT: lu52i.d $r4, $r4, 2047 ++; CHECK-NEXT: jr $ra ++ ret i64 9223372034707292159 ++} ++ ++define i64 @li7fffffff80000000() { ++; CHECK-LABEL: li7fffffff80000000: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -524288 ++; CHECK-NEXT: lu52i.d $r4, $r4, 2047 ++; CHECK-NEXT: jr $ra ++ ret i64 9223372034707292160 ++} ++ ++define i64 @li7fffffff800007ff() { ++; CHECK-LABEL: li7fffffff800007ff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -524288 ++; CHECK-NEXT: ori $r4, $r4, 2047 ++; CHECK-NEXT: lu52i.d $r4, $r4, 2047 ++; CHECK-NEXT: jr $ra ++ ret i64 9223372034707294207 ++} ++ ++define i64 @li7fffffff80000800() { ++; CHECK-LABEL: li7fffffff80000800: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -524288 ++; CHECK-NEXT: ori $r4, $r4, 2048 ++; CHECK-NEXT: lu52i.d $r4, $r4, 2047 ++; CHECK-NEXT: jr $ra ++ ret i64 9223372034707294208 ++} ++ ++define i64 @li7fffffff80000fff() { ++; CHECK-LABEL: li7fffffff80000fff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -524288 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: lu52i.d $r4, $r4, 2047 ++; CHECK-NEXT: jr $ra ++ ret i64 9223372034707296255 ++} ++ ++define i64 @li7ffffffffffff000() { ++; CHECK-LABEL: li7ffffffffffff000: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -1 ++; CHECK-NEXT: lu52i.d $r4, $r4, 2047 ++; CHECK-NEXT: jr $ra ++ ret i64 9223372036854771712 ++} ++ ++define i64 @li7ffffffffffff7ff() { ++; CHECK-LABEL: li7ffffffffffff7ff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -1 ++; CHECK-NEXT: ori $r4, $r4, 2047 ++; CHECK-NEXT: lu52i.d $r4, $r4, 2047 ++; CHECK-NEXT: jr $ra ++ ret i64 9223372036854773759 ++} ++ ++define i64 @li7ffffffffffff800() { ++; CHECK-LABEL: li7ffffffffffff800: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.w $r4, $zero, -2048 ++; CHECK-NEXT: lu52i.d $r4, $r4, 2047 ++; CHECK-NEXT: jr $ra ++ ret i64 9223372036854773760 ++} ++ ++define i64 @li7fffffffffffffff() { ++; CHECK-LABEL: li7fffffffffffffff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.w $r4, $zero, -1 ++; CHECK-NEXT: lu52i.d $r4, $r4, 2047 ++; CHECK-NEXT: jr $ra ++ ret i64 9223372036854775807 ++} ++ ++define i64 @li8000000000000000() { ++; CHECK-LABEL: li8000000000000000: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu52i.d $r4, $zero, -2048 ++; CHECK-NEXT: jr $ra ++ ret i64 -9223372036854775808 ++} ++ ++define i64 @li80000000000007ff() { ++; CHECK-LABEL: li80000000000007ff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r4, $zero, 2047 ++; CHECK-NEXT: lu52i.d $r4, $r4, -2048 ++; CHECK-NEXT: jr $ra ++ ret i64 -9223372036854773761 ++} ++ ++define i64 @li8000000000000800() { ++; CHECK-LABEL: li8000000000000800: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r4, $zero, 2048 ++; CHECK-NEXT: lu52i.d $r4, $r4, -2048 ++; CHECK-NEXT: jr $ra ++ ret i64 -9223372036854773760 ++} ++ ++define i64 @li8000000000000fff() { ++; CHECK-LABEL: li8000000000000fff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r4, $zero, 4095 ++; CHECK-NEXT: lu52i.d $r4, $r4, -2048 ++; CHECK-NEXT: jr $ra ++ ret i64 -9223372036854771713 ++} ++ ++define i64 @li800000007ffff000() { ++; CHECK-LABEL: li800000007ffff000: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, 524287 ++; CHECK-NEXT: lu52i.d $r4, $r4, -2048 ++; 
CHECK-NEXT: jr $ra ++ ret i64 -9223372034707296256 ++} ++ ++define i64 @li800000007ffff7ff() { ++; CHECK-LABEL: li800000007ffff7ff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, 524287 ++; CHECK-NEXT: ori $r4, $r4, 2047 ++; CHECK-NEXT: lu52i.d $r4, $r4, -2048 ++; CHECK-NEXT: jr $ra ++ ret i64 -9223372034707294209 ++} ++ ++define i64 @li800000007ffff800() { ++; CHECK-LABEL: li800000007ffff800: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, 524287 ++; CHECK-NEXT: ori $r4, $r4, 2048 ++; CHECK-NEXT: lu52i.d $r4, $r4, -2048 ++; CHECK-NEXT: jr $ra ++ ret i64 -9223372034707294208 ++} ++ ++define i64 @li800000007fffffff() { ++; CHECK-LABEL: li800000007fffffff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, 524287 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: lu52i.d $r4, $r4, -2048 ++; CHECK-NEXT: jr $ra ++ ret i64 -9223372034707292161 ++} ++ ++define i64 @li8000000080000000() { ++; CHECK-LABEL: li8000000080000000: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -524288 ++; CHECK-NEXT: lu32i.d $r4, 0 ++; CHECK-NEXT: lu52i.d $r4, $r4, -2048 ++; CHECK-NEXT: jr $ra ++ ret i64 -9223372034707292160 ++} ++ ++define i64 @li80000000800007ff() { ++; CHECK-LABEL: li80000000800007ff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -524288 ++; CHECK-NEXT: ori $r4, $r4, 2047 ++; CHECK-NEXT: lu32i.d $r4, 0 ++; CHECK-NEXT: lu52i.d $r4, $r4, -2048 ++; CHECK-NEXT: jr $ra ++ ret i64 -9223372034707290113 ++} ++ ++define i64 @li8000000080000800() { ++; CHECK-LABEL: li8000000080000800: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -524288 ++; CHECK-NEXT: ori $r4, $r4, 2048 ++; CHECK-NEXT: lu32i.d $r4, 0 ++; CHECK-NEXT: lu52i.d $r4, $r4, -2048 ++; CHECK-NEXT: jr $ra ++ ret i64 -9223372034707290112 ++} ++ ++define i64 @li8000000080000fff() { ++; CHECK-LABEL: li8000000080000fff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -524288 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: lu32i.d $r4, 0 ++; CHECK-NEXT: lu52i.d $r4, $r4, -2048 ++; CHECK-NEXT: jr $ra ++ ret i64 -9223372034707288065 ++} ++ ++define i64 @li80000000fffff000() { ++; CHECK-LABEL: li80000000fffff000: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -1 ++; CHECK-NEXT: lu32i.d $r4, 0 ++; CHECK-NEXT: lu52i.d $r4, $r4, -2048 ++; CHECK-NEXT: jr $ra ++ ret i64 -9223372032559812608 ++} ++ ++define i64 @li80000000fffff7ff() { ++; CHECK-LABEL: li80000000fffff7ff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -1 ++; CHECK-NEXT: ori $r4, $r4, 2047 ++; CHECK-NEXT: lu32i.d $r4, 0 ++; CHECK-NEXT: lu52i.d $r4, $r4, -2048 ++; CHECK-NEXT: jr $ra ++ ret i64 -9223372032559810561 ++} ++ ++define i64 @li80000000fffff800() { ++; CHECK-LABEL: li80000000fffff800: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.w $r4, $zero, -2048 ++; CHECK-NEXT: lu32i.d $r4, 0 ++; CHECK-NEXT: lu52i.d $r4, $r4, -2048 ++; CHECK-NEXT: jr $ra ++ ret i64 -9223372032559810560 ++} ++ ++define i64 @li80000000ffffffff() { ++; CHECK-LABEL: li80000000ffffffff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.w $r4, $zero, -1 ++; CHECK-NEXT: lu32i.d $r4, 0 ++; CHECK-NEXT: lu52i.d $r4, $r4, -2048 ++; CHECK-NEXT: jr $ra ++ ret i64 -9223372032559808513 ++} ++ ++define i64 @li8007ffff00000000() { ++; CHECK-LABEL: li8007ffff00000000: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r4, $zero, 0 ++; CHECK-NEXT: lu32i.d $r4, 524287 ++; CHECK-NEXT: lu52i.d $r4, $r4, -2048 ++; CHECK-NEXT: jr $ra ++ ret i64 -9221120241336057856 ++} ++ ++define i64 @li8007ffff000007ff() { ++; CHECK-LABEL: li8007ffff000007ff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r4, $zero, 2047 ++; CHECK-NEXT: lu32i.d $r4, 524287 ++; CHECK-NEXT: 
lu52i.d $r4, $r4, -2048 ++; CHECK-NEXT: jr $ra ++ ret i64 -9221120241336055809 ++} ++ ++define i64 @li8007ffff00000800() { ++; CHECK-LABEL: li8007ffff00000800: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r4, $zero, 2048 ++; CHECK-NEXT: lu32i.d $r4, 524287 ++; CHECK-NEXT: lu52i.d $r4, $r4, -2048 ++; CHECK-NEXT: jr $ra ++ ret i64 -9221120241336055808 ++} ++ ++define i64 @li8007ffff00000fff() { ++; CHECK-LABEL: li8007ffff00000fff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r4, $zero, 4095 ++; CHECK-NEXT: lu32i.d $r4, 524287 ++; CHECK-NEXT: lu52i.d $r4, $r4, -2048 ++; CHECK-NEXT: jr $ra ++ ret i64 -9221120241336053761 ++} ++ ++define i64 @li8007ffff7ffff000() { ++; CHECK-LABEL: li8007ffff7ffff000: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, 524287 ++; CHECK-NEXT: lu32i.d $r4, 524287 ++; CHECK-NEXT: lu52i.d $r4, $r4, -2048 ++; CHECK-NEXT: jr $ra ++ ret i64 -9221120239188578304 ++} ++ ++define i64 @li8007ffff7ffff7ff() { ++; CHECK-LABEL: li8007ffff7ffff7ff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, 524287 ++; CHECK-NEXT: ori $r4, $r4, 2047 ++; CHECK-NEXT: lu32i.d $r4, 524287 ++; CHECK-NEXT: lu52i.d $r4, $r4, -2048 ++; CHECK-NEXT: jr $ra ++ ret i64 -9221120239188576257 ++} ++ ++define i64 @li8007ffff7ffff800() { ++; CHECK-LABEL: li8007ffff7ffff800: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, 524287 ++; CHECK-NEXT: ori $r4, $r4, 2048 ++; CHECK-NEXT: lu32i.d $r4, 524287 ++; CHECK-NEXT: lu52i.d $r4, $r4, -2048 ++; CHECK-NEXT: jr $ra ++ ret i64 -9221120239188576256 ++} ++ ++define i64 @li8007ffff7fffffff() { ++; CHECK-LABEL: li8007ffff7fffffff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, 524287 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: lu32i.d $r4, 524287 ++; CHECK-NEXT: lu52i.d $r4, $r4, -2048 ++; CHECK-NEXT: jr $ra ++ ret i64 -9221120239188574209 ++} ++ ++define i64 @li8007ffff80000000() { ++; CHECK-LABEL: li8007ffff80000000: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -524288 ++; CHECK-NEXT: lu32i.d $r4, 524287 ++; CHECK-NEXT: lu52i.d $r4, $r4, -2048 ++; CHECK-NEXT: jr $ra ++ ret i64 -9221120239188574208 ++} ++ ++define i64 @li8007ffff800007ff() { ++; CHECK-LABEL: li8007ffff800007ff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -524288 ++; CHECK-NEXT: ori $r4, $r4, 2047 ++; CHECK-NEXT: lu32i.d $r4, 524287 ++; CHECK-NEXT: lu52i.d $r4, $r4, -2048 ++; CHECK-NEXT: jr $ra ++ ret i64 -9221120239188572161 ++} ++ ++define i64 @li8007ffff80000800() { ++; CHECK-LABEL: li8007ffff80000800: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -524288 ++; CHECK-NEXT: ori $r4, $r4, 2048 ++; CHECK-NEXT: lu32i.d $r4, 524287 ++; CHECK-NEXT: lu52i.d $r4, $r4, -2048 ++; CHECK-NEXT: jr $ra ++ ret i64 -9221120239188572160 ++} ++ ++define i64 @li8007ffff80000fff() { ++; CHECK-LABEL: li8007ffff80000fff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -524288 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: lu32i.d $r4, 524287 ++; CHECK-NEXT: lu52i.d $r4, $r4, -2048 ++; CHECK-NEXT: jr $ra ++ ret i64 -9221120239188570113 ++} ++ ++define i64 @li8007fffffffff000() { ++; CHECK-LABEL: li8007fffffffff000: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -1 ++; CHECK-NEXT: lu32i.d $r4, 524287 ++; CHECK-NEXT: lu52i.d $r4, $r4, -2048 ++; CHECK-NEXT: jr $ra ++ ret i64 -9221120237041094656 ++} ++ ++define i64 @li8007fffffffff7ff() { ++; CHECK-LABEL: li8007fffffffff7ff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -1 ++; CHECK-NEXT: ori $r4, $r4, 2047 ++; CHECK-NEXT: lu32i.d $r4, 524287 ++; CHECK-NEXT: lu52i.d $r4, $r4, -2048 ++; CHECK-NEXT: jr $ra ++ ret i64 -9221120237041092609 ++} ++ ++define 
i64 @li8007fffffffff800() { ++; CHECK-LABEL: li8007fffffffff800: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.w $r4, $zero, -2048 ++; CHECK-NEXT: lu32i.d $r4, 524287 ++; CHECK-NEXT: lu52i.d $r4, $r4, -2048 ++; CHECK-NEXT: jr $ra ++ ret i64 -9221120237041092608 ++} ++ ++define i64 @li8007ffffffffffff() { ++; CHECK-LABEL: li8007ffffffffffff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.w $r4, $zero, -1 ++; CHECK-NEXT: lu32i.d $r4, 524287 ++; CHECK-NEXT: lu52i.d $r4, $r4, -2048 ++; CHECK-NEXT: jr $ra ++ ret i64 -9221120237041090561 ++} ++ ++define i64 @li8008000000000000() { ++; CHECK-LABEL: li8008000000000000: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r4, $zero, 0 ++; CHECK-NEXT: lu32i.d $r4, -524288 ++; CHECK-NEXT: lu52i.d $r4, $r4, -2048 ++; CHECK-NEXT: jr $ra ++ ret i64 -9221120237041090560 ++} ++ ++define i64 @li80080000000007ff() { ++; CHECK-LABEL: li80080000000007ff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r4, $zero, 2047 ++; CHECK-NEXT: lu32i.d $r4, -524288 ++; CHECK-NEXT: lu52i.d $r4, $r4, -2048 ++; CHECK-NEXT: jr $ra ++ ret i64 -9221120237041088513 ++} ++ ++define i64 @li8008000000000800() { ++; CHECK-LABEL: li8008000000000800: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r4, $zero, 2048 ++; CHECK-NEXT: lu32i.d $r4, -524288 ++; CHECK-NEXT: lu52i.d $r4, $r4, -2048 ++; CHECK-NEXT: jr $ra ++ ret i64 -9221120237041088512 ++} ++ ++define i64 @li8008000000000fff() { ++; CHECK-LABEL: li8008000000000fff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r4, $zero, 4095 ++; CHECK-NEXT: lu32i.d $r4, -524288 ++; CHECK-NEXT: lu52i.d $r4, $r4, -2048 ++; CHECK-NEXT: jr $ra ++ ret i64 -9221120237041086465 ++} ++ ++define i64 @li800800007ffff000() { ++; CHECK-LABEL: li800800007ffff000: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, 524287 ++; CHECK-NEXT: lu32i.d $r4, -524288 ++; CHECK-NEXT: lu52i.d $r4, $r4, -2048 ++; CHECK-NEXT: jr $ra ++ ret i64 -9221120234893611008 ++} ++ ++define i64 @li800800007ffff7ff() { ++; CHECK-LABEL: li800800007ffff7ff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, 524287 ++; CHECK-NEXT: ori $r4, $r4, 2047 ++; CHECK-NEXT: lu32i.d $r4, -524288 ++; CHECK-NEXT: lu52i.d $r4, $r4, -2048 ++; CHECK-NEXT: jr $ra ++ ret i64 -9221120234893608961 ++} ++ ++define i64 @li800800007ffff800() { ++; CHECK-LABEL: li800800007ffff800: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, 524287 ++; CHECK-NEXT: ori $r4, $r4, 2048 ++; CHECK-NEXT: lu32i.d $r4, -524288 ++; CHECK-NEXT: lu52i.d $r4, $r4, -2048 ++; CHECK-NEXT: jr $ra ++ ret i64 -9221120234893608960 ++} ++ ++define i64 @li800800007fffffff() { ++; CHECK-LABEL: li800800007fffffff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, 524287 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: lu32i.d $r4, -524288 ++; CHECK-NEXT: lu52i.d $r4, $r4, -2048 ++; CHECK-NEXT: jr $ra ++ ret i64 -9221120234893606913 ++} ++ ++define i64 @li8008000080000000() { ++; CHECK-LABEL: li8008000080000000: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -524288 ++; CHECK-NEXT: lu32i.d $r4, -524288 ++; CHECK-NEXT: lu52i.d $r4, $r4, -2048 ++; CHECK-NEXT: jr $ra ++ ret i64 -9221120234893606912 ++} ++ ++define i64 @li80080000800007ff() { ++; CHECK-LABEL: li80080000800007ff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -524288 ++; CHECK-NEXT: ori $r4, $r4, 2047 ++; CHECK-NEXT: lu32i.d $r4, -524288 ++; CHECK-NEXT: lu52i.d $r4, $r4, -2048 ++; CHECK-NEXT: jr $ra ++ ret i64 -9221120234893604865 ++} ++ ++define i64 @li8008000080000800() { ++; CHECK-LABEL: li8008000080000800: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -524288 ++; CHECK-NEXT: ori $r4, $r4, 2048 ++; CHECK-NEXT: 
lu32i.d $r4, -524288 ++; CHECK-NEXT: lu52i.d $r4, $r4, -2048 ++; CHECK-NEXT: jr $ra ++ ret i64 -9221120234893604864 ++} ++ ++define i64 @li8008000080000fff() { ++; CHECK-LABEL: li8008000080000fff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -524288 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: lu32i.d $r4, -524288 ++; CHECK-NEXT: lu52i.d $r4, $r4, -2048 ++; CHECK-NEXT: jr $ra ++ ret i64 -9221120234893602817 ++} ++ ++define i64 @li80080000fffff000() { ++; CHECK-LABEL: li80080000fffff000: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -1 ++; CHECK-NEXT: lu32i.d $r4, -524288 ++; CHECK-NEXT: lu52i.d $r4, $r4, -2048 ++; CHECK-NEXT: jr $ra ++ ret i64 -9221120232746127360 ++} ++ ++define i64 @li80080000fffff7ff() { ++; CHECK-LABEL: li80080000fffff7ff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -1 ++; CHECK-NEXT: ori $r4, $r4, 2047 ++; CHECK-NEXT: lu32i.d $r4, -524288 ++; CHECK-NEXT: lu52i.d $r4, $r4, -2048 ++; CHECK-NEXT: jr $ra ++ ret i64 -9221120232746125313 ++} ++ ++define i64 @li80080000fffff800() { ++; CHECK-LABEL: li80080000fffff800: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.w $r4, $zero, -2048 ++; CHECK-NEXT: lu32i.d $r4, -524288 ++; CHECK-NEXT: lu52i.d $r4, $r4, -2048 ++; CHECK-NEXT: jr $ra ++ ret i64 -9221120232746125312 ++} ++ ++define i64 @li80080000ffffffff() { ++; CHECK-LABEL: li80080000ffffffff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.w $r4, $zero, -1 ++; CHECK-NEXT: lu32i.d $r4, -524288 ++; CHECK-NEXT: lu52i.d $r4, $r4, -2048 ++; CHECK-NEXT: jr $ra ++ ret i64 -9221120232746123265 ++} ++ ++define i64 @li800fffff00000000() { ++; CHECK-LABEL: li800fffff00000000: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r4, $zero, 0 ++; CHECK-NEXT: lu32i.d $r4, -1 ++; CHECK-NEXT: lu52i.d $r4, $r4, -2048 ++; CHECK-NEXT: jr $ra ++ ret i64 -9218868441522372608 ++} ++ ++define i64 @li800fffff000007ff() { ++; CHECK-LABEL: li800fffff000007ff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r4, $zero, 2047 ++; CHECK-NEXT: lu32i.d $r4, -1 ++; CHECK-NEXT: lu52i.d $r4, $r4, -2048 ++; CHECK-NEXT: jr $ra ++ ret i64 -9218868441522370561 ++} ++ ++define i64 @li800fffff00000800() { ++; CHECK-LABEL: li800fffff00000800: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r4, $zero, 2048 ++; CHECK-NEXT: lu32i.d $r4, -1 ++; CHECK-NEXT: lu52i.d $r4, $r4, -2048 ++; CHECK-NEXT: jr $ra ++ ret i64 -9218868441522370560 ++} ++ ++define i64 @li800fffff00000fff() { ++; CHECK-LABEL: li800fffff00000fff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r4, $zero, 4095 ++; CHECK-NEXT: lu32i.d $r4, -1 ++; CHECK-NEXT: lu52i.d $r4, $r4, -2048 ++; CHECK-NEXT: jr $ra ++ ret i64 -9218868441522368513 ++} ++ ++define i64 @li800fffff7ffff000() { ++; CHECK-LABEL: li800fffff7ffff000: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, 524287 ++; CHECK-NEXT: lu32i.d $r4, -1 ++; CHECK-NEXT: lu52i.d $r4, $r4, -2048 ++; CHECK-NEXT: jr $ra ++ ret i64 -9218868439374893056 ++} ++ ++define i64 @li800fffff7ffff7ff() { ++; CHECK-LABEL: li800fffff7ffff7ff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, 524287 ++; CHECK-NEXT: ori $r4, $r4, 2047 ++; CHECK-NEXT: lu32i.d $r4, -1 ++; CHECK-NEXT: lu52i.d $r4, $r4, -2048 ++; CHECK-NEXT: jr $ra ++ ret i64 -9218868439374891009 ++} ++ ++define i64 @li800fffff7ffff800() { ++; CHECK-LABEL: li800fffff7ffff800: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, 524287 ++; CHECK-NEXT: ori $r4, $r4, 2048 ++; CHECK-NEXT: lu32i.d $r4, -1 ++; CHECK-NEXT: lu52i.d $r4, $r4, -2048 ++; CHECK-NEXT: jr $ra ++ ret i64 -9218868439374891008 ++} ++ ++define i64 @li800fffff7fffffff() { ++; CHECK-LABEL: li800fffff7fffffff: ++; CHECK: # %bb.0: ++; 
CHECK-NEXT: lu12i.w $r4, 524287 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: lu32i.d $r4, -1 ++; CHECK-NEXT: lu52i.d $r4, $r4, -2048 ++; CHECK-NEXT: jr $ra ++ ret i64 -9218868439374888961 ++} ++ ++define i64 @li800fffff80000000() { ++; CHECK-LABEL: li800fffff80000000: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -524288 ++; CHECK-NEXT: lu52i.d $r4, $r4, -2048 ++; CHECK-NEXT: jr $ra ++ ret i64 -9218868439374888960 ++} ++ ++define i64 @li800fffff800007ff() { ++; CHECK-LABEL: li800fffff800007ff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -524288 ++; CHECK-NEXT: ori $r4, $r4, 2047 ++; CHECK-NEXT: lu52i.d $r4, $r4, -2048 ++; CHECK-NEXT: jr $ra ++ ret i64 -9218868439374886913 ++} ++ ++define i64 @li800fffff80000800() { ++; CHECK-LABEL: li800fffff80000800: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -524288 ++; CHECK-NEXT: ori $r4, $r4, 2048 ++; CHECK-NEXT: lu52i.d $r4, $r4, -2048 ++; CHECK-NEXT: jr $ra ++ ret i64 -9218868439374886912 ++} ++ ++define i64 @li800fffff80000fff() { ++; CHECK-LABEL: li800fffff80000fff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -524288 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: lu52i.d $r4, $r4, -2048 ++; CHECK-NEXT: jr $ra ++ ret i64 -9218868439374884865 ++} ++ ++define i64 @li800ffffffffff000() { ++; CHECK-LABEL: li800ffffffffff000: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -1 ++; CHECK-NEXT: lu52i.d $r4, $r4, -2048 ++; CHECK-NEXT: jr $ra ++ ret i64 -9218868437227409408 ++} ++ ++define i64 @li800ffffffffff7ff() { ++; CHECK-LABEL: li800ffffffffff7ff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -1 ++; CHECK-NEXT: ori $r4, $r4, 2047 ++; CHECK-NEXT: lu52i.d $r4, $r4, -2048 ++; CHECK-NEXT: jr $ra ++ ret i64 -9218868437227407361 ++} ++ ++define i64 @li800ffffffffff800() { ++; CHECK-LABEL: li800ffffffffff800: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.w $r4, $zero, -2048 ++; CHECK-NEXT: lu52i.d $r4, $r4, -2048 ++; CHECK-NEXT: jr $ra ++ ret i64 -9218868437227407360 ++} ++ ++define i64 @li800fffffffffffff() { ++; CHECK-LABEL: li800fffffffffffff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.w $r4, $zero, -1 ++; CHECK-NEXT: lu52i.d $r4, $r4, -2048 ++; CHECK-NEXT: jr $ra ++ ret i64 -9218868437227405313 ++} ++ ++define i64 @lifff0000000000000() { ++; CHECK-LABEL: lifff0000000000000: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu52i.d $r4, $zero, -1 ++; CHECK-NEXT: jr $ra ++ ret i64 -4503599627370496 ++} ++ ++define i64 @lifff00000000007ff() { ++; CHECK-LABEL: lifff00000000007ff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r4, $zero, 2047 ++; CHECK-NEXT: lu52i.d $r4, $r4, -1 ++; CHECK-NEXT: jr $ra ++ ret i64 -4503599627368449 ++} ++ ++define i64 @lifff0000000000800() { ++; CHECK-LABEL: lifff0000000000800: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r4, $zero, 2048 ++; CHECK-NEXT: lu52i.d $r4, $r4, -1 ++; CHECK-NEXT: jr $ra ++ ret i64 -4503599627368448 ++} ++ ++define i64 @lifff0000000000fff() { ++; CHECK-LABEL: lifff0000000000fff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r4, $zero, 4095 ++; CHECK-NEXT: lu52i.d $r4, $r4, -1 ++; CHECK-NEXT: jr $ra ++ ret i64 -4503599627366401 ++} ++ ++define i64 @lifff000007ffff000() { ++; CHECK-LABEL: lifff000007ffff000: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, 524287 ++; CHECK-NEXT: lu52i.d $r4, $r4, -1 ++; CHECK-NEXT: jr $ra ++ ret i64 -4503597479890944 ++} ++ ++define i64 @lifff000007ffff7ff() { ++; CHECK-LABEL: lifff000007ffff7ff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, 524287 ++; CHECK-NEXT: ori $r4, $r4, 2047 ++; CHECK-NEXT: lu52i.d $r4, $r4, -1 ++; CHECK-NEXT: jr $ra ++ ret i64 -4503597479888897 
++} ++ ++define i64 @lifff000007ffff800() { ++; CHECK-LABEL: lifff000007ffff800: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, 524287 ++; CHECK-NEXT: ori $r4, $r4, 2048 ++; CHECK-NEXT: lu52i.d $r4, $r4, -1 ++; CHECK-NEXT: jr $ra ++ ret i64 -4503597479888896 ++} ++ ++define i64 @lifff000007fffffff() { ++; CHECK-LABEL: lifff000007fffffff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, 524287 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: lu52i.d $r4, $r4, -1 ++; CHECK-NEXT: jr $ra ++ ret i64 -4503597479886849 ++} ++ ++define i64 @lifff0000080000000() { ++; CHECK-LABEL: lifff0000080000000: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -524288 ++; CHECK-NEXT: lu32i.d $r4, 0 ++; CHECK-NEXT: lu52i.d $r4, $r4, -1 ++; CHECK-NEXT: jr $ra ++ ret i64 -4503597479886848 ++} ++ ++define i64 @lifff00000800007ff() { ++; CHECK-LABEL: lifff00000800007ff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -524288 ++; CHECK-NEXT: ori $r4, $r4, 2047 ++; CHECK-NEXT: lu32i.d $r4, 0 ++; CHECK-NEXT: lu52i.d $r4, $r4, -1 ++; CHECK-NEXT: jr $ra ++ ret i64 -4503597479884801 ++} ++ ++define i64 @lifff0000080000800() { ++; CHECK-LABEL: lifff0000080000800: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -524288 ++; CHECK-NEXT: ori $r4, $r4, 2048 ++; CHECK-NEXT: lu32i.d $r4, 0 ++; CHECK-NEXT: lu52i.d $r4, $r4, -1 ++; CHECK-NEXT: jr $ra ++ ret i64 -4503597479884800 ++} ++ ++define i64 @lifff0000080000fff() { ++; CHECK-LABEL: lifff0000080000fff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -524288 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: lu32i.d $r4, 0 ++; CHECK-NEXT: lu52i.d $r4, $r4, -1 ++; CHECK-NEXT: jr $ra ++ ret i64 -4503597479882753 ++} ++ ++define i64 @lifff00000fffff000() { ++; CHECK-LABEL: lifff00000fffff000: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -1 ++; CHECK-NEXT: lu32i.d $r4, 0 ++; CHECK-NEXT: lu52i.d $r4, $r4, -1 ++; CHECK-NEXT: jr $ra ++ ret i64 -4503595332407296 ++} ++ ++define i64 @lifff00000fffff7ff() { ++; CHECK-LABEL: lifff00000fffff7ff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -1 ++; CHECK-NEXT: ori $r4, $r4, 2047 ++; CHECK-NEXT: lu32i.d $r4, 0 ++; CHECK-NEXT: lu52i.d $r4, $r4, -1 ++; CHECK-NEXT: jr $ra ++ ret i64 -4503595332405249 ++} ++ ++define i64 @lifff00000fffff800() { ++; CHECK-LABEL: lifff00000fffff800: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.w $r4, $zero, -2048 ++; CHECK-NEXT: lu32i.d $r4, 0 ++; CHECK-NEXT: lu52i.d $r4, $r4, -1 ++; CHECK-NEXT: jr $ra ++ ret i64 -4503595332405248 ++} ++ ++define i64 @lifff00000ffffffff() { ++; CHECK-LABEL: lifff00000ffffffff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.w $r4, $zero, -1 ++; CHECK-NEXT: lu32i.d $r4, 0 ++; CHECK-NEXT: lu52i.d $r4, $r4, -1 ++; CHECK-NEXT: jr $ra ++ ret i64 -4503595332403201 ++} ++ ++define i64 @lifff7ffff00000000() { ++; CHECK-LABEL: lifff7ffff00000000: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r4, $zero, 0 ++; CHECK-NEXT: lu32i.d $r4, 524287 ++; CHECK-NEXT: lu52i.d $r4, $r4, -1 ++; CHECK-NEXT: jr $ra ++ ret i64 -2251804108652544 ++} ++ ++define i64 @lifff7ffff000007ff() { ++; CHECK-LABEL: lifff7ffff000007ff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r4, $zero, 2047 ++; CHECK-NEXT: lu32i.d $r4, 524287 ++; CHECK-NEXT: lu52i.d $r4, $r4, -1 ++; CHECK-NEXT: jr $ra ++ ret i64 -2251804108650497 ++} ++ ++define i64 @lifff7ffff00000800() { ++; CHECK-LABEL: lifff7ffff00000800: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r4, $zero, 2048 ++; CHECK-NEXT: lu32i.d $r4, 524287 ++; CHECK-NEXT: lu52i.d $r4, $r4, -1 ++; CHECK-NEXT: jr $ra ++ ret i64 -2251804108650496 ++} ++ ++define i64 @lifff7ffff00000fff() { ++; 
CHECK-LABEL: lifff7ffff00000fff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r4, $zero, 4095 ++; CHECK-NEXT: lu32i.d $r4, 524287 ++; CHECK-NEXT: lu52i.d $r4, $r4, -1 ++; CHECK-NEXT: jr $ra ++ ret i64 -2251804108648449 ++} ++ ++define i64 @lifff7ffff7ffff000() { ++; CHECK-LABEL: lifff7ffff7ffff000: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, 524287 ++; CHECK-NEXT: lu32i.d $r4, 524287 ++; CHECK-NEXT: lu52i.d $r4, $r4, -1 ++; CHECK-NEXT: jr $ra ++ ret i64 -2251801961172992 ++} ++ ++define i64 @lifff7ffff7ffff7ff() { ++; CHECK-LABEL: lifff7ffff7ffff7ff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, 524287 ++; CHECK-NEXT: ori $r4, $r4, 2047 ++; CHECK-NEXT: lu32i.d $r4, 524287 ++; CHECK-NEXT: lu52i.d $r4, $r4, -1 ++; CHECK-NEXT: jr $ra ++ ret i64 -2251801961170945 ++} ++ ++define i64 @lifff7ffff7ffff800() { ++; CHECK-LABEL: lifff7ffff7ffff800: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, 524287 ++; CHECK-NEXT: ori $r4, $r4, 2048 ++; CHECK-NEXT: lu32i.d $r4, 524287 ++; CHECK-NEXT: lu52i.d $r4, $r4, -1 ++; CHECK-NEXT: jr $ra ++ ret i64 -2251801961170944 ++} ++ ++define i64 @lifff7ffff7fffffff() { ++; CHECK-LABEL: lifff7ffff7fffffff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, 524287 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: lu32i.d $r4, 524287 ++; CHECK-NEXT: lu52i.d $r4, $r4, -1 ++; CHECK-NEXT: jr $ra ++ ret i64 -2251801961168897 ++} ++ ++define i64 @lifff7ffff80000000() { ++; CHECK-LABEL: lifff7ffff80000000: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -524288 ++; CHECK-NEXT: lu32i.d $r4, 524287 ++; CHECK-NEXT: lu52i.d $r4, $r4, -1 ++; CHECK-NEXT: jr $ra ++ ret i64 -2251801961168896 ++} ++ ++define i64 @lifff7ffff800007ff() { ++; CHECK-LABEL: lifff7ffff800007ff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -524288 ++; CHECK-NEXT: ori $r4, $r4, 2047 ++; CHECK-NEXT: lu32i.d $r4, 524287 ++; CHECK-NEXT: lu52i.d $r4, $r4, -1 ++; CHECK-NEXT: jr $ra ++ ret i64 -2251801961166849 ++} ++ ++define i64 @lifff7ffff80000800() { ++; CHECK-LABEL: lifff7ffff80000800: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -524288 ++; CHECK-NEXT: ori $r4, $r4, 2048 ++; CHECK-NEXT: lu32i.d $r4, 524287 ++; CHECK-NEXT: lu52i.d $r4, $r4, -1 ++; CHECK-NEXT: jr $ra ++ ret i64 -2251801961166848 ++} ++ ++define i64 @lifff7ffff80000fff() { ++; CHECK-LABEL: lifff7ffff80000fff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -524288 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: lu32i.d $r4, 524287 ++; CHECK-NEXT: lu52i.d $r4, $r4, -1 ++; CHECK-NEXT: jr $ra ++ ret i64 -2251801961164801 ++} ++ ++define i64 @lifff7fffffffff000() { ++; CHECK-LABEL: lifff7fffffffff000: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -1 ++; CHECK-NEXT: lu32i.d $r4, 524287 ++; CHECK-NEXT: lu52i.d $r4, $r4, -1 ++; CHECK-NEXT: jr $ra ++ ret i64 -2251799813689344 ++} ++ ++define i64 @lifff7fffffffff7ff() { ++; CHECK-LABEL: lifff7fffffffff7ff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -1 ++; CHECK-NEXT: ori $r4, $r4, 2047 ++; CHECK-NEXT: lu32i.d $r4, 524287 ++; CHECK-NEXT: lu52i.d $r4, $r4, -1 ++; CHECK-NEXT: jr $ra ++ ret i64 -2251799813687297 ++} ++ ++define i64 @lifff7fffffffff800() { ++; CHECK-LABEL: lifff7fffffffff800: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.w $r4, $zero, -2048 ++; CHECK-NEXT: lu32i.d $r4, 524287 ++; CHECK-NEXT: lu52i.d $r4, $r4, -1 ++; CHECK-NEXT: jr $ra ++ ret i64 -2251799813687296 ++} ++ ++define i64 @lifff7ffffffffffff() { ++; CHECK-LABEL: lifff7ffffffffffff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.w $r4, $zero, -1 ++; CHECK-NEXT: lu32i.d $r4, 524287 ++; CHECK-NEXT: lu52i.d $r4, $r4, -1 
++; CHECK-NEXT: jr $ra ++ ret i64 -2251799813685249 ++} ++ ++define i64 @lifff8000000000000() { ++; CHECK-LABEL: lifff8000000000000: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r4, $zero, 0 ++; CHECK-NEXT: lu32i.d $r4, -524288 ++; CHECK-NEXT: jr $ra ++ ret i64 -2251799813685248 ++} ++ ++define i64 @lifff80000000007ff() { ++; CHECK-LABEL: lifff80000000007ff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r4, $zero, 2047 ++; CHECK-NEXT: lu32i.d $r4, -524288 ++; CHECK-NEXT: jr $ra ++ ret i64 -2251799813683201 ++} ++ ++define i64 @lifff8000000000800() { ++; CHECK-LABEL: lifff8000000000800: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r4, $zero, 2048 ++; CHECK-NEXT: lu32i.d $r4, -524288 ++; CHECK-NEXT: jr $ra ++ ret i64 -2251799813683200 ++} ++ ++define i64 @lifff8000000000fff() { ++; CHECK-LABEL: lifff8000000000fff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r4, $zero, 4095 ++; CHECK-NEXT: lu32i.d $r4, -524288 ++; CHECK-NEXT: jr $ra ++ ret i64 -2251799813681153 ++} ++ ++define i64 @lifff800007ffff000() { ++; CHECK-LABEL: lifff800007ffff000: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, 524287 ++; CHECK-NEXT: lu32i.d $r4, -524288 ++; CHECK-NEXT: jr $ra ++ ret i64 -2251797666205696 ++} ++ ++define i64 @lifff800007ffff7ff() { ++; CHECK-LABEL: lifff800007ffff7ff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, 524287 ++; CHECK-NEXT: ori $r4, $r4, 2047 ++; CHECK-NEXT: lu32i.d $r4, -524288 ++; CHECK-NEXT: jr $ra ++ ret i64 -2251797666203649 ++} ++ ++define i64 @lifff800007ffff800() { ++; CHECK-LABEL: lifff800007ffff800: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, 524287 ++; CHECK-NEXT: ori $r4, $r4, 2048 ++; CHECK-NEXT: lu32i.d $r4, -524288 ++; CHECK-NEXT: jr $ra ++ ret i64 -2251797666203648 ++} ++ ++define i64 @lifff800007fffffff() { ++; CHECK-LABEL: lifff800007fffffff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, 524287 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: lu32i.d $r4, -524288 ++; CHECK-NEXT: jr $ra ++ ret i64 -2251797666201601 ++} ++ ++define i64 @lifff8000080000000() { ++; CHECK-LABEL: lifff8000080000000: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -524288 ++; CHECK-NEXT: lu32i.d $r4, -524288 ++; CHECK-NEXT: jr $ra ++ ret i64 -2251797666201600 ++} ++ ++define i64 @lifff80000800007ff() { ++; CHECK-LABEL: lifff80000800007ff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -524288 ++; CHECK-NEXT: ori $r4, $r4, 2047 ++; CHECK-NEXT: lu32i.d $r4, -524288 ++; CHECK-NEXT: jr $ra ++ ret i64 -2251797666199553 ++} ++ ++define i64 @lifff8000080000800() { ++; CHECK-LABEL: lifff8000080000800: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -524288 ++; CHECK-NEXT: ori $r4, $r4, 2048 ++; CHECK-NEXT: lu32i.d $r4, -524288 ++; CHECK-NEXT: jr $ra ++ ret i64 -2251797666199552 ++} ++ ++define i64 @lifff8000080000fff() { ++; CHECK-LABEL: lifff8000080000fff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -524288 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: lu32i.d $r4, -524288 ++; CHECK-NEXT: jr $ra ++ ret i64 -2251797666197505 ++} ++ ++define i64 @lifff80000fffff000() { ++; CHECK-LABEL: lifff80000fffff000: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -1 ++; CHECK-NEXT: lu32i.d $r4, -524288 ++; CHECK-NEXT: jr $ra ++ ret i64 -2251795518722048 ++} ++ ++define i64 @lifff80000fffff7ff() { ++; CHECK-LABEL: lifff80000fffff7ff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -1 ++; CHECK-NEXT: ori $r4, $r4, 2047 ++; CHECK-NEXT: lu32i.d $r4, -524288 ++; CHECK-NEXT: jr $ra ++ ret i64 -2251795518720001 ++} ++ ++define i64 @lifff80000fffff800() { ++; CHECK-LABEL: lifff80000fffff800: ++; CHECK: # 
%bb.0: ++; CHECK-NEXT: addi.w $r4, $zero, -2048 ++; CHECK-NEXT: lu32i.d $r4, -524288 ++; CHECK-NEXT: jr $ra ++ ret i64 -2251795518720000 ++} ++ ++define i64 @lifff80000ffffffff() { ++; CHECK-LABEL: lifff80000ffffffff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.w $r4, $zero, -1 ++; CHECK-NEXT: lu32i.d $r4, -524288 ++; CHECK-NEXT: jr $ra ++ ret i64 -2251795518717953 ++} ++ ++define i64 @liffffffff00000000() { ++; CHECK-LABEL: liffffffff00000000: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r4, $zero, 0 ++; CHECK-NEXT: lu32i.d $r4, -1 ++; CHECK-NEXT: jr $ra ++ ret i64 -4294967296 ++} ++ ++define i64 @liffffffff000007ff() { ++; CHECK-LABEL: liffffffff000007ff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r4, $zero, 2047 ++; CHECK-NEXT: lu32i.d $r4, -1 ++; CHECK-NEXT: jr $ra ++ ret i64 -4294965249 ++} ++ ++define i64 @liffffffff00000800() { ++; CHECK-LABEL: liffffffff00000800: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r4, $zero, 2048 ++; CHECK-NEXT: lu32i.d $r4, -1 ++; CHECK-NEXT: jr $ra ++ ret i64 -4294965248 ++} ++ ++define i64 @liffffffff00000fff() { ++; CHECK-LABEL: liffffffff00000fff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r4, $zero, 4095 ++; CHECK-NEXT: lu32i.d $r4, -1 ++; CHECK-NEXT: jr $ra ++ ret i64 -4294963201 ++} ++ ++define i64 @liffffffff7ffff000() { ++; CHECK-LABEL: liffffffff7ffff000: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, 524287 ++; CHECK-NEXT: lu32i.d $r4, -1 ++; CHECK-NEXT: jr $ra ++ ret i64 -2147487744 ++} ++ ++define i64 @liffffffff7ffff7ff() { ++; CHECK-LABEL: liffffffff7ffff7ff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, 524287 ++; CHECK-NEXT: ori $r4, $r4, 2047 ++; CHECK-NEXT: lu32i.d $r4, -1 ++; CHECK-NEXT: jr $ra ++ ret i64 -2147485697 ++} ++ ++define i64 @liffffffff7ffff800() { ++; CHECK-LABEL: liffffffff7ffff800: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, 524287 ++; CHECK-NEXT: ori $r4, $r4, 2048 ++; CHECK-NEXT: lu32i.d $r4, -1 ++; CHECK-NEXT: jr $ra ++ ret i64 -2147485696 ++} ++ ++define i64 @liffffffff7fffffff() { ++; CHECK-LABEL: liffffffff7fffffff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, 524287 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: lu32i.d $r4, -1 ++; CHECK-NEXT: jr $ra ++ ret i64 -2147483649 ++} ++ ++define i64 @liffffffff80000000() { ++; CHECK-LABEL: liffffffff80000000: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -524288 ++; CHECK-NEXT: jr $ra ++ ret i64 -2147483648 ++} ++ ++define i64 @liffffffff800007ff() { ++; CHECK-LABEL: liffffffff800007ff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -524288 ++; CHECK-NEXT: ori $r4, $r4, 2047 ++; CHECK-NEXT: jr $ra ++ ret i64 -2147481601 ++} ++ ++define i64 @liffffffff80000800() { ++; CHECK-LABEL: liffffffff80000800: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -524288 ++; CHECK-NEXT: ori $r4, $r4, 2048 ++; CHECK-NEXT: jr $ra ++ ret i64 -2147481600 ++} ++ ++define i64 @liffffffff80000fff() { ++; CHECK-LABEL: liffffffff80000fff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -524288 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: jr $ra ++ ret i64 -2147479553 ++} ++ ++define i64 @lifffffffffffff000() { ++; CHECK-LABEL: lifffffffffffff000: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -1 ++; CHECK-NEXT: jr $ra ++ ret i64 -4096 ++} ++ ++define i64 @lifffffffffffff7ff() { ++; CHECK-LABEL: lifffffffffffff7ff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -1 ++; CHECK-NEXT: ori $r4, $r4, 2047 ++; CHECK-NEXT: jr $ra ++ ret i64 -2049 ++} ++ ++define i64 @lifffffffffffff800() { ++; CHECK-LABEL: lifffffffffffff800: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $r4, $zero, -2048 
++; CHECK-NEXT: jr $ra ++ ret i64 -2048 ++} ++ ++define i64 @liffffffffffffffff() { ++; CHECK-LABEL: liffffffffffffffff: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $r4, $zero, -1 ++; CHECK-NEXT: jr $ra ++ ret i64 -1 ++} +diff --git a/llvm/test/CodeGen/LoongArch/inlineasm/extra-code.ll b/llvm/test/CodeGen/LoongArch/inlineasm/extra-code.ll +new file mode 100644 +index 000000000..986e27e2a +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/inlineasm/extra-code.ll +@@ -0,0 +1,8 @@ ++; RUN: llc -march=loongarch64 -no-integrated-as -o - %s | FileCheck %s ++ ++define i64 @test(i64 %a) { ++; CHECK: add.d $r4, $r4, $r0 ++entry: ++ %0 = tail call i64 asm sideeffect "add.d $0, $1, ${2:z} \0A", "=r,r,Jr"(i64 %a, i64 0) ++ ret i64 %0 ++} +diff --git a/llvm/test/CodeGen/LoongArch/inlineasm/floating-point-in-gpr.ll b/llvm/test/CodeGen/LoongArch/inlineasm/floating-point-in-gpr.ll +new file mode 100644 +index 000000000..94e330673 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/inlineasm/floating-point-in-gpr.ll +@@ -0,0 +1,31 @@ ++; RUN: llc -march=loongarch64 -target-abi=lp64 -o - %s 2>&1 | FileCheck %s ++ ++;; Test that floating-point bits can be stored in GPR. ++ ++define void @reg_float(float %x) { ++; CHECK-LABEL: reg_float: ++; CHECK: movfr2gr.s $r{{[0-9]+}}, $f0 ++ call void asm "", "r"(float %x) ++ ret void ++} ++ ++define void @r10_float(float %x) { ++; CHECK-LABEL: r10_float: ++; CHECK: movfr2gr.s $r10, $f0 ++ call void asm "", "{$r10}"(float %x) ++ ret void ++} ++ ++define void @reg_double(double %x) { ++; CHECK-LABEL: reg_double: ++; CHECK: movfr2gr.d $r{{[0-9]+}}, $f0 ++ call void asm "", "r"(double %x) ++ ret void ++} ++ ++define void @r10_double(double %x) { ++; CHECK-LABEL: r10_double: ++; CHECK: movfr2gr.d $r10, $f0 ++ call void asm "", "{$r10}"(double %x) ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/inlineasm/non-native-value-type-registers-error.ll b/llvm/test/CodeGen/LoongArch/inlineasm/non-native-value-type-registers-error.ll +new file mode 100644 +index 000000000..7f58ea2ee +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/inlineasm/non-native-value-type-registers-error.ll +@@ -0,0 +1,8 @@ ++; RUN: not llc -march=loongarch64 %s 2>&1 | FileCheck %s ++ ++define void @test_i128() { ++; CHECK: error: couldn't allocate input reg for constraint '{$r20}' ++start: ++ call void asm "", "{$r20}"(i128 5) ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/inlineasm/non-native-value-type-registers.ll b/llvm/test/CodeGen/LoongArch/inlineasm/non-native-value-type-registers.ll +new file mode 100644 +index 000000000..d18a184ab +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/inlineasm/non-native-value-type-registers.ll +@@ -0,0 +1,42 @@ ++; RUN: llc -march=loongarch64 -o - %s 2>&1 | FileCheck %s ++ ++;; Test that non native value types can be parsed. ++ ++define void @test_i1() { ++; CHECK-LABEL: test_i1: ++; CHECK: ori $r6, $zero, 0 ++; CHECK: jr $ra ++start: ++ call void asm "", "{$r6}"(i1 0) ++ ret void ++} ++ ++;; Note: non-simple values like `i3` are only allowed in newer llvm versions (>= 12). ++;; In older llvm versions (<= 11), SelectionDAGBuilder::visitInlineAsm asserts simple ++;; values must be used. For details, please see https://reviews.llvm.org/D91710. 
++define void @test_i3() { ++; CHECK-LABEL: test_i3: ++; CHECK: ori $r7, $zero, 0 ++; CHECK: jr $ra ++start: ++ call void asm "", "{$r7}"(i3 0) ++ ret void ++} ++ ++define void @test_i8() { ++; CHECK-LABEL: test_i8: ++; CHECK: ori $r5, $zero, 0 ++; CHECK: jr $ra ++start: ++ call void asm "", "{$r5}"(i8 0) ++ ret void ++} ++ ++define void @test_i16() { ++; CHECK-LABEL: test_i16: ++; CHECK: ori $r20, $zero, 5 ++; CHECK: jr $ra ++start: ++ call void asm "", "{$r20}"(i16 5) ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/inlineasm/preld.ll b/llvm/test/CodeGen/LoongArch/inlineasm/preld.ll +new file mode 100644 +index 000000000..8dbbed99f +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/inlineasm/preld.ll +@@ -0,0 +1,8 @@ ++; RUN: llc -march=loongarch64 -o - %s | FileCheck %s ++ ++define void @preld(i32* %p) { ++entry: ++ ; CHECK: preld 10, $r4, 23 ++ tail call void asm sideeffect "preld 10, $0, 23 \0A\09", "r"(i32* %p) ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/add.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/add.ll +deleted file mode 100644 +index bfa1a5975..000000000 +--- a/llvm/test/CodeGen/LoongArch/ir-instruction/add.ll ++++ /dev/null +@@ -1,183 +0,0 @@ +-; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32 +-; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64 +- +-;; Exercise the 'add' LLVM IR: https://llvm.org/docs/LangRef.html#add-instruction +- +-define i1 @add_i1(i1 %x, i1 %y) { +-; LA32-LABEL: add_i1: +-; LA32: # %bb.0: +-; LA32-NEXT: add.w $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: add_i1: +-; LA64: # %bb.0: +-; LA64-NEXT: add.d $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %add = add i1 %x, %y +- ret i1 %add +-} +- +-define i8 @add_i8(i8 %x, i8 %y) { +-; LA32-LABEL: add_i8: +-; LA32: # %bb.0: +-; LA32-NEXT: add.w $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: add_i8: +-; LA64: # %bb.0: +-; LA64-NEXT: add.d $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %add = add i8 %x, %y +- ret i8 %add +-} +- +-define i16 @add_i16(i16 %x, i16 %y) { +-; LA32-LABEL: add_i16: +-; LA32: # %bb.0: +-; LA32-NEXT: add.w $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: add_i16: +-; LA64: # %bb.0: +-; LA64-NEXT: add.d $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %add = add i16 %x, %y +- ret i16 %add +-} +- +-define i32 @add_i32(i32 %x, i32 %y) { +-; LA32-LABEL: add_i32: +-; LA32: # %bb.0: +-; LA32-NEXT: add.w $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: add_i32: +-; LA64: # %bb.0: +-; LA64-NEXT: add.d $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %add = add i32 %x, %y +- ret i32 %add +-} +- +-;; Match the pattern: +-;; def : PatGprGpr_32; +-define signext i32 @add_i32_sext(i32 %x, i32 %y) { +-; LA32-LABEL: add_i32_sext: +-; LA32: # %bb.0: +-; LA32-NEXT: add.w $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: add_i32_sext: +-; LA64: # %bb.0: +-; LA64-NEXT: add.w $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %add = add i32 %x, %y +- ret i32 %add +-} +- +-define i64 @add_i64(i64 %x, i64 %y) { +-; LA32-LABEL: add_i64: +-; LA32: # %bb.0: +-; LA32-NEXT: add.w $a1, $a1, $a3 +-; LA32-NEXT: add.w $a2, $a0, $a2 +-; LA32-NEXT: sltu $a0, $a2, $a0 +-; LA32-NEXT: add.w $a1, $a1, $a0 +-; LA32-NEXT: move $a0, $a2 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: add_i64: +-; LA64: # %bb.0: +-; LA64-NEXT: add.d $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %add = add i64 %x, %y +- ret i64 %add +-} +- 
+-define i1 @add_i1_3(i1 %x) { +-; LA32-LABEL: add_i1_3: +-; LA32: # %bb.0: +-; LA32-NEXT: addi.w $a0, $a0, 1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: add_i1_3: +-; LA64: # %bb.0: +-; LA64-NEXT: addi.d $a0, $a0, 1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %add = add i1 %x, 3 +- ret i1 %add +-} +- +-define i8 @add_i8_3(i8 %x) { +-; LA32-LABEL: add_i8_3: +-; LA32: # %bb.0: +-; LA32-NEXT: addi.w $a0, $a0, 3 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: add_i8_3: +-; LA64: # %bb.0: +-; LA64-NEXT: addi.d $a0, $a0, 3 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %add = add i8 %x, 3 +- ret i8 %add +-} +- +-define i16 @add_i16_3(i16 %x) { +-; LA32-LABEL: add_i16_3: +-; LA32: # %bb.0: +-; LA32-NEXT: addi.w $a0, $a0, 3 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: add_i16_3: +-; LA64: # %bb.0: +-; LA64-NEXT: addi.d $a0, $a0, 3 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %add = add i16 %x, 3 +- ret i16 %add +-} +- +-define i32 @add_i32_3(i32 %x) { +-; LA32-LABEL: add_i32_3: +-; LA32: # %bb.0: +-; LA32-NEXT: addi.w $a0, $a0, 3 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: add_i32_3: +-; LA64: # %bb.0: +-; LA64-NEXT: addi.d $a0, $a0, 3 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %add = add i32 %x, 3 +- ret i32 %add +-} +- +-;; Match the pattern: +-;; def : PatGprImm_32; +-define signext i32 @add_i32_3_sext(i32 %x) { +-; LA32-LABEL: add_i32_3_sext: +-; LA32: # %bb.0: +-; LA32-NEXT: addi.w $a0, $a0, 3 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: add_i32_3_sext: +-; LA64: # %bb.0: +-; LA64-NEXT: addi.w $a0, $a0, 3 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %add = add i32 %x, 3 +- ret i32 %add +-} +- +-define i64 @add_i64_3(i64 %x) { +-; LA32-LABEL: add_i64_3: +-; LA32: # %bb.0: +-; LA32-NEXT: addi.w $a2, $a0, 3 +-; LA32-NEXT: sltu $a0, $a2, $a0 +-; LA32-NEXT: add.w $a1, $a1, $a0 +-; LA32-NEXT: move $a0, $a2 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: add_i64_3: +-; LA64: # %bb.0: +-; LA64-NEXT: addi.d $a0, $a0, 3 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %add = add i64 %x, 3 +- ret i64 %add +-} +diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/and.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/and.ll +deleted file mode 100644 +index e5c9da58c..000000000 +--- a/llvm/test/CodeGen/LoongArch/ir-instruction/and.ll ++++ /dev/null +@@ -1,266 +0,0 @@ +-; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32 +-; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64 +- +-;; Exercise the 'and' LLVM IR: https://llvm.org/docs/LangRef.html#and-instruction +- +-define i1 @and_i1(i1 %a, i1 %b) { +-; LA32-LABEL: and_i1: +-; LA32: # %bb.0: # %entry +-; LA32-NEXT: and $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: and_i1: +-; LA64: # %bb.0: # %entry +-; LA64-NEXT: and $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +-entry: +- %r = and i1 %a, %b +- ret i1 %r +-} +- +-define i8 @and_i8(i8 %a, i8 %b) { +-; LA32-LABEL: and_i8: +-; LA32: # %bb.0: # %entry +-; LA32-NEXT: and $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: and_i8: +-; LA64: # %bb.0: # %entry +-; LA64-NEXT: and $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +-entry: +- %r = and i8 %a, %b +- ret i8 %r +-} +- +-define i16 @and_i16(i16 %a, i16 %b) { +-; LA32-LABEL: and_i16: +-; LA32: # %bb.0: # %entry +-; LA32-NEXT: and $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: and_i16: +-; LA64: # %bb.0: # %entry +-; LA64-NEXT: and $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +-entry: +- %r = and i16 %a, %b +- ret i16 %r +-} +- +-define i32 @and_i32(i32 
%a, i32 %b) { +-; LA32-LABEL: and_i32: +-; LA32: # %bb.0: # %entry +-; LA32-NEXT: and $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: and_i32: +-; LA64: # %bb.0: # %entry +-; LA64-NEXT: and $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +-entry: +- %r = and i32 %a, %b +- ret i32 %r +-} +- +-define i64 @and_i64(i64 %a, i64 %b) { +-; LA32-LABEL: and_i64: +-; LA32: # %bb.0: # %entry +-; LA32-NEXT: and $a0, $a0, $a2 +-; LA32-NEXT: and $a1, $a1, $a3 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: and_i64: +-; LA64: # %bb.0: # %entry +-; LA64-NEXT: and $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +-entry: +- %r = and i64 %a, %b +- ret i64 %r +-} +- +-define i1 @and_i1_0(i1 %b) { +-; LA32-LABEL: and_i1_0: +-; LA32: # %bb.0: # %entry +-; LA32-NEXT: move $a0, $zero +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: and_i1_0: +-; LA64: # %bb.0: # %entry +-; LA64-NEXT: move $a0, $zero +-; LA64-NEXT: jirl $zero, $ra, 0 +-entry: +- %r = and i1 4, %b +- ret i1 %r +-} +- +-define i1 @and_i1_5(i1 %b) { +-; LA32-LABEL: and_i1_5: +-; LA32: # %bb.0: # %entry +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: and_i1_5: +-; LA64: # %bb.0: # %entry +-; LA64-NEXT: jirl $zero, $ra, 0 +-entry: +- %r = and i1 5, %b +- ret i1 %r +-} +- +-define i8 @and_i8_5(i8 %b) { +-; LA32-LABEL: and_i8_5: +-; LA32: # %bb.0: # %entry +-; LA32-NEXT: andi $a0, $a0, 5 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: and_i8_5: +-; LA64: # %bb.0: # %entry +-; LA64-NEXT: andi $a0, $a0, 5 +-; LA64-NEXT: jirl $zero, $ra, 0 +-entry: +- %r = and i8 5, %b +- ret i8 %r +-} +- +-define i8 @and_i8_257(i8 %b) { +-; LA32-LABEL: and_i8_257: +-; LA32: # %bb.0: # %entry +-; LA32-NEXT: andi $a0, $a0, 1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: and_i8_257: +-; LA64: # %bb.0: # %entry +-; LA64-NEXT: andi $a0, $a0, 1 +-; LA64-NEXT: jirl $zero, $ra, 0 +-entry: +- %r = and i8 257, %b +- ret i8 %r +-} +- +-define i16 @and_i16_5(i16 %b) { +-; LA32-LABEL: and_i16_5: +-; LA32: # %bb.0: # %entry +-; LA32-NEXT: andi $a0, $a0, 5 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: and_i16_5: +-; LA64: # %bb.0: # %entry +-; LA64-NEXT: andi $a0, $a0, 5 +-; LA64-NEXT: jirl $zero, $ra, 0 +-entry: +- %r = and i16 5, %b +- ret i16 %r +-} +- +-define i16 @and_i16_0x1000(i16 %b) { +-; LA32-LABEL: and_i16_0x1000: +-; LA32: # %bb.0: # %entry +-; LA32-NEXT: lu12i.w $a1, 1 +-; LA32-NEXT: and $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: and_i16_0x1000: +-; LA64: # %bb.0: # %entry +-; LA64-NEXT: lu12i.w $a1, 1 +-; LA64-NEXT: and $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +-entry: +- %r = and i16 4096, %b +- ret i16 %r +-} +- +-define i16 @and_i16_0x10001(i16 %b) { +-; LA32-LABEL: and_i16_0x10001: +-; LA32: # %bb.0: # %entry +-; LA32-NEXT: andi $a0, $a0, 1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: and_i16_0x10001: +-; LA64: # %bb.0: # %entry +-; LA64-NEXT: andi $a0, $a0, 1 +-; LA64-NEXT: jirl $zero, $ra, 0 +-entry: +- %r = and i16 65537, %b +- ret i16 %r +-} +- +-define i32 @and_i32_5(i32 %b) { +-; LA32-LABEL: and_i32_5: +-; LA32: # %bb.0: # %entry +-; LA32-NEXT: andi $a0, $a0, 5 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: and_i32_5: +-; LA64: # %bb.0: # %entry +-; LA64-NEXT: andi $a0, $a0, 5 +-; LA64-NEXT: jirl $zero, $ra, 0 +-entry: +- %r = and i32 5, %b +- ret i32 %r +-} +- +-define i32 @and_i32_0x1000(i32 %b) { +-; LA32-LABEL: and_i32_0x1000: +-; LA32: # %bb.0: # %entry +-; LA32-NEXT: lu12i.w $a1, 1 +-; LA32-NEXT: and $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; 
+-; LA64-LABEL: and_i32_0x1000: +-; LA64: # %bb.0: # %entry +-; LA64-NEXT: lu12i.w $a1, 1 +-; LA64-NEXT: and $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +-entry: +- %r = and i32 4096, %b +- ret i32 %r +-} +- +-define i32 @and_i32_0x100000001(i32 %b) { +-; LA32-LABEL: and_i32_0x100000001: +-; LA32: # %bb.0: # %entry +-; LA32-NEXT: andi $a0, $a0, 1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: and_i32_0x100000001: +-; LA64: # %bb.0: # %entry +-; LA64-NEXT: andi $a0, $a0, 1 +-; LA64-NEXT: jirl $zero, $ra, 0 +-entry: +- %r = and i32 4294967297, %b +- ret i32 %r +-} +- +-define i64 @and_i64_5(i64 %b) { +-; LA32-LABEL: and_i64_5: +-; LA32: # %bb.0: # %entry +-; LA32-NEXT: andi $a0, $a0, 5 +-; LA32-NEXT: move $a1, $zero +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: and_i64_5: +-; LA64: # %bb.0: # %entry +-; LA64-NEXT: andi $a0, $a0, 5 +-; LA64-NEXT: jirl $zero, $ra, 0 +-entry: +- %r = and i64 5, %b +- ret i64 %r +-} +- +-define i64 @and_i64_0x1000(i64 %b) { +-; LA32-LABEL: and_i64_0x1000: +-; LA32: # %bb.0: # %entry +-; LA32-NEXT: lu12i.w $a1, 1 +-; LA32-NEXT: and $a0, $a0, $a1 +-; LA32-NEXT: move $a1, $zero +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: and_i64_0x1000: +-; LA64: # %bb.0: # %entry +-; LA64-NEXT: lu12i.w $a1, 1 +-; LA64-NEXT: and $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +-entry: +- %r = and i64 4096, %b +- ret i64 %r +-} +diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/ashr.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/ashr.ll +deleted file mode 100644 +index 1b7e80851..000000000 +--- a/llvm/test/CodeGen/LoongArch/ir-instruction/ashr.ll ++++ /dev/null +@@ -1,168 +0,0 @@ +-; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32 +-; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64 +- +-;; Exercise the 'ashr' LLVM IR: https://llvm.org/docs/LangRef.html#ashr-instruction +- +-define i1 @ashr_i1(i1 %x, i1 %y) { +-; LA32-LABEL: ashr_i1: +-; LA32: # %bb.0: +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: ashr_i1: +-; LA64: # %bb.0: +-; LA64-NEXT: jirl $zero, $ra, 0 +- %ashr = ashr i1 %x, %y +- ret i1 %ashr +-} +- +-define i8 @ashr_i8(i8 %x, i8 %y) { +-; LA32-LABEL: ashr_i8: +-; LA32: # %bb.0: +-; LA32-NEXT: ext.w.b $a0, $a0 +-; LA32-NEXT: sra.w $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: ashr_i8: +-; LA64: # %bb.0: +-; LA64-NEXT: ext.w.b $a0, $a0 +-; LA64-NEXT: sra.d $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %ashr = ashr i8 %x, %y +- ret i8 %ashr +-} +- +-define i16 @ashr_i16(i16 %x, i16 %y) { +-; LA32-LABEL: ashr_i16: +-; LA32: # %bb.0: +-; LA32-NEXT: ext.w.h $a0, $a0 +-; LA32-NEXT: sra.w $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: ashr_i16: +-; LA64: # %bb.0: +-; LA64-NEXT: ext.w.h $a0, $a0 +-; LA64-NEXT: sra.d $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %ashr = ashr i16 %x, %y +- ret i16 %ashr +-} +- +-define i32 @ashr_i32(i32 %x, i32 %y) { +-; LA32-LABEL: ashr_i32: +-; LA32: # %bb.0: +-; LA32-NEXT: sra.w $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: ashr_i32: +-; LA64: # %bb.0: +-; LA64-NEXT: sra.w $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %ashr = ashr i32 %x, %y +- ret i32 %ashr +-} +- +-define i64 @ashr_i64(i64 %x, i64 %y) { +-; LA32-LABEL: ashr_i64: +-; LA32: # %bb.0: +-; LA32-NEXT: srai.w $a3, $a1, 31 +-; LA32-NEXT: addi.w $a4, $a2, -32 +-; LA32-NEXT: slti $a5, $a4, 0 +-; LA32-NEXT: masknez $a3, $a3, $a5 +-; LA32-NEXT: sra.w $a6, $a1, $a2 +-; LA32-NEXT: maskeqz $a6, $a6, $a5 +-; LA32-NEXT: or $a3, 
$a6, $a3 +-; LA32-NEXT: srl.w $a0, $a0, $a2 +-; LA32-NEXT: xori $a2, $a2, 31 +-; LA32-NEXT: slli.w $a6, $a1, 1 +-; LA32-NEXT: sll.w $a2, $a6, $a2 +-; LA32-NEXT: or $a0, $a0, $a2 +-; LA32-NEXT: sra.w $a1, $a1, $a4 +-; LA32-NEXT: maskeqz $a0, $a0, $a5 +-; LA32-NEXT: masknez $a1, $a1, $a5 +-; LA32-NEXT: or $a0, $a0, $a1 +-; LA32-NEXT: move $a1, $a3 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: ashr_i64: +-; LA64: # %bb.0: +-; LA64-NEXT: sra.d $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %ashr = ashr i64 %x, %y +- ret i64 %ashr +-} +- +-define i1 @ashr_i1_3(i1 %x) { +-; LA32-LABEL: ashr_i1_3: +-; LA32: # %bb.0: +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: ashr_i1_3: +-; LA64: # %bb.0: +-; LA64-NEXT: jirl $zero, $ra, 0 +- %ashr = ashr i1 %x, 3 +- ret i1 %ashr +-} +- +-define i8 @ashr_i8_3(i8 %x) { +-; LA32-LABEL: ashr_i8_3: +-; LA32: # %bb.0: +-; LA32-NEXT: ext.w.b $a0, $a0 +-; LA32-NEXT: srai.w $a0, $a0, 3 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: ashr_i8_3: +-; LA64: # %bb.0: +-; LA64-NEXT: ext.w.b $a0, $a0 +-; LA64-NEXT: srai.d $a0, $a0, 3 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %ashr = ashr i8 %x, 3 +- ret i8 %ashr +-} +- +-define i16 @ashr_i16_3(i16 %x) { +-; LA32-LABEL: ashr_i16_3: +-; LA32: # %bb.0: +-; LA32-NEXT: ext.w.h $a0, $a0 +-; LA32-NEXT: srai.w $a0, $a0, 3 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: ashr_i16_3: +-; LA64: # %bb.0: +-; LA64-NEXT: ext.w.h $a0, $a0 +-; LA64-NEXT: srai.d $a0, $a0, 3 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %ashr = ashr i16 %x, 3 +- ret i16 %ashr +-} +- +-define i32 @ashr_i32_3(i32 %x) { +-; LA32-LABEL: ashr_i32_3: +-; LA32: # %bb.0: +-; LA32-NEXT: srai.w $a0, $a0, 3 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: ashr_i32_3: +-; LA64: # %bb.0: +-; LA64-NEXT: addi.w $a0, $a0, 0 +-; LA64-NEXT: srai.d $a0, $a0, 3 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %ashr = ashr i32 %x, 3 +- ret i32 %ashr +-} +- +-define i64 @ashr_i64_3(i64 %x) { +-; LA32-LABEL: ashr_i64_3: +-; LA32: # %bb.0: +-; LA32-NEXT: srli.w $a0, $a0, 3 +-; LA32-NEXT: slli.w $a2, $a1, 29 +-; LA32-NEXT: or $a0, $a0, $a2 +-; LA32-NEXT: srai.w $a1, $a1, 3 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: ashr_i64_3: +-; LA64: # %bb.0: +-; LA64-NEXT: srai.d $a0, $a0, 3 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %ashr = ashr i64 %x, 3 +- ret i64 %ashr +-} +diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/br.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/br.ll +deleted file mode 100644 +index f46eca268..000000000 +--- a/llvm/test/CodeGen/LoongArch/ir-instruction/br.ll ++++ /dev/null +@@ -1,358 +0,0 @@ +-; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefixes=ALL,LA32 +-; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefixes=ALL,LA64 +- +-define void @foo() noreturn nounwind { +-; ALL-LABEL: foo: +-; ALL: # %bb.0: # %entry +-; ALL-NEXT: .LBB0_1: # %loop +-; ALL-NEXT: # =>This Inner Loop Header: Depth=1 +-; ALL-NEXT: b .LBB0_1 +-entry: +- br label %loop +-loop: +- br label %loop +-} +- +-define void @foo_br_eq(i32 %a, ptr %b) nounwind { +-; LA32-LABEL: foo_br_eq: +-; LA32: # %bb.0: +-; LA32-NEXT: ld.w $a2, $a1, 0 +-; LA32-NEXT: beq $a2, $a0, .LBB1_2 +-; LA32-NEXT: b .LBB1_1 +-; LA32-NEXT: .LBB1_1: # %test +-; LA32-NEXT: ld.w $a0, $a1, 0 +-; LA32-NEXT: .LBB1_2: # %end +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: foo_br_eq: +-; LA64: # %bb.0: +-; LA64-NEXT: ld.wu $a2, $a1, 0 +-; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0 +-; LA64-NEXT: beq $a2, $a0, .LBB1_2 +-; LA64-NEXT: b .LBB1_1 +-; LA64-NEXT: .LBB1_1: # %test +-; LA64-NEXT: 
ld.w $a0, $a1, 0 +-; LA64-NEXT: .LBB1_2: # %end +-; LA64-NEXT: jirl $zero, $ra, 0 +- %val = load volatile i32, ptr %b +- %cc = icmp eq i32 %val, %a +- br i1 %cc, label %end, label %test +-test: +- %tmp = load volatile i32, ptr %b +- br label %end +- +-end: +- ret void +-} +- +-define void @foo_br_ne(i32 %a, ptr %b) nounwind { +-; LA32-LABEL: foo_br_ne: +-; LA32: # %bb.0: +-; LA32-NEXT: ld.w $a2, $a1, 0 +-; LA32-NEXT: bne $a2, $a0, .LBB2_2 +-; LA32-NEXT: b .LBB2_1 +-; LA32-NEXT: .LBB2_1: # %test +-; LA32-NEXT: ld.w $a0, $a1, 0 +-; LA32-NEXT: .LBB2_2: # %end +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: foo_br_ne: +-; LA64: # %bb.0: +-; LA64-NEXT: ld.wu $a2, $a1, 0 +-; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0 +-; LA64-NEXT: bne $a2, $a0, .LBB2_2 +-; LA64-NEXT: b .LBB2_1 +-; LA64-NEXT: .LBB2_1: # %test +-; LA64-NEXT: ld.w $a0, $a1, 0 +-; LA64-NEXT: .LBB2_2: # %end +-; LA64-NEXT: jirl $zero, $ra, 0 +- %val = load volatile i32, ptr %b +- %cc = icmp ne i32 %val, %a +- br i1 %cc, label %end, label %test +-test: +- %tmp = load volatile i32, ptr %b +- br label %end +- +-end: +- ret void +-} +- +-define void @foo_br_slt(i32 %a, ptr %b) nounwind { +-; LA32-LABEL: foo_br_slt: +-; LA32: # %bb.0: +-; LA32-NEXT: ld.w $a2, $a1, 0 +-; LA32-NEXT: blt $a2, $a0, .LBB3_2 +-; LA32-NEXT: b .LBB3_1 +-; LA32-NEXT: .LBB3_1: # %test +-; LA32-NEXT: ld.w $a0, $a1, 0 +-; LA32-NEXT: .LBB3_2: # %end +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: foo_br_slt: +-; LA64: # %bb.0: +-; LA64-NEXT: ld.w $a2, $a1, 0 +-; LA64-NEXT: addi.w $a0, $a0, 0 +-; LA64-NEXT: blt $a2, $a0, .LBB3_2 +-; LA64-NEXT: b .LBB3_1 +-; LA64-NEXT: .LBB3_1: # %test +-; LA64-NEXT: ld.w $a0, $a1, 0 +-; LA64-NEXT: .LBB3_2: # %end +-; LA64-NEXT: jirl $zero, $ra, 0 +- %val = load volatile i32, ptr %b +- %cc = icmp slt i32 %val, %a +- br i1 %cc, label %end, label %test +-test: +- %tmp = load volatile i32, ptr %b +- br label %end +- +-end: +- ret void +-} +- +-define void @foo_br_sge(i32 %a, ptr %b) nounwind { +-; LA32-LABEL: foo_br_sge: +-; LA32: # %bb.0: +-; LA32-NEXT: ld.w $a2, $a1, 0 +-; LA32-NEXT: bge $a2, $a0, .LBB4_2 +-; LA32-NEXT: b .LBB4_1 +-; LA32-NEXT: .LBB4_1: # %test +-; LA32-NEXT: ld.w $a0, $a1, 0 +-; LA32-NEXT: .LBB4_2: # %end +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: foo_br_sge: +-; LA64: # %bb.0: +-; LA64-NEXT: ld.w $a2, $a1, 0 +-; LA64-NEXT: addi.w $a0, $a0, 0 +-; LA64-NEXT: bge $a2, $a0, .LBB4_2 +-; LA64-NEXT: b .LBB4_1 +-; LA64-NEXT: .LBB4_1: # %test +-; LA64-NEXT: ld.w $a0, $a1, 0 +-; LA64-NEXT: .LBB4_2: # %end +-; LA64-NEXT: jirl $zero, $ra, 0 +- %val = load volatile i32, ptr %b +- %cc = icmp sge i32 %val, %a +- br i1 %cc, label %end, label %test +-test: +- %tmp = load volatile i32, ptr %b +- br label %end +- +-end: +- ret void +-} +- +-define void @foo_br_ult(i32 %a, ptr %b) nounwind { +-; LA32-LABEL: foo_br_ult: +-; LA32: # %bb.0: +-; LA32-NEXT: ld.w $a2, $a1, 0 +-; LA32-NEXT: bltu $a2, $a0, .LBB5_2 +-; LA32-NEXT: b .LBB5_1 +-; LA32-NEXT: .LBB5_1: # %test +-; LA32-NEXT: ld.w $a0, $a1, 0 +-; LA32-NEXT: .LBB5_2: # %end +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: foo_br_ult: +-; LA64: # %bb.0: +-; LA64-NEXT: ld.wu $a2, $a1, 0 +-; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0 +-; LA64-NEXT: bltu $a2, $a0, .LBB5_2 +-; LA64-NEXT: b .LBB5_1 +-; LA64-NEXT: .LBB5_1: # %test +-; LA64-NEXT: ld.w $a0, $a1, 0 +-; LA64-NEXT: .LBB5_2: # %end +-; LA64-NEXT: jirl $zero, $ra, 0 +- %val = load volatile i32, ptr %b +- %cc = icmp ult i32 %val, %a +- br i1 %cc, label %end, label %test +-test: +- %tmp = load volatile i32, ptr %b +- 
br label %end +- +-end: +- ret void +-} +- +-define void @foo_br_uge(i32 %a, ptr %b) nounwind { +-; LA32-LABEL: foo_br_uge: +-; LA32: # %bb.0: +-; LA32-NEXT: ld.w $a2, $a1, 0 +-; LA32-NEXT: bgeu $a2, $a0, .LBB6_2 +-; LA32-NEXT: b .LBB6_1 +-; LA32-NEXT: .LBB6_1: # %test +-; LA32-NEXT: ld.w $a0, $a1, 0 +-; LA32-NEXT: .LBB6_2: # %end +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: foo_br_uge: +-; LA64: # %bb.0: +-; LA64-NEXT: ld.wu $a2, $a1, 0 +-; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0 +-; LA64-NEXT: bgeu $a2, $a0, .LBB6_2 +-; LA64-NEXT: b .LBB6_1 +-; LA64-NEXT: .LBB6_1: # %test +-; LA64-NEXT: ld.w $a0, $a1, 0 +-; LA64-NEXT: .LBB6_2: # %end +-; LA64-NEXT: jirl $zero, $ra, 0 +- %val = load volatile i32, ptr %b +- %cc = icmp uge i32 %val, %a +- br i1 %cc, label %end, label %test +-test: +- %tmp = load volatile i32, ptr %b +- br label %end +- +-end: +- ret void +-} +- +-;; Check for condition codes that don't have a matching instruction. +-define void @foo_br_sgt(i32 %a, ptr %b) nounwind { +-; LA32-LABEL: foo_br_sgt: +-; LA32: # %bb.0: +-; LA32-NEXT: ld.w $a2, $a1, 0 +-; LA32-NEXT: blt $a0, $a2, .LBB7_2 +-; LA32-NEXT: b .LBB7_1 +-; LA32-NEXT: .LBB7_1: # %test +-; LA32-NEXT: ld.w $a0, $a1, 0 +-; LA32-NEXT: .LBB7_2: # %end +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: foo_br_sgt: +-; LA64: # %bb.0: +-; LA64-NEXT: ld.w $a2, $a1, 0 +-; LA64-NEXT: addi.w $a0, $a0, 0 +-; LA64-NEXT: blt $a0, $a2, .LBB7_2 +-; LA64-NEXT: b .LBB7_1 +-; LA64-NEXT: .LBB7_1: # %test +-; LA64-NEXT: ld.w $a0, $a1, 0 +-; LA64-NEXT: .LBB7_2: # %end +-; LA64-NEXT: jirl $zero, $ra, 0 +- %val = load volatile i32, ptr %b +- %cc = icmp sgt i32 %val, %a +- br i1 %cc, label %end, label %test +-test: +- %tmp = load volatile i32, ptr %b +- br label %end +- +-end: +- ret void +-} +- +-define void @foo_br_sle(i32 %a, ptr %b) nounwind { +-; LA32-LABEL: foo_br_sle: +-; LA32: # %bb.0: +-; LA32-NEXT: ld.w $a2, $a1, 0 +-; LA32-NEXT: bge $a0, $a2, .LBB8_2 +-; LA32-NEXT: b .LBB8_1 +-; LA32-NEXT: .LBB8_1: # %test +-; LA32-NEXT: ld.w $a0, $a1, 0 +-; LA32-NEXT: .LBB8_2: # %end +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: foo_br_sle: +-; LA64: # %bb.0: +-; LA64-NEXT: ld.w $a2, $a1, 0 +-; LA64-NEXT: addi.w $a0, $a0, 0 +-; LA64-NEXT: bge $a0, $a2, .LBB8_2 +-; LA64-NEXT: b .LBB8_1 +-; LA64-NEXT: .LBB8_1: # %test +-; LA64-NEXT: ld.w $a0, $a1, 0 +-; LA64-NEXT: .LBB8_2: # %end +-; LA64-NEXT: jirl $zero, $ra, 0 +- %val = load volatile i32, ptr %b +- %cc = icmp sle i32 %val, %a +- br i1 %cc, label %end, label %test +-test: +- %tmp = load volatile i32, ptr %b +- br label %end +- +-end: +- ret void +-} +- +-define void @foo_br_ugt(i32 %a, ptr %b) nounwind { +-; LA32-LABEL: foo_br_ugt: +-; LA32: # %bb.0: +-; LA32-NEXT: ld.w $a2, $a1, 0 +-; LA32-NEXT: bltu $a0, $a2, .LBB9_2 +-; LA32-NEXT: b .LBB9_1 +-; LA32-NEXT: .LBB9_1: # %test +-; LA32-NEXT: ld.w $a0, $a1, 0 +-; LA32-NEXT: .LBB9_2: # %end +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: foo_br_ugt: +-; LA64: # %bb.0: +-; LA64-NEXT: ld.wu $a2, $a1, 0 +-; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0 +-; LA64-NEXT: bltu $a0, $a2, .LBB9_2 +-; LA64-NEXT: b .LBB9_1 +-; LA64-NEXT: .LBB9_1: # %test +-; LA64-NEXT: ld.w $a0, $a1, 0 +-; LA64-NEXT: .LBB9_2: # %end +-; LA64-NEXT: jirl $zero, $ra, 0 +- %val = load volatile i32, ptr %b +- %cc = icmp ugt i32 %val, %a +- br i1 %cc, label %end, label %test +-test: +- %tmp = load volatile i32, ptr %b +- br label %end +- +-end: +- ret void +-} +- +-define void @foo_br_ule(i32 %a, ptr %b) nounwind { +-; LA32-LABEL: foo_br_ule: +-; LA32: # %bb.0: +-; LA32-NEXT: 
ld.w $a2, $a1, 0 +-; LA32-NEXT: bgeu $a0, $a2, .LBB10_2 +-; LA32-NEXT: b .LBB10_1 +-; LA32-NEXT: .LBB10_1: # %test +-; LA32-NEXT: ld.w $a0, $a1, 0 +-; LA32-NEXT: .LBB10_2: # %end +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: foo_br_ule: +-; LA64: # %bb.0: +-; LA64-NEXT: ld.wu $a2, $a1, 0 +-; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0 +-; LA64-NEXT: bgeu $a0, $a2, .LBB10_2 +-; LA64-NEXT: b .LBB10_1 +-; LA64-NEXT: .LBB10_1: # %test +-; LA64-NEXT: ld.w $a0, $a1, 0 +-; LA64-NEXT: .LBB10_2: # %end +-; LA64-NEXT: jirl $zero, $ra, 0 +- %val = load volatile i32, ptr %b +- %cc = icmp ule i32 %val, %a +- br i1 %cc, label %end, label %test +-test: +- %tmp = load volatile i32, ptr %b +- br label %end +- +-end: +- ret void +-} +- +-;; Check the case of a branch where the condition was generated in another +-;; function. +-define void @foo_br_cc(ptr %a, i1 %cc) nounwind { +-; ALL-LABEL: foo_br_cc: +-; ALL: # %bb.0: +-; ALL-NEXT: ld.w $a2, $a0, 0 +-; ALL-NEXT: andi $a1, $a1, 1 +-; ALL-NEXT: bnez $a1, .LBB11_2 +-; ALL-NEXT: b .LBB11_1 +-; ALL-NEXT: .LBB11_1: # %test +-; ALL-NEXT: ld.w $a0, $a0, 0 +-; ALL-NEXT: .LBB11_2: # %end +-; ALL-NEXT: jirl $zero, $ra, 0 +- %val = load volatile i32, ptr %a +- br i1 %cc, label %end, label %test +-test: +- %tmp = load volatile i32, ptr %a +- br label %end +- +-end: +- ret void +-} +diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/call.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/call.ll +deleted file mode 100644 +index 596ea22e5..000000000 +--- a/llvm/test/CodeGen/LoongArch/ir-instruction/call.ll ++++ /dev/null +@@ -1,88 +0,0 @@ +-; RUN: llc --mtriple=loongarch32 < %s | FileCheck --check-prefix=LA32 %s +-; RUN: llc --mtriple=loongarch64 < %s | FileCheck --check-prefix=LA64 %s +- +-declare i32 @external_function(i32) +- +-define i32 @test_call_external(i32 %a) nounwind { +-; LA32-LABEL: test_call_external: +-; LA32: # %bb.0: +-; LA32-NEXT: addi.w $sp, $sp, -16 +-; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +-; LA32-NEXT: bl external_function +-; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +-; LA32-NEXT: addi.w $sp, $sp, 16 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: test_call_external: +-; LA64: # %bb.0: +-; LA64-NEXT: addi.d $sp, $sp, -16 +-; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +-; LA64-NEXT: bl external_function +-; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +-; LA64-NEXT: addi.d $sp, $sp, 16 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %1 = call i32 @external_function(i32 %a) +- ret i32 %1 +-} +- +-define i32 @defined_function(i32 %a) nounwind { +-; LA32-LABEL: defined_function: +-; LA32: # %bb.0: +-; LA32-NEXT: addi.w $a0, $a0, 1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: defined_function: +-; LA64: # %bb.0: +-; LA64-NEXT: addi.d $a0, $a0, 1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %1 = add i32 %a, 1 +- ret i32 %1 +-} +- +-define i32 @test_call_defined(i32 %a) nounwind { +-; LA32-LABEL: test_call_defined: +-; LA32: # %bb.0: +-; LA32-NEXT: addi.w $sp, $sp, -16 +-; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +-; LA32-NEXT: bl defined_function +-; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +-; LA32-NEXT: addi.w $sp, $sp, 16 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: test_call_defined: +-; LA64: # %bb.0: +-; LA64-NEXT: addi.d $sp, $sp, -16 +-; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +-; LA64-NEXT: bl defined_function +-; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +-; LA64-NEXT: addi.d $sp, $sp, 16 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %1 = call i32 
@defined_function(i32 %a) nounwind +- ret i32 %1 +-} +- +-define i32 @test_call_indirect(ptr %a, i32 %b) nounwind { +-; LA32-LABEL: test_call_indirect: +-; LA32: # %bb.0: +-; LA32-NEXT: addi.w $sp, $sp, -16 +-; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +-; LA32-NEXT: move $a2, $a0 +-; LA32-NEXT: move $a0, $a1 +-; LA32-NEXT: jirl $ra, $a2, 0 +-; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +-; LA32-NEXT: addi.w $sp, $sp, 16 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: test_call_indirect: +-; LA64: # %bb.0: +-; LA64-NEXT: addi.d $sp, $sp, -16 +-; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +-; LA64-NEXT: move $a2, $a0 +-; LA64-NEXT: move $a0, $a1 +-; LA64-NEXT: jirl $ra, $a2, 0 +-; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +-; LA64-NEXT: addi.d $sp, $sp, 16 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %1 = call i32 %a(i32 %b) +- ret i32 %1 +-} +diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/double-convert.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/double-convert.ll +deleted file mode 100644 +index 33f6dbee7..000000000 +--- a/llvm/test/CodeGen/LoongArch/ir-instruction/double-convert.ll ++++ /dev/null +@@ -1,329 +0,0 @@ +-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +-; RUN: llc --mtriple=loongarch32 --mattr=+d < %s | FileCheck %s --check-prefix=LA32 +-; RUN: llc --mtriple=loongarch64 --mattr=+d < %s | FileCheck %s --check-prefix=LA64 +- +-define float @convert_double_to_float(double %a) nounwind { +-; LA32-LABEL: convert_double_to_float: +-; LA32: # %bb.0: +-; LA32-NEXT: fcvt.s.d $fa0, $fa0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: convert_double_to_float: +-; LA64: # %bb.0: +-; LA64-NEXT: fcvt.s.d $fa0, $fa0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %1 = fptrunc double %a to float +- ret float %1 +-} +- +-define double @convert_float_to_double(float %a) nounwind { +-; LA32-LABEL: convert_float_to_double: +-; LA32: # %bb.0: +-; LA32-NEXT: fcvt.d.s $fa0, $fa0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: convert_float_to_double: +-; LA64: # %bb.0: +-; LA64-NEXT: fcvt.d.s $fa0, $fa0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %1 = fpext float %a to double +- ret double %1 +-} +- +-define double @convert_i8_to_double(i8 signext %a) nounwind { +-; LA32-LABEL: convert_i8_to_double: +-; LA32: # %bb.0: +-; LA32-NEXT: movgr2fr.w $fa0, $a0 +-; LA32-NEXT: ffint.d.w $fa0, $fa0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: convert_i8_to_double: +-; LA64: # %bb.0: +-; LA64-NEXT: movgr2fr.w $fa0, $a0 +-; LA64-NEXT: ffint.d.w $fa0, $fa0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %1 = sitofp i8 %a to double +- ret double %1 +-} +- +-define double @convert_i16_to_double(i16 signext %a) nounwind { +-; LA32-LABEL: convert_i16_to_double: +-; LA32: # %bb.0: +-; LA32-NEXT: movgr2fr.w $fa0, $a0 +-; LA32-NEXT: ffint.d.w $fa0, $fa0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: convert_i16_to_double: +-; LA64: # %bb.0: +-; LA64-NEXT: movgr2fr.w $fa0, $a0 +-; LA64-NEXT: ffint.d.w $fa0, $fa0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %1 = sitofp i16 %a to double +- ret double %1 +-} +- +-define double @convert_i32_to_double(i32 %a) nounwind { +-; LA32-LABEL: convert_i32_to_double: +-; LA32: # %bb.0: +-; LA32-NEXT: movgr2fr.w $fa0, $a0 +-; LA32-NEXT: ffint.d.w $fa0, $fa0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: convert_i32_to_double: +-; LA64: # %bb.0: +-; LA64-NEXT: movgr2fr.w $fa0, $a0 +-; LA64-NEXT: ffint.d.w $fa0, $fa0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %1 = sitofp i32 %a to double +- ret double %1 +-} +- 
+-define double @convert_i64_to_double(i64 %a) nounwind { +-; LA32-LABEL: convert_i64_to_double: +-; LA32: # %bb.0: +-; LA32-NEXT: addi.w $sp, $sp, -16 +-; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +-; LA32-NEXT: bl __floatdidf +-; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +-; LA32-NEXT: addi.w $sp, $sp, 16 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: convert_i64_to_double: +-; LA64: # %bb.0: +-; LA64-NEXT: movgr2fr.d $fa0, $a0 +-; LA64-NEXT: ffint.d.l $fa0, $fa0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %1 = sitofp i64 %a to double +- ret double %1 +-} +- +-define i32 @convert_double_to_i32(double %a) nounwind { +-; LA32-LABEL: convert_double_to_i32: +-; LA32: # %bb.0: +-; LA32-NEXT: ftintrz.w.d $fa0, $fa0 +-; LA32-NEXT: movfr2gr.s $a0, $fa0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: convert_double_to_i32: +-; LA64: # %bb.0: +-; LA64-NEXT: ftintrz.w.d $fa0, $fa0 +-; LA64-NEXT: movfr2gr.s $a0, $fa0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %1 = fptosi double %a to i32 +- ret i32 %1 +-} +- +-define i32 @convert_double_to_u32(double %a) nounwind { +-; LA32-LABEL: convert_double_to_u32: +-; LA32: # %bb.0: +-; LA32-NEXT: pcalau12i $a0, .LCPI7_0 +-; LA32-NEXT: addi.w $a0, $a0, .LCPI7_0 +-; LA32-NEXT: fld.d $fa1, $a0, 0 +-; LA32-NEXT: fsub.d $fa2, $fa0, $fa1 +-; LA32-NEXT: ftintrz.w.d $fa2, $fa2 +-; LA32-NEXT: movfr2gr.s $a0, $fa2 +-; LA32-NEXT: lu12i.w $a1, -524288 +-; LA32-NEXT: xor $a0, $a0, $a1 +-; LA32-NEXT: fcmp.clt.d $fcc0, $fa0, $fa1 +-; LA32-NEXT: movcf2gr $a1, $fcc0 +-; LA32-NEXT: masknez $a0, $a0, $a1 +-; LA32-NEXT: ftintrz.w.d $fa0, $fa0 +-; LA32-NEXT: movfr2gr.s $a2, $fa0 +-; LA32-NEXT: maskeqz $a1, $a2, $a1 +-; LA32-NEXT: or $a0, $a1, $a0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: convert_double_to_u32: +-; LA64: # %bb.0: +-; LA64-NEXT: ftintrz.l.d $fa0, $fa0 +-; LA64-NEXT: movfr2gr.d $a0, $fa0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %1 = fptoui double %a to i32 +- ret i32 %1 +-} +- +-define i64 @convert_double_to_i64(double %a) nounwind { +-; LA32-LABEL: convert_double_to_i64: +-; LA32: # %bb.0: +-; LA32-NEXT: addi.w $sp, $sp, -16 +-; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +-; LA32-NEXT: bl __fixdfdi +-; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +-; LA32-NEXT: addi.w $sp, $sp, 16 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: convert_double_to_i64: +-; LA64: # %bb.0: +-; LA64-NEXT: ftintrz.l.d $fa0, $fa0 +-; LA64-NEXT: movfr2gr.d $a0, $fa0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %1 = fptosi double %a to i64 +- ret i64 %1 +-} +- +-define i64 @convert_double_to_u64(double %a) nounwind { +-; LA32-LABEL: convert_double_to_u64: +-; LA32: # %bb.0: +-; LA32-NEXT: addi.w $sp, $sp, -16 +-; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +-; LA32-NEXT: bl __fixunsdfdi +-; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +-; LA32-NEXT: addi.w $sp, $sp, 16 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: convert_double_to_u64: +-; LA64: # %bb.0: +-; LA64-NEXT: pcalau12i $a0, .LCPI9_0 +-; LA64-NEXT: addi.d $a0, $a0, .LCPI9_0 +-; LA64-NEXT: fld.d $fa1, $a0, 0 +-; LA64-NEXT: fsub.d $fa2, $fa0, $fa1 +-; LA64-NEXT: ftintrz.l.d $fa2, $fa2 +-; LA64-NEXT: movfr2gr.d $a0, $fa2 +-; LA64-NEXT: lu52i.d $a1, $zero, -2048 +-; LA64-NEXT: xor $a0, $a0, $a1 +-; LA64-NEXT: fcmp.clt.d $fcc0, $fa0, $fa1 +-; LA64-NEXT: movcf2gr $a1, $fcc0 +-; LA64-NEXT: masknez $a0, $a0, $a1 +-; LA64-NEXT: ftintrz.l.d $fa0, $fa0 +-; LA64-NEXT: movfr2gr.d $a2, $fa0 +-; LA64-NEXT: maskeqz $a1, $a2, $a1 +-; LA64-NEXT: or $a0, $a1, $a0 +-; LA64-NEXT: jirl 
$zero, $ra, 0 +- %1 = fptoui double %a to i64 +- ret i64 %1 +-} +- +-define double @convert_u8_to_double(i8 zeroext %a) nounwind { +-; LA32-LABEL: convert_u8_to_double: +-; LA32: # %bb.0: +-; LA32-NEXT: movgr2fr.w $fa0, $a0 +-; LA32-NEXT: ffint.d.w $fa0, $fa0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: convert_u8_to_double: +-; LA64: # %bb.0: +-; LA64-NEXT: movgr2fr.w $fa0, $a0 +-; LA64-NEXT: ffint.d.w $fa0, $fa0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %1 = uitofp i8 %a to double +- ret double %1 +-} +- +-define double @convert_u16_to_double(i16 zeroext %a) nounwind { +-; LA32-LABEL: convert_u16_to_double: +-; LA32: # %bb.0: +-; LA32-NEXT: movgr2fr.w $fa0, $a0 +-; LA32-NEXT: ffint.d.w $fa0, $fa0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: convert_u16_to_double: +-; LA64: # %bb.0: +-; LA64-NEXT: movgr2fr.w $fa0, $a0 +-; LA64-NEXT: ffint.d.w $fa0, $fa0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %1 = uitofp i16 %a to double +- ret double %1 +-} +- +-define double @convert_u32_to_double(i32 %a) nounwind { +-; LA32-LABEL: convert_u32_to_double: +-; LA32: # %bb.0: +-; LA32-NEXT: addi.w $sp, $sp, -16 +-; LA32-NEXT: addi.w $a1, $sp, 8 +-; LA32-NEXT: ori $a1, $a1, 4 +-; LA32-NEXT: lu12i.w $a2, 275200 +-; LA32-NEXT: st.w $a2, $a1, 0 +-; LA32-NEXT: st.w $a0, $sp, 8 +-; LA32-NEXT: pcalau12i $a0, .LCPI12_0 +-; LA32-NEXT: addi.w $a0, $a0, .LCPI12_0 +-; LA32-NEXT: fld.d $fa0, $a0, 0 +-; LA32-NEXT: fld.d $fa1, $sp, 8 +-; LA32-NEXT: fsub.d $fa0, $fa1, $fa0 +-; LA32-NEXT: addi.w $sp, $sp, 16 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: convert_u32_to_double: +-; LA64: # %bb.0: +-; LA64-NEXT: lu52i.d $a1, $zero, 1107 +-; LA64-NEXT: movgr2fr.d $fa0, $a1 +-; LA64-NEXT: pcalau12i $a1, .LCPI12_0 +-; LA64-NEXT: addi.d $a1, $a1, .LCPI12_0 +-; LA64-NEXT: fld.d $fa1, $a1, 0 +-; LA64-NEXT: fsub.d $fa0, $fa0, $fa1 +-; LA64-NEXT: lu12i.w $a1, 275200 +-; LA64-NEXT: bstrins.d $a0, $a1, 63, 32 +-; LA64-NEXT: movgr2fr.d $fa1, $a0 +-; LA64-NEXT: fadd.d $fa0, $fa1, $fa0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %1 = uitofp i32 %a to double +- ret double %1 +-} +- +-define double @convert_u64_to_double(i64 %a) nounwind { +-; LA32-LABEL: convert_u64_to_double: +-; LA32: # %bb.0: +-; LA32-NEXT: addi.w $sp, $sp, -16 +-; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +-; LA32-NEXT: bl __floatundidf +-; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +-; LA32-NEXT: addi.w $sp, $sp, 16 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: convert_u64_to_double: +-; LA64: # %bb.0: +-; LA64-NEXT: srli.d $a1, $a0, 32 +-; LA64-NEXT: lu52i.d $a2, $zero, 1107 +-; LA64-NEXT: or $a1, $a1, $a2 +-; LA64-NEXT: movgr2fr.d $fa0, $a1 +-; LA64-NEXT: pcalau12i $a1, .LCPI13_0 +-; LA64-NEXT: addi.d $a1, $a1, .LCPI13_0 +-; LA64-NEXT: fld.d $fa1, $a1, 0 +-; LA64-NEXT: fsub.d $fa0, $fa0, $fa1 +-; LA64-NEXT: lu12i.w $a1, 275200 +-; LA64-NEXT: bstrins.d $a0, $a1, 63, 32 +-; LA64-NEXT: movgr2fr.d $fa1, $a0 +-; LA64-NEXT: fadd.d $fa0, $fa1, $fa0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %1 = uitofp i64 %a to double +- ret double %1 +-} +- +-define double @bitcast_i64_to_double(i64 %a, i64 %b) nounwind { +-; LA32-LABEL: bitcast_i64_to_double: +-; LA32: # %bb.0: +-; LA32-NEXT: addi.w $sp, $sp, -16 +-; LA32-NEXT: addi.w $a2, $sp, 8 +-; LA32-NEXT: ori $a2, $a2, 4 +-; LA32-NEXT: st.w $a1, $a2, 0 +-; LA32-NEXT: st.w $a0, $sp, 8 +-; LA32-NEXT: fld.d $fa0, $sp, 8 +-; LA32-NEXT: addi.w $sp, $sp, 16 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: bitcast_i64_to_double: +-; LA64: # %bb.0: +-; LA64-NEXT: movgr2fr.d $fa0, $a0 +-; LA64-NEXT: jirl 
$zero, $ra, 0 +- %1 = bitcast i64 %a to double +- ret double %1 +-} +- +-define i64 @bitcast_double_to_i64(double %a) nounwind { +-; LA32-LABEL: bitcast_double_to_i64: +-; LA32: # %bb.0: +-; LA32-NEXT: addi.w $sp, $sp, -16 +-; LA32-NEXT: fst.d $fa0, $sp, 8 +-; LA32-NEXT: addi.w $a0, $sp, 8 +-; LA32-NEXT: ori $a0, $a0, 4 +-; LA32-NEXT: ld.w $a1, $a0, 0 +-; LA32-NEXT: ld.w $a0, $sp, 8 +-; LA32-NEXT: addi.w $sp, $sp, 16 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: bitcast_double_to_i64: +-; LA64: # %bb.0: +-; LA64-NEXT: movfr2gr.d $a0, $fa0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %1 = bitcast double %a to i64 +- ret i64 %1 +-} +diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/fadd.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/fadd.ll +deleted file mode 100644 +index 15e1118d2..000000000 +--- a/llvm/test/CodeGen/LoongArch/ir-instruction/fadd.ll ++++ /dev/null +@@ -1,32 +0,0 @@ +-; RUN: llc --mtriple=loongarch32 --mattr=+d < %s | FileCheck %s --check-prefix=LA32 +-; RUN: llc --mtriple=loongarch64 --mattr=+d < %s | FileCheck %s --check-prefix=LA64 +- +-;; Exercise the 'fadd' LLVM IR: https://llvm.org/docs/LangRef.html#fadd-instruction +- +-define float @fadd_s(float %x, float %y) { +-; LA32-LABEL: fadd_s: +-; LA32: # %bb.0: +-; LA32-NEXT: fadd.s $fa0, $fa0, $fa1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: fadd_s: +-; LA64: # %bb.0: +-; LA64-NEXT: fadd.s $fa0, $fa0, $fa1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %add = fadd float %x, %y +- ret float %add +-} +- +-define double @fadd_d(double %x, double %y) { +-; LA32-LABEL: fadd_d: +-; LA32: # %bb.0: +-; LA32-NEXT: fadd.d $fa0, $fa0, $fa1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: fadd_d: +-; LA64: # %bb.0: +-; LA64-NEXT: fadd.d $fa0, $fa0, $fa1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %add = fadd double %x, %y +- ret double %add +-} +diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/fcmp-dbl.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/fcmp-dbl.ll +deleted file mode 100644 +index bb35405ab..000000000 +--- a/llvm/test/CodeGen/LoongArch/ir-instruction/fcmp-dbl.ll ++++ /dev/null +@@ -1,257 +0,0 @@ +-; RUN: llc --mtriple=loongarch32 --mattr=+d < %s | FileCheck %s --check-prefix=LA32 +-; RUN: llc --mtriple=loongarch64 --mattr=+d < %s | FileCheck %s --check-prefix=LA64 +- +-;; Test the 'fcmp' LLVM IR: https://llvm.org/docs/LangRef.html#fcmp-instruction +-;; over double values. 
+- +-define i1 @fcmp_false(double %a, double %b) { +-; LA32-LABEL: fcmp_false: +-; LA32: # %bb.0: +-; LA32-NEXT: move $a0, $zero +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: fcmp_false: +-; LA64: # %bb.0: +-; LA64-NEXT: move $a0, $zero +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cmp = fcmp false double %a, %b +- ret i1 %cmp +-} +- +-define i1 @fcmp_oeq(double %a, double %b) { +-; LA32-LABEL: fcmp_oeq: +-; LA32: # %bb.0: +-; LA32-NEXT: fcmp.ceq.d $fcc0, $fa0, $fa1 +-; LA32-NEXT: movcf2gr $a0, $fcc0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: fcmp_oeq: +-; LA64: # %bb.0: +-; LA64-NEXT: fcmp.ceq.d $fcc0, $fa0, $fa1 +-; LA64-NEXT: movcf2gr $a0, $fcc0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cmp = fcmp oeq double %a, %b +- ret i1 %cmp +-} +- +-define i1 @fcmp_ogt(double %a, double %b) { +-; LA32-LABEL: fcmp_ogt: +-; LA32: # %bb.0: +-; LA32-NEXT: fcmp.clt.d $fcc0, $fa1, $fa0 +-; LA32-NEXT: movcf2gr $a0, $fcc0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: fcmp_ogt: +-; LA64: # %bb.0: +-; LA64-NEXT: fcmp.clt.d $fcc0, $fa1, $fa0 +-; LA64-NEXT: movcf2gr $a0, $fcc0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cmp = fcmp ogt double %a, %b +- ret i1 %cmp +-} +- +-define i1 @fcmp_oge(double %a, double %b) { +-; LA32-LABEL: fcmp_oge: +-; LA32: # %bb.0: +-; LA32-NEXT: fcmp.cle.d $fcc0, $fa1, $fa0 +-; LA32-NEXT: movcf2gr $a0, $fcc0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: fcmp_oge: +-; LA64: # %bb.0: +-; LA64-NEXT: fcmp.cle.d $fcc0, $fa1, $fa0 +-; LA64-NEXT: movcf2gr $a0, $fcc0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cmp = fcmp oge double %a, %b +- ret i1 %cmp +-} +- +-define i1 @fcmp_olt(double %a, double %b) { +-; LA32-LABEL: fcmp_olt: +-; LA32: # %bb.0: +-; LA32-NEXT: fcmp.clt.d $fcc0, $fa0, $fa1 +-; LA32-NEXT: movcf2gr $a0, $fcc0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: fcmp_olt: +-; LA64: # %bb.0: +-; LA64-NEXT: fcmp.clt.d $fcc0, $fa0, $fa1 +-; LA64-NEXT: movcf2gr $a0, $fcc0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cmp = fcmp olt double %a, %b +- ret i1 %cmp +-} +- +-define i1 @fcmp_ole(double %a, double %b) { +-; LA32-LABEL: fcmp_ole: +-; LA32: # %bb.0: +-; LA32-NEXT: fcmp.cle.d $fcc0, $fa0, $fa1 +-; LA32-NEXT: movcf2gr $a0, $fcc0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: fcmp_ole: +-; LA64: # %bb.0: +-; LA64-NEXT: fcmp.cle.d $fcc0, $fa0, $fa1 +-; LA64-NEXT: movcf2gr $a0, $fcc0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cmp = fcmp ole double %a, %b +- ret i1 %cmp +-} +- +-define i1 @fcmp_one(double %a, double %b) { +-; LA32-LABEL: fcmp_one: +-; LA32: # %bb.0: +-; LA32-NEXT: fcmp.cne.d $fcc0, $fa0, $fa1 +-; LA32-NEXT: movcf2gr $a0, $fcc0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: fcmp_one: +-; LA64: # %bb.0: +-; LA64-NEXT: fcmp.cne.d $fcc0, $fa0, $fa1 +-; LA64-NEXT: movcf2gr $a0, $fcc0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cmp = fcmp one double %a, %b +- ret i1 %cmp +-} +- +-define i1 @fcmp_ord(double %a, double %b) { +-; LA32-LABEL: fcmp_ord: +-; LA32: # %bb.0: +-; LA32-NEXT: fcmp.cor.d $fcc0, $fa0, $fa1 +-; LA32-NEXT: movcf2gr $a0, $fcc0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: fcmp_ord: +-; LA64: # %bb.0: +-; LA64-NEXT: fcmp.cor.d $fcc0, $fa0, $fa1 +-; LA64-NEXT: movcf2gr $a0, $fcc0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cmp = fcmp ord double %a, %b +- ret i1 %cmp +-} +- +-define i1 @fcmp_ueq(double %a, double %b) { +-; LA32-LABEL: fcmp_ueq: +-; LA32: # %bb.0: +-; LA32-NEXT: fcmp.cueq.d $fcc0, $fa0, $fa1 +-; LA32-NEXT: movcf2gr $a0, $fcc0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: fcmp_ueq: +-; LA64: # %bb.0: +-; LA64-NEXT: 
fcmp.cueq.d $fcc0, $fa0, $fa1 +-; LA64-NEXT: movcf2gr $a0, $fcc0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cmp = fcmp ueq double %a, %b +- ret i1 %cmp +-} +- +-define i1 @fcmp_ugt(double %a, double %b) { +-; LA32-LABEL: fcmp_ugt: +-; LA32: # %bb.0: +-; LA32-NEXT: fcmp.cult.d $fcc0, $fa1, $fa0 +-; LA32-NEXT: movcf2gr $a0, $fcc0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: fcmp_ugt: +-; LA64: # %bb.0: +-; LA64-NEXT: fcmp.cult.d $fcc0, $fa1, $fa0 +-; LA64-NEXT: movcf2gr $a0, $fcc0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cmp = fcmp ugt double %a, %b +- ret i1 %cmp +-} +- +-define i1 @fcmp_uge(double %a, double %b) { +-; LA32-LABEL: fcmp_uge: +-; LA32: # %bb.0: +-; LA32-NEXT: fcmp.cule.d $fcc0, $fa1, $fa0 +-; LA32-NEXT: movcf2gr $a0, $fcc0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: fcmp_uge: +-; LA64: # %bb.0: +-; LA64-NEXT: fcmp.cule.d $fcc0, $fa1, $fa0 +-; LA64-NEXT: movcf2gr $a0, $fcc0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cmp = fcmp uge double %a, %b +- ret i1 %cmp +-} +- +-define i1 @fcmp_ult(double %a, double %b) { +-; LA32-LABEL: fcmp_ult: +-; LA32: # %bb.0: +-; LA32-NEXT: fcmp.cult.d $fcc0, $fa0, $fa1 +-; LA32-NEXT: movcf2gr $a0, $fcc0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: fcmp_ult: +-; LA64: # %bb.0: +-; LA64-NEXT: fcmp.cult.d $fcc0, $fa0, $fa1 +-; LA64-NEXT: movcf2gr $a0, $fcc0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cmp = fcmp ult double %a, %b +- ret i1 %cmp +-} +- +-define i1 @fcmp_ule(double %a, double %b) { +-; LA32-LABEL: fcmp_ule: +-; LA32: # %bb.0: +-; LA32-NEXT: fcmp.cule.d $fcc0, $fa0, $fa1 +-; LA32-NEXT: movcf2gr $a0, $fcc0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: fcmp_ule: +-; LA64: # %bb.0: +-; LA64-NEXT: fcmp.cule.d $fcc0, $fa0, $fa1 +-; LA64-NEXT: movcf2gr $a0, $fcc0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cmp = fcmp ule double %a, %b +- ret i1 %cmp +-} +- +-define i1 @fcmp_une(double %a, double %b) { +-; LA32-LABEL: fcmp_une: +-; LA32: # %bb.0: +-; LA32-NEXT: fcmp.cune.d $fcc0, $fa0, $fa1 +-; LA32-NEXT: movcf2gr $a0, $fcc0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: fcmp_une: +-; LA64: # %bb.0: +-; LA64-NEXT: fcmp.cune.d $fcc0, $fa0, $fa1 +-; LA64-NEXT: movcf2gr $a0, $fcc0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cmp = fcmp une double %a, %b +- ret i1 %cmp +-} +- +-define i1 @fcmp_uno(double %a, double %b) { +-; LA32-LABEL: fcmp_uno: +-; LA32: # %bb.0: +-; LA32-NEXT: fcmp.cun.d $fcc0, $fa0, $fa1 +-; LA32-NEXT: movcf2gr $a0, $fcc0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: fcmp_uno: +-; LA64: # %bb.0: +-; LA64-NEXT: fcmp.cun.d $fcc0, $fa0, $fa1 +-; LA64-NEXT: movcf2gr $a0, $fcc0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cmp = fcmp uno double %a, %b +- ret i1 %cmp +-} +- +-define i1 @fcmp_true(double %a, double %b) { +-; LA32-LABEL: fcmp_true: +-; LA32: # %bb.0: +-; LA32-NEXT: ori $a0, $zero, 1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: fcmp_true: +-; LA64: # %bb.0: +-; LA64-NEXT: ori $a0, $zero, 1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cmp = fcmp true double %a, %b +- ret i1 %cmp +-} +diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/fcmp-flt.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/fcmp-flt.ll +deleted file mode 100644 +index 33bdd0b50..000000000 +--- a/llvm/test/CodeGen/LoongArch/ir-instruction/fcmp-flt.ll ++++ /dev/null +@@ -1,257 +0,0 @@ +-; RUN: llc --mtriple=loongarch32 --mattr=+f,-d < %s | FileCheck %s --check-prefix=LA32 +-; RUN: llc --mtriple=loongarch64 --mattr=+f,-d < %s | FileCheck %s --check-prefix=LA64 +- +-;; Test the 'fcmp' LLVM IR: 
https://llvm.org/docs/LangRef.html#fcmp-instruction +-;; over float values. +- +-define i1 @fcmp_false(float %a, float %b) { +-; LA32-LABEL: fcmp_false: +-; LA32: # %bb.0: +-; LA32-NEXT: move $a0, $zero +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: fcmp_false: +-; LA64: # %bb.0: +-; LA64-NEXT: move $a0, $zero +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cmp = fcmp false float %a, %b +- ret i1 %cmp +-} +- +-define i1 @fcmp_oeq(float %a, float %b) { +-; LA32-LABEL: fcmp_oeq: +-; LA32: # %bb.0: +-; LA32-NEXT: fcmp.ceq.s $fcc0, $fa0, $fa1 +-; LA32-NEXT: movcf2gr $a0, $fcc0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: fcmp_oeq: +-; LA64: # %bb.0: +-; LA64-NEXT: fcmp.ceq.s $fcc0, $fa0, $fa1 +-; LA64-NEXT: movcf2gr $a0, $fcc0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cmp = fcmp oeq float %a, %b +- ret i1 %cmp +-} +- +-define i1 @fcmp_ogt(float %a, float %b) { +-; LA32-LABEL: fcmp_ogt: +-; LA32: # %bb.0: +-; LA32-NEXT: fcmp.clt.s $fcc0, $fa1, $fa0 +-; LA32-NEXT: movcf2gr $a0, $fcc0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: fcmp_ogt: +-; LA64: # %bb.0: +-; LA64-NEXT: fcmp.clt.s $fcc0, $fa1, $fa0 +-; LA64-NEXT: movcf2gr $a0, $fcc0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cmp = fcmp ogt float %a, %b +- ret i1 %cmp +-} +- +-define i1 @fcmp_oge(float %a, float %b) { +-; LA32-LABEL: fcmp_oge: +-; LA32: # %bb.0: +-; LA32-NEXT: fcmp.cle.s $fcc0, $fa1, $fa0 +-; LA32-NEXT: movcf2gr $a0, $fcc0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: fcmp_oge: +-; LA64: # %bb.0: +-; LA64-NEXT: fcmp.cle.s $fcc0, $fa1, $fa0 +-; LA64-NEXT: movcf2gr $a0, $fcc0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cmp = fcmp oge float %a, %b +- ret i1 %cmp +-} +- +-define i1 @fcmp_olt(float %a, float %b) { +-; LA32-LABEL: fcmp_olt: +-; LA32: # %bb.0: +-; LA32-NEXT: fcmp.clt.s $fcc0, $fa0, $fa1 +-; LA32-NEXT: movcf2gr $a0, $fcc0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: fcmp_olt: +-; LA64: # %bb.0: +-; LA64-NEXT: fcmp.clt.s $fcc0, $fa0, $fa1 +-; LA64-NEXT: movcf2gr $a0, $fcc0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cmp = fcmp olt float %a, %b +- ret i1 %cmp +-} +- +-define i1 @fcmp_ole(float %a, float %b) { +-; LA32-LABEL: fcmp_ole: +-; LA32: # %bb.0: +-; LA32-NEXT: fcmp.cle.s $fcc0, $fa0, $fa1 +-; LA32-NEXT: movcf2gr $a0, $fcc0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: fcmp_ole: +-; LA64: # %bb.0: +-; LA64-NEXT: fcmp.cle.s $fcc0, $fa0, $fa1 +-; LA64-NEXT: movcf2gr $a0, $fcc0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cmp = fcmp ole float %a, %b +- ret i1 %cmp +-} +- +-define i1 @fcmp_one(float %a, float %b) { +-; LA32-LABEL: fcmp_one: +-; LA32: # %bb.0: +-; LA32-NEXT: fcmp.cne.s $fcc0, $fa0, $fa1 +-; LA32-NEXT: movcf2gr $a0, $fcc0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: fcmp_one: +-; LA64: # %bb.0: +-; LA64-NEXT: fcmp.cne.s $fcc0, $fa0, $fa1 +-; LA64-NEXT: movcf2gr $a0, $fcc0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cmp = fcmp one float %a, %b +- ret i1 %cmp +-} +- +-define i1 @fcmp_ord(float %a, float %b) { +-; LA32-LABEL: fcmp_ord: +-; LA32: # %bb.0: +-; LA32-NEXT: fcmp.cor.s $fcc0, $fa0, $fa1 +-; LA32-NEXT: movcf2gr $a0, $fcc0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: fcmp_ord: +-; LA64: # %bb.0: +-; LA64-NEXT: fcmp.cor.s $fcc0, $fa0, $fa1 +-; LA64-NEXT: movcf2gr $a0, $fcc0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cmp = fcmp ord float %a, %b +- ret i1 %cmp +-} +- +-define i1 @fcmp_ueq(float %a, float %b) { +-; LA32-LABEL: fcmp_ueq: +-; LA32: # %bb.0: +-; LA32-NEXT: fcmp.cueq.s $fcc0, $fa0, $fa1 +-; LA32-NEXT: movcf2gr $a0, $fcc0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; 
LA64-LABEL: fcmp_ueq: +-; LA64: # %bb.0: +-; LA64-NEXT: fcmp.cueq.s $fcc0, $fa0, $fa1 +-; LA64-NEXT: movcf2gr $a0, $fcc0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cmp = fcmp ueq float %a, %b +- ret i1 %cmp +-} +- +-define i1 @fcmp_ugt(float %a, float %b) { +-; LA32-LABEL: fcmp_ugt: +-; LA32: # %bb.0: +-; LA32-NEXT: fcmp.cult.s $fcc0, $fa1, $fa0 +-; LA32-NEXT: movcf2gr $a0, $fcc0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: fcmp_ugt: +-; LA64: # %bb.0: +-; LA64-NEXT: fcmp.cult.s $fcc0, $fa1, $fa0 +-; LA64-NEXT: movcf2gr $a0, $fcc0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cmp = fcmp ugt float %a, %b +- ret i1 %cmp +-} +- +-define i1 @fcmp_uge(float %a, float %b) { +-; LA32-LABEL: fcmp_uge: +-; LA32: # %bb.0: +-; LA32-NEXT: fcmp.cule.s $fcc0, $fa1, $fa0 +-; LA32-NEXT: movcf2gr $a0, $fcc0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: fcmp_uge: +-; LA64: # %bb.0: +-; LA64-NEXT: fcmp.cule.s $fcc0, $fa1, $fa0 +-; LA64-NEXT: movcf2gr $a0, $fcc0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cmp = fcmp uge float %a, %b +- ret i1 %cmp +-} +- +-define i1 @fcmp_ult(float %a, float %b) { +-; LA32-LABEL: fcmp_ult: +-; LA32: # %bb.0: +-; LA32-NEXT: fcmp.cult.s $fcc0, $fa0, $fa1 +-; LA32-NEXT: movcf2gr $a0, $fcc0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: fcmp_ult: +-; LA64: # %bb.0: +-; LA64-NEXT: fcmp.cult.s $fcc0, $fa0, $fa1 +-; LA64-NEXT: movcf2gr $a0, $fcc0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cmp = fcmp ult float %a, %b +- ret i1 %cmp +-} +- +-define i1 @fcmp_ule(float %a, float %b) { +-; LA32-LABEL: fcmp_ule: +-; LA32: # %bb.0: +-; LA32-NEXT: fcmp.cule.s $fcc0, $fa0, $fa1 +-; LA32-NEXT: movcf2gr $a0, $fcc0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: fcmp_ule: +-; LA64: # %bb.0: +-; LA64-NEXT: fcmp.cule.s $fcc0, $fa0, $fa1 +-; LA64-NEXT: movcf2gr $a0, $fcc0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cmp = fcmp ule float %a, %b +- ret i1 %cmp +-} +- +-define i1 @fcmp_une(float %a, float %b) { +-; LA32-LABEL: fcmp_une: +-; LA32: # %bb.0: +-; LA32-NEXT: fcmp.cune.s $fcc0, $fa0, $fa1 +-; LA32-NEXT: movcf2gr $a0, $fcc0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: fcmp_une: +-; LA64: # %bb.0: +-; LA64-NEXT: fcmp.cune.s $fcc0, $fa0, $fa1 +-; LA64-NEXT: movcf2gr $a0, $fcc0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cmp = fcmp une float %a, %b +- ret i1 %cmp +-} +- +-define i1 @fcmp_uno(float %a, float %b) { +-; LA32-LABEL: fcmp_uno: +-; LA32: # %bb.0: +-; LA32-NEXT: fcmp.cun.s $fcc0, $fa0, $fa1 +-; LA32-NEXT: movcf2gr $a0, $fcc0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: fcmp_uno: +-; LA64: # %bb.0: +-; LA64-NEXT: fcmp.cun.s $fcc0, $fa0, $fa1 +-; LA64-NEXT: movcf2gr $a0, $fcc0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cmp = fcmp uno float %a, %b +- ret i1 %cmp +-} +- +-define i1 @fcmp_true(float %a, float %b) { +-; LA32-LABEL: fcmp_true: +-; LA32: # %bb.0: +-; LA32-NEXT: ori $a0, $zero, 1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: fcmp_true: +-; LA64: # %bb.0: +-; LA64-NEXT: ori $a0, $zero, 1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cmp = fcmp true float %a, %b +- ret i1 %cmp +-} +diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/fdiv.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/fdiv.ll +deleted file mode 100644 +index 9c3f85950..000000000 +--- a/llvm/test/CodeGen/LoongArch/ir-instruction/fdiv.ll ++++ /dev/null +@@ -1,32 +0,0 @@ +-; RUN: llc --mtriple=loongarch32 --mattr=+d < %s | FileCheck %s --check-prefix=LA32 +-; RUN: llc --mtriple=loongarch64 --mattr=+d < %s | FileCheck %s --check-prefix=LA64 +- +-;; Exercise the 'fdiv' LLVM IR: 
https://llvm.org/docs/LangRef.html#fdiv-instruction +- +-define float @fdiv_s(float %x, float %y) { +-; LA32-LABEL: fdiv_s: +-; LA32: # %bb.0: +-; LA32-NEXT: fdiv.s $fa0, $fa0, $fa1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: fdiv_s: +-; LA64: # %bb.0: +-; LA64-NEXT: fdiv.s $fa0, $fa0, $fa1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %div = fdiv float %x, %y +- ret float %div +-} +- +-define double @fdiv_d(double %x, double %y) { +-; LA32-LABEL: fdiv_d: +-; LA32: # %bb.0: +-; LA32-NEXT: fdiv.d $fa0, $fa0, $fa1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: fdiv_d: +-; LA64: # %bb.0: +-; LA64-NEXT: fdiv.d $fa0, $fa0, $fa1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %div = fdiv double %x, %y +- ret double %div +-} +diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/fence.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/fence.ll +deleted file mode 100644 +index f8c98bbc7..000000000 +--- a/llvm/test/CodeGen/LoongArch/ir-instruction/fence.ll ++++ /dev/null +@@ -1,58 +0,0 @@ +-; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32 +-; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64 +- +-define void @fence_acquire() nounwind { +-; LA32-LABEL: fence_acquire: +-; LA32: # %bb.0: +-; LA32-NEXT: dbar 0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: fence_acquire: +-; LA64: # %bb.0: +-; LA64-NEXT: dbar 0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- fence acquire +- ret void +-} +- +-define void @fence_release() nounwind { +-; LA32-LABEL: fence_release: +-; LA32: # %bb.0: +-; LA32-NEXT: dbar 0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: fence_release: +-; LA64: # %bb.0: +-; LA64-NEXT: dbar 0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- fence release +- ret void +-} +- +-define void @fence_acq_rel() nounwind { +-; LA32-LABEL: fence_acq_rel: +-; LA32: # %bb.0: +-; LA32-NEXT: dbar 0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: fence_acq_rel: +-; LA64: # %bb.0: +-; LA64-NEXT: dbar 0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- fence acq_rel +- ret void +-} +- +-define void @fence_seq_cst() nounwind { +-; LA32-LABEL: fence_seq_cst: +-; LA32: # %bb.0: +-; LA32-NEXT: dbar 0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: fence_seq_cst: +-; LA64: # %bb.0: +-; LA64-NEXT: dbar 0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- fence seq_cst +- ret void +-} +diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/float-convert.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/float-convert.ll +deleted file mode 100644 +index 30e0045a1..000000000 +--- a/llvm/test/CodeGen/LoongArch/ir-instruction/float-convert.ll ++++ /dev/null +@@ -1,650 +0,0 @@ +-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +-; RUN: llc --mtriple=loongarch32 --mattr=+f,-d < %s | FileCheck %s --check-prefix=LA32F +-; RUN: llc --mtriple=loongarch32 --mattr=+d < %s | FileCheck %s --check-prefix=LA32D +-; RUN: llc --mtriple=loongarch64 --mattr=+f,-d < %s | FileCheck %s --check-prefix=LA64F +-; RUN: llc --mtriple=loongarch64 --mattr=+d < %s | FileCheck %s --check-prefix=LA64D +- +-define signext i8 @convert_float_to_i8(float %a) nounwind { +-; LA32F-LABEL: convert_float_to_i8: +-; LA32F: # %bb.0: +-; LA32F-NEXT: ftintrz.w.s $fa0, $fa0 +-; LA32F-NEXT: movfr2gr.s $a0, $fa0 +-; LA32F-NEXT: jirl $zero, $ra, 0 +-; +-; LA32D-LABEL: convert_float_to_i8: +-; LA32D: # %bb.0: +-; LA32D-NEXT: ftintrz.w.s $fa0, $fa0 +-; LA32D-NEXT: movfr2gr.s $a0, $fa0 +-; LA32D-NEXT: jirl $zero, $ra, 0 +-; +-; LA64F-LABEL: convert_float_to_i8: +-; LA64F: # %bb.0: +-; LA64F-NEXT: ftintrz.w.s $fa0, $fa0 +-; 
LA64F-NEXT: movfr2gr.s $a0, $fa0 +-; LA64F-NEXT: jirl $zero, $ra, 0 +-; +-; LA64D-LABEL: convert_float_to_i8: +-; LA64D: # %bb.0: +-; LA64D-NEXT: ftintrz.l.s $fa0, $fa0 +-; LA64D-NEXT: movfr2gr.d $a0, $fa0 +-; LA64D-NEXT: jirl $zero, $ra, 0 +- %1 = fptosi float %a to i8 +- ret i8 %1 +-} +- +-define signext i16 @convert_float_to_i16(float %a) nounwind { +-; LA32F-LABEL: convert_float_to_i16: +-; LA32F: # %bb.0: +-; LA32F-NEXT: ftintrz.w.s $fa0, $fa0 +-; LA32F-NEXT: movfr2gr.s $a0, $fa0 +-; LA32F-NEXT: jirl $zero, $ra, 0 +-; +-; LA32D-LABEL: convert_float_to_i16: +-; LA32D: # %bb.0: +-; LA32D-NEXT: ftintrz.w.s $fa0, $fa0 +-; LA32D-NEXT: movfr2gr.s $a0, $fa0 +-; LA32D-NEXT: jirl $zero, $ra, 0 +-; +-; LA64F-LABEL: convert_float_to_i16: +-; LA64F: # %bb.0: +-; LA64F-NEXT: ftintrz.w.s $fa0, $fa0 +-; LA64F-NEXT: movfr2gr.s $a0, $fa0 +-; LA64F-NEXT: jirl $zero, $ra, 0 +-; +-; LA64D-LABEL: convert_float_to_i16: +-; LA64D: # %bb.0: +-; LA64D-NEXT: ftintrz.l.s $fa0, $fa0 +-; LA64D-NEXT: movfr2gr.d $a0, $fa0 +-; LA64D-NEXT: jirl $zero, $ra, 0 +- %1 = fptosi float %a to i16 +- ret i16 %1 +-} +- +-define i32 @convert_float_to_i32(float %a) nounwind { +-; LA32F-LABEL: convert_float_to_i32: +-; LA32F: # %bb.0: +-; LA32F-NEXT: ftintrz.w.s $fa0, $fa0 +-; LA32F-NEXT: movfr2gr.s $a0, $fa0 +-; LA32F-NEXT: jirl $zero, $ra, 0 +-; +-; LA32D-LABEL: convert_float_to_i32: +-; LA32D: # %bb.0: +-; LA32D-NEXT: ftintrz.w.s $fa0, $fa0 +-; LA32D-NEXT: movfr2gr.s $a0, $fa0 +-; LA32D-NEXT: jirl $zero, $ra, 0 +-; +-; LA64F-LABEL: convert_float_to_i32: +-; LA64F: # %bb.0: +-; LA64F-NEXT: ftintrz.w.s $fa0, $fa0 +-; LA64F-NEXT: movfr2gr.s $a0, $fa0 +-; LA64F-NEXT: jirl $zero, $ra, 0 +-; +-; LA64D-LABEL: convert_float_to_i32: +-; LA64D: # %bb.0: +-; LA64D-NEXT: ftintrz.w.s $fa0, $fa0 +-; LA64D-NEXT: movfr2gr.s $a0, $fa0 +-; LA64D-NEXT: jirl $zero, $ra, 0 +- %1 = fptosi float %a to i32 +- ret i32 %1 +-} +- +-define i64 @convert_float_to_i64(float %a) nounwind { +-; LA32F-LABEL: convert_float_to_i64: +-; LA32F: # %bb.0: +-; LA32F-NEXT: addi.w $sp, $sp, -16 +-; LA32F-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +-; LA32F-NEXT: bl __fixsfdi +-; LA32F-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +-; LA32F-NEXT: addi.w $sp, $sp, 16 +-; LA32F-NEXT: jirl $zero, $ra, 0 +-; +-; LA32D-LABEL: convert_float_to_i64: +-; LA32D: # %bb.0: +-; LA32D-NEXT: addi.w $sp, $sp, -16 +-; LA32D-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +-; LA32D-NEXT: bl __fixsfdi +-; LA32D-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +-; LA32D-NEXT: addi.w $sp, $sp, 16 +-; LA32D-NEXT: jirl $zero, $ra, 0 +-; +-; LA64F-LABEL: convert_float_to_i64: +-; LA64F: # %bb.0: +-; LA64F-NEXT: ftintrz.w.s $fa0, $fa0 +-; LA64F-NEXT: movfr2gr.s $a0, $fa0 +-; LA64F-NEXT: jirl $zero, $ra, 0 +-; +-; LA64D-LABEL: convert_float_to_i64: +-; LA64D: # %bb.0: +-; LA64D-NEXT: ftintrz.l.s $fa0, $fa0 +-; LA64D-NEXT: movfr2gr.d $a0, $fa0 +-; LA64D-NEXT: jirl $zero, $ra, 0 +- %1 = fptosi float %a to i64 +- ret i64 %1 +-} +- +-define zeroext i8 @convert_float_to_u8(float %a) nounwind { +-; LA32F-LABEL: convert_float_to_u8: +-; LA32F: # %bb.0: +-; LA32F-NEXT: ftintrz.w.s $fa0, $fa0 +-; LA32F-NEXT: movfr2gr.s $a0, $fa0 +-; LA32F-NEXT: jirl $zero, $ra, 0 +-; +-; LA32D-LABEL: convert_float_to_u8: +-; LA32D: # %bb.0: +-; LA32D-NEXT: ftintrz.w.s $fa0, $fa0 +-; LA32D-NEXT: movfr2gr.s $a0, $fa0 +-; LA32D-NEXT: jirl $zero, $ra, 0 +-; +-; LA64F-LABEL: convert_float_to_u8: +-; LA64F: # %bb.0: +-; LA64F-NEXT: ftintrz.w.s $fa0, $fa0 +-; LA64F-NEXT: movfr2gr.s $a0, $fa0 +-; LA64F-NEXT: jirl $zero, $ra, 0 
+-; +-; LA64D-LABEL: convert_float_to_u8: +-; LA64D: # %bb.0: +-; LA64D-NEXT: ftintrz.l.s $fa0, $fa0 +-; LA64D-NEXT: movfr2gr.d $a0, $fa0 +-; LA64D-NEXT: jirl $zero, $ra, 0 +- %1 = fptoui float %a to i8 +- ret i8 %1 +-} +- +-define zeroext i16 @convert_float_to_u16(float %a) nounwind { +-; LA32F-LABEL: convert_float_to_u16: +-; LA32F: # %bb.0: +-; LA32F-NEXT: ftintrz.w.s $fa0, $fa0 +-; LA32F-NEXT: movfr2gr.s $a0, $fa0 +-; LA32F-NEXT: jirl $zero, $ra, 0 +-; +-; LA32D-LABEL: convert_float_to_u16: +-; LA32D: # %bb.0: +-; LA32D-NEXT: ftintrz.w.s $fa0, $fa0 +-; LA32D-NEXT: movfr2gr.s $a0, $fa0 +-; LA32D-NEXT: jirl $zero, $ra, 0 +-; +-; LA64F-LABEL: convert_float_to_u16: +-; LA64F: # %bb.0: +-; LA64F-NEXT: ftintrz.w.s $fa0, $fa0 +-; LA64F-NEXT: movfr2gr.s $a0, $fa0 +-; LA64F-NEXT: jirl $zero, $ra, 0 +-; +-; LA64D-LABEL: convert_float_to_u16: +-; LA64D: # %bb.0: +-; LA64D-NEXT: ftintrz.l.s $fa0, $fa0 +-; LA64D-NEXT: movfr2gr.d $a0, $fa0 +-; LA64D-NEXT: jirl $zero, $ra, 0 +- %1 = fptoui float %a to i16 +- ret i16 %1 +-} +- +-define i32 @convert_float_to_u32(float %a) nounwind { +-; LA32F-LABEL: convert_float_to_u32: +-; LA32F: # %bb.0: +-; LA32F-NEXT: pcalau12i $a0, .LCPI6_0 +-; LA32F-NEXT: addi.w $a0, $a0, .LCPI6_0 +-; LA32F-NEXT: fld.s $fa1, $a0, 0 +-; LA32F-NEXT: fsub.s $fa2, $fa0, $fa1 +-; LA32F-NEXT: ftintrz.w.s $fa2, $fa2 +-; LA32F-NEXT: movfr2gr.s $a0, $fa2 +-; LA32F-NEXT: lu12i.w $a1, -524288 +-; LA32F-NEXT: xor $a0, $a0, $a1 +-; LA32F-NEXT: fcmp.clt.s $fcc0, $fa0, $fa1 +-; LA32F-NEXT: movcf2gr $a1, $fcc0 +-; LA32F-NEXT: masknez $a0, $a0, $a1 +-; LA32F-NEXT: ftintrz.w.s $fa0, $fa0 +-; LA32F-NEXT: movfr2gr.s $a2, $fa0 +-; LA32F-NEXT: maskeqz $a1, $a2, $a1 +-; LA32F-NEXT: or $a0, $a1, $a0 +-; LA32F-NEXT: jirl $zero, $ra, 0 +-; +-; LA32D-LABEL: convert_float_to_u32: +-; LA32D: # %bb.0: +-; LA32D-NEXT: pcalau12i $a0, .LCPI6_0 +-; LA32D-NEXT: addi.w $a0, $a0, .LCPI6_0 +-; LA32D-NEXT: fld.s $fa1, $a0, 0 +-; LA32D-NEXT: fsub.s $fa2, $fa0, $fa1 +-; LA32D-NEXT: ftintrz.w.s $fa2, $fa2 +-; LA32D-NEXT: movfr2gr.s $a0, $fa2 +-; LA32D-NEXT: lu12i.w $a1, -524288 +-; LA32D-NEXT: xor $a0, $a0, $a1 +-; LA32D-NEXT: fcmp.clt.s $fcc0, $fa0, $fa1 +-; LA32D-NEXT: movcf2gr $a1, $fcc0 +-; LA32D-NEXT: masknez $a0, $a0, $a1 +-; LA32D-NEXT: ftintrz.w.s $fa0, $fa0 +-; LA32D-NEXT: movfr2gr.s $a2, $fa0 +-; LA32D-NEXT: maskeqz $a1, $a2, $a1 +-; LA32D-NEXT: or $a0, $a1, $a0 +-; LA32D-NEXT: jirl $zero, $ra, 0 +-; +-; LA64F-LABEL: convert_float_to_u32: +-; LA64F: # %bb.0: +-; LA64F-NEXT: pcalau12i $a0, .LCPI6_0 +-; LA64F-NEXT: addi.d $a0, $a0, .LCPI6_0 +-; LA64F-NEXT: fld.s $fa1, $a0, 0 +-; LA64F-NEXT: fsub.s $fa2, $fa0, $fa1 +-; LA64F-NEXT: ftintrz.w.s $fa2, $fa2 +-; LA64F-NEXT: movfr2gr.s $a0, $fa2 +-; LA64F-NEXT: lu12i.w $a1, -524288 +-; LA64F-NEXT: xor $a0, $a0, $a1 +-; LA64F-NEXT: fcmp.clt.s $fcc0, $fa0, $fa1 +-; LA64F-NEXT: movcf2gr $a1, $fcc0 +-; LA64F-NEXT: masknez $a0, $a0, $a1 +-; LA64F-NEXT: ftintrz.w.s $fa0, $fa0 +-; LA64F-NEXT: movfr2gr.s $a2, $fa0 +-; LA64F-NEXT: maskeqz $a1, $a2, $a1 +-; LA64F-NEXT: or $a0, $a1, $a0 +-; LA64F-NEXT: jirl $zero, $ra, 0 +-; +-; LA64D-LABEL: convert_float_to_u32: +-; LA64D: # %bb.0: +-; LA64D-NEXT: ftintrz.l.s $fa0, $fa0 +-; LA64D-NEXT: movfr2gr.d $a0, $fa0 +-; LA64D-NEXT: jirl $zero, $ra, 0 +- %1 = fptoui float %a to i32 +- ret i32 %1 +-} +- +-define i64 @convert_float_to_u64(float %a) nounwind { +-; LA32F-LABEL: convert_float_to_u64: +-; LA32F: # %bb.0: +-; LA32F-NEXT: addi.w $sp, $sp, -16 +-; LA32F-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +-; LA32F-NEXT: bl __fixunssfdi 
+-; LA32F-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +-; LA32F-NEXT: addi.w $sp, $sp, 16 +-; LA32F-NEXT: jirl $zero, $ra, 0 +-; +-; LA32D-LABEL: convert_float_to_u64: +-; LA32D: # %bb.0: +-; LA32D-NEXT: addi.w $sp, $sp, -16 +-; LA32D-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +-; LA32D-NEXT: bl __fixunssfdi +-; LA32D-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +-; LA32D-NEXT: addi.w $sp, $sp, 16 +-; LA32D-NEXT: jirl $zero, $ra, 0 +-; +-; LA64F-LABEL: convert_float_to_u64: +-; LA64F: # %bb.0: +-; LA64F-NEXT: pcalau12i $a0, .LCPI7_0 +-; LA64F-NEXT: addi.d $a0, $a0, .LCPI7_0 +-; LA64F-NEXT: fld.s $fa1, $a0, 0 +-; LA64F-NEXT: fsub.s $fa2, $fa0, $fa1 +-; LA64F-NEXT: ftintrz.w.s $fa2, $fa2 +-; LA64F-NEXT: movfr2gr.s $a0, $fa2 +-; LA64F-NEXT: lu52i.d $a1, $zero, -2048 +-; LA64F-NEXT: xor $a0, $a0, $a1 +-; LA64F-NEXT: fcmp.clt.s $fcc0, $fa0, $fa1 +-; LA64F-NEXT: movcf2gr $a1, $fcc0 +-; LA64F-NEXT: masknez $a0, $a0, $a1 +-; LA64F-NEXT: ftintrz.w.s $fa0, $fa0 +-; LA64F-NEXT: movfr2gr.s $a2, $fa0 +-; LA64F-NEXT: maskeqz $a1, $a2, $a1 +-; LA64F-NEXT: or $a0, $a1, $a0 +-; LA64F-NEXT: jirl $zero, $ra, 0 +-; +-; LA64D-LABEL: convert_float_to_u64: +-; LA64D: # %bb.0: +-; LA64D-NEXT: pcalau12i $a0, .LCPI7_0 +-; LA64D-NEXT: addi.d $a0, $a0, .LCPI7_0 +-; LA64D-NEXT: fld.s $fa1, $a0, 0 +-; LA64D-NEXT: fsub.s $fa2, $fa0, $fa1 +-; LA64D-NEXT: ftintrz.l.s $fa2, $fa2 +-; LA64D-NEXT: movfr2gr.d $a0, $fa2 +-; LA64D-NEXT: lu52i.d $a1, $zero, -2048 +-; LA64D-NEXT: xor $a0, $a0, $a1 +-; LA64D-NEXT: fcmp.clt.s $fcc0, $fa0, $fa1 +-; LA64D-NEXT: movcf2gr $a1, $fcc0 +-; LA64D-NEXT: masknez $a0, $a0, $a1 +-; LA64D-NEXT: ftintrz.l.s $fa0, $fa0 +-; LA64D-NEXT: movfr2gr.d $a2, $fa0 +-; LA64D-NEXT: maskeqz $a1, $a2, $a1 +-; LA64D-NEXT: or $a0, $a1, $a0 +-; LA64D-NEXT: jirl $zero, $ra, 0 +- %1 = fptoui float %a to i64 +- ret i64 %1 +-} +- +-define float @convert_i8_to_float(i8 signext %a) nounwind { +-; LA32F-LABEL: convert_i8_to_float: +-; LA32F: # %bb.0: +-; LA32F-NEXT: movgr2fr.w $fa0, $a0 +-; LA32F-NEXT: ffint.s.w $fa0, $fa0 +-; LA32F-NEXT: jirl $zero, $ra, 0 +-; +-; LA32D-LABEL: convert_i8_to_float: +-; LA32D: # %bb.0: +-; LA32D-NEXT: movgr2fr.w $fa0, $a0 +-; LA32D-NEXT: ffint.s.w $fa0, $fa0 +-; LA32D-NEXT: jirl $zero, $ra, 0 +-; +-; LA64F-LABEL: convert_i8_to_float: +-; LA64F: # %bb.0: +-; LA64F-NEXT: movgr2fr.w $fa0, $a0 +-; LA64F-NEXT: ffint.s.w $fa0, $fa0 +-; LA64F-NEXT: jirl $zero, $ra, 0 +-; +-; LA64D-LABEL: convert_i8_to_float: +-; LA64D: # %bb.0: +-; LA64D-NEXT: movgr2fr.w $fa0, $a0 +-; LA64D-NEXT: ffint.s.w $fa0, $fa0 +-; LA64D-NEXT: jirl $zero, $ra, 0 +- %1 = sitofp i8 %a to float +- ret float %1 +-} +- +-define float @convert_i16_to_float(i16 signext %a) nounwind { +-; LA32F-LABEL: convert_i16_to_float: +-; LA32F: # %bb.0: +-; LA32F-NEXT: movgr2fr.w $fa0, $a0 +-; LA32F-NEXT: ffint.s.w $fa0, $fa0 +-; LA32F-NEXT: jirl $zero, $ra, 0 +-; +-; LA32D-LABEL: convert_i16_to_float: +-; LA32D: # %bb.0: +-; LA32D-NEXT: movgr2fr.w $fa0, $a0 +-; LA32D-NEXT: ffint.s.w $fa0, $fa0 +-; LA32D-NEXT: jirl $zero, $ra, 0 +-; +-; LA64F-LABEL: convert_i16_to_float: +-; LA64F: # %bb.0: +-; LA64F-NEXT: movgr2fr.w $fa0, $a0 +-; LA64F-NEXT: ffint.s.w $fa0, $fa0 +-; LA64F-NEXT: jirl $zero, $ra, 0 +-; +-; LA64D-LABEL: convert_i16_to_float: +-; LA64D: # %bb.0: +-; LA64D-NEXT: movgr2fr.w $fa0, $a0 +-; LA64D-NEXT: ffint.s.w $fa0, $fa0 +-; LA64D-NEXT: jirl $zero, $ra, 0 +- %1 = sitofp i16 %a to float +- ret float %1 +-} +- +-define float @convert_i32_to_float(i32 %a) nounwind { +-; LA32F-LABEL: convert_i32_to_float: +-; LA32F: # %bb.0: 
+-; LA32F-NEXT: movgr2fr.w $fa0, $a0 +-; LA32F-NEXT: ffint.s.w $fa0, $fa0 +-; LA32F-NEXT: jirl $zero, $ra, 0 +-; +-; LA32D-LABEL: convert_i32_to_float: +-; LA32D: # %bb.0: +-; LA32D-NEXT: movgr2fr.w $fa0, $a0 +-; LA32D-NEXT: ffint.s.w $fa0, $fa0 +-; LA32D-NEXT: jirl $zero, $ra, 0 +-; +-; LA64F-LABEL: convert_i32_to_float: +-; LA64F: # %bb.0: +-; LA64F-NEXT: addi.w $a0, $a0, 0 +-; LA64F-NEXT: movgr2fr.w $fa0, $a0 +-; LA64F-NEXT: ffint.s.w $fa0, $fa0 +-; LA64F-NEXT: jirl $zero, $ra, 0 +-; +-; LA64D-LABEL: convert_i32_to_float: +-; LA64D: # %bb.0: +-; LA64D-NEXT: addi.w $a0, $a0, 0 +-; LA64D-NEXT: movgr2fr.w $fa0, $a0 +-; LA64D-NEXT: ffint.s.w $fa0, $fa0 +-; LA64D-NEXT: jirl $zero, $ra, 0 +- %1 = sitofp i32 %a to float +- ret float %1 +-} +- +-define float @convert_i64_to_float(i64 %a) nounwind { +-; LA32F-LABEL: convert_i64_to_float: +-; LA32F: # %bb.0: +-; LA32F-NEXT: addi.w $sp, $sp, -16 +-; LA32F-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +-; LA32F-NEXT: bl __floatdisf +-; LA32F-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +-; LA32F-NEXT: addi.w $sp, $sp, 16 +-; LA32F-NEXT: jirl $zero, $ra, 0 +-; +-; LA32D-LABEL: convert_i64_to_float: +-; LA32D: # %bb.0: +-; LA32D-NEXT: addi.w $sp, $sp, -16 +-; LA32D-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +-; LA32D-NEXT: bl __floatdisf +-; LA32D-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +-; LA32D-NEXT: addi.w $sp, $sp, 16 +-; LA32D-NEXT: jirl $zero, $ra, 0 +-; +-; LA64F-LABEL: convert_i64_to_float: +-; LA64F: # %bb.0: +-; LA64F-NEXT: movgr2fr.w $fa0, $a0 +-; LA64F-NEXT: ffint.s.w $fa0, $fa0 +-; LA64F-NEXT: jirl $zero, $ra, 0 +-; +-; LA64D-LABEL: convert_i64_to_float: +-; LA64D: # %bb.0: +-; LA64D-NEXT: movgr2fr.w $fa0, $a0 +-; LA64D-NEXT: ffint.s.w $fa0, $fa0 +-; LA64D-NEXT: jirl $zero, $ra, 0 +- %1 = sitofp i64 %a to float +- ret float %1 +-} +- +-define float @convert_u8_to_float(i8 zeroext %a) nounwind { +-; LA32F-LABEL: convert_u8_to_float: +-; LA32F: # %bb.0: +-; LA32F-NEXT: movgr2fr.w $fa0, $a0 +-; LA32F-NEXT: ffint.s.w $fa0, $fa0 +-; LA32F-NEXT: jirl $zero, $ra, 0 +-; +-; LA32D-LABEL: convert_u8_to_float: +-; LA32D: # %bb.0: +-; LA32D-NEXT: movgr2fr.w $fa0, $a0 +-; LA32D-NEXT: ffint.s.w $fa0, $fa0 +-; LA32D-NEXT: jirl $zero, $ra, 0 +-; +-; LA64F-LABEL: convert_u8_to_float: +-; LA64F: # %bb.0: +-; LA64F-NEXT: movgr2fr.w $fa0, $a0 +-; LA64F-NEXT: ffint.s.w $fa0, $fa0 +-; LA64F-NEXT: jirl $zero, $ra, 0 +-; +-; LA64D-LABEL: convert_u8_to_float: +-; LA64D: # %bb.0: +-; LA64D-NEXT: movgr2fr.w $fa0, $a0 +-; LA64D-NEXT: ffint.s.w $fa0, $fa0 +-; LA64D-NEXT: jirl $zero, $ra, 0 +- %1 = uitofp i8 %a to float +- ret float %1 +-} +- +-define float @convert_u16_to_float(i16 zeroext %a) nounwind { +-; LA32F-LABEL: convert_u16_to_float: +-; LA32F: # %bb.0: +-; LA32F-NEXT: movgr2fr.w $fa0, $a0 +-; LA32F-NEXT: ffint.s.w $fa0, $fa0 +-; LA32F-NEXT: jirl $zero, $ra, 0 +-; +-; LA32D-LABEL: convert_u16_to_float: +-; LA32D: # %bb.0: +-; LA32D-NEXT: movgr2fr.w $fa0, $a0 +-; LA32D-NEXT: ffint.s.w $fa0, $fa0 +-; LA32D-NEXT: jirl $zero, $ra, 0 +-; +-; LA64F-LABEL: convert_u16_to_float: +-; LA64F: # %bb.0: +-; LA64F-NEXT: movgr2fr.w $fa0, $a0 +-; LA64F-NEXT: ffint.s.w $fa0, $fa0 +-; LA64F-NEXT: jirl $zero, $ra, 0 +-; +-; LA64D-LABEL: convert_u16_to_float: +-; LA64D: # %bb.0: +-; LA64D-NEXT: movgr2fr.w $fa0, $a0 +-; LA64D-NEXT: ffint.s.w $fa0, $fa0 +-; LA64D-NEXT: jirl $zero, $ra, 0 +- %1 = uitofp i16 %a to float +- ret float %1 +-} +- +-define float @convert_u32_to_float(i32 %a) nounwind { +-; LA32F-LABEL: convert_u32_to_float: +-; LA32F: # %bb.0: +-; 
LA32F-NEXT: srli.w $a1, $a0, 1 +-; LA32F-NEXT: andi $a2, $a0, 1 +-; LA32F-NEXT: or $a1, $a2, $a1 +-; LA32F-NEXT: movgr2fr.w $fa0, $a1 +-; LA32F-NEXT: ffint.s.w $fa0, $fa0 +-; LA32F-NEXT: fadd.s $fa0, $fa0, $fa0 +-; LA32F-NEXT: slti $a1, $a0, 0 +-; LA32F-NEXT: movgr2cf $fcc0, $a1 +-; LA32F-NEXT: movgr2fr.w $fa1, $a0 +-; LA32F-NEXT: ffint.s.w $fa1, $fa1 +-; LA32F-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 +-; LA32F-NEXT: jirl $zero, $ra, 0 +-; +-; LA32D-LABEL: convert_u32_to_float: +-; LA32D: # %bb.0: +-; LA32D-NEXT: addi.w $sp, $sp, -16 +-; LA32D-NEXT: addi.w $a1, $sp, 8 +-; LA32D-NEXT: ori $a1, $a1, 4 +-; LA32D-NEXT: lu12i.w $a2, 275200 +-; LA32D-NEXT: st.w $a2, $a1, 0 +-; LA32D-NEXT: st.w $a0, $sp, 8 +-; LA32D-NEXT: pcalau12i $a0, .LCPI14_0 +-; LA32D-NEXT: addi.w $a0, $a0, .LCPI14_0 +-; LA32D-NEXT: fld.d $fa0, $a0, 0 +-; LA32D-NEXT: fld.d $fa1, $sp, 8 +-; LA32D-NEXT: fsub.d $fa0, $fa1, $fa0 +-; LA32D-NEXT: fcvt.s.d $fa0, $fa0 +-; LA32D-NEXT: addi.w $sp, $sp, 16 +-; LA32D-NEXT: jirl $zero, $ra, 0 +-; +-; LA64F-LABEL: convert_u32_to_float: +-; LA64F: # %bb.0: +-; LA64F-NEXT: bstrpick.d $a1, $a0, 31, 1 +-; LA64F-NEXT: andi $a2, $a0, 1 +-; LA64F-NEXT: or $a1, $a2, $a1 +-; LA64F-NEXT: movgr2fr.w $fa0, $a1 +-; LA64F-NEXT: ffint.s.w $fa0, $fa0 +-; LA64F-NEXT: fadd.s $fa0, $fa0, $fa0 +-; LA64F-NEXT: bstrpick.d $a0, $a0, 31, 0 +-; LA64F-NEXT: slti $a1, $a0, 0 +-; LA64F-NEXT: movgr2cf $fcc0, $a1 +-; LA64F-NEXT: movgr2fr.w $fa1, $a0 +-; LA64F-NEXT: ffint.s.w $fa1, $fa1 +-; LA64F-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 +-; LA64F-NEXT: jirl $zero, $ra, 0 +-; +-; LA64D-LABEL: convert_u32_to_float: +-; LA64D: # %bb.0: +-; LA64D-NEXT: bstrpick.d $a1, $a0, 31, 1 +-; LA64D-NEXT: andi $a2, $a0, 1 +-; LA64D-NEXT: or $a1, $a2, $a1 +-; LA64D-NEXT: movgr2fr.w $fa0, $a1 +-; LA64D-NEXT: ffint.s.w $fa0, $fa0 +-; LA64D-NEXT: fadd.s $fa0, $fa0, $fa0 +-; LA64D-NEXT: bstrpick.d $a0, $a0, 31, 0 +-; LA64D-NEXT: slti $a1, $a0, 0 +-; LA64D-NEXT: movgr2cf $fcc0, $a1 +-; LA64D-NEXT: movgr2fr.w $fa1, $a0 +-; LA64D-NEXT: ffint.s.w $fa1, $fa1 +-; LA64D-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 +-; LA64D-NEXT: jirl $zero, $ra, 0 +- %1 = uitofp i32 %a to float +- ret float %1 +-} +- +-define float @convert_u64_to_float(i64 %a) nounwind { +-; LA32F-LABEL: convert_u64_to_float: +-; LA32F: # %bb.0: +-; LA32F-NEXT: addi.w $sp, $sp, -16 +-; LA32F-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +-; LA32F-NEXT: bl __floatundisf +-; LA32F-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +-; LA32F-NEXT: addi.w $sp, $sp, 16 +-; LA32F-NEXT: jirl $zero, $ra, 0 +-; +-; LA32D-LABEL: convert_u64_to_float: +-; LA32D: # %bb.0: +-; LA32D-NEXT: addi.w $sp, $sp, -16 +-; LA32D-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +-; LA32D-NEXT: bl __floatundisf +-; LA32D-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +-; LA32D-NEXT: addi.w $sp, $sp, 16 +-; LA32D-NEXT: jirl $zero, $ra, 0 +-; +-; LA64F-LABEL: convert_u64_to_float: +-; LA64F: # %bb.0: +-; LA64F-NEXT: srli.d $a1, $a0, 1 +-; LA64F-NEXT: andi $a2, $a0, 1 +-; LA64F-NEXT: or $a1, $a2, $a1 +-; LA64F-NEXT: movgr2fr.w $fa0, $a1 +-; LA64F-NEXT: ffint.s.w $fa0, $fa0 +-; LA64F-NEXT: fadd.s $fa0, $fa0, $fa0 +-; LA64F-NEXT: slti $a1, $a0, 0 +-; LA64F-NEXT: movgr2cf $fcc0, $a1 +-; LA64F-NEXT: movgr2fr.w $fa1, $a0 +-; LA64F-NEXT: ffint.s.w $fa1, $fa1 +-; LA64F-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 +-; LA64F-NEXT: jirl $zero, $ra, 0 +-; +-; LA64D-LABEL: convert_u64_to_float: +-; LA64D: # %bb.0: +-; LA64D-NEXT: srli.d $a1, $a0, 1 +-; LA64D-NEXT: andi $a2, $a0, 1 +-; LA64D-NEXT: or $a1, $a2, $a1 +-; LA64D-NEXT: movgr2fr.w $fa0, $a1 +-; 
LA64D-NEXT: ffint.s.w $fa0, $fa0 +-; LA64D-NEXT: fadd.s $fa0, $fa0, $fa0 +-; LA64D-NEXT: slti $a1, $a0, 0 +-; LA64D-NEXT: movgr2cf $fcc0, $a1 +-; LA64D-NEXT: movgr2fr.w $fa1, $a0 +-; LA64D-NEXT: ffint.s.w $fa1, $fa1 +-; LA64D-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 +-; LA64D-NEXT: jirl $zero, $ra, 0 +- %1 = uitofp i64 %a to float +- ret float %1 +-} +- +-define i32 @bitcast_float_to_i32(float %a) nounwind { +-; LA32F-LABEL: bitcast_float_to_i32: +-; LA32F: # %bb.0: +-; LA32F-NEXT: movfr2gr.s $a0, $fa0 +-; LA32F-NEXT: jirl $zero, $ra, 0 +-; +-; LA32D-LABEL: bitcast_float_to_i32: +-; LA32D: # %bb.0: +-; LA32D-NEXT: movfr2gr.s $a0, $fa0 +-; LA32D-NEXT: jirl $zero, $ra, 0 +-; +-; LA64F-LABEL: bitcast_float_to_i32: +-; LA64F: # %bb.0: +-; LA64F-NEXT: movfr2gr.s $a0, $fa0 +-; LA64F-NEXT: jirl $zero, $ra, 0 +-; +-; LA64D-LABEL: bitcast_float_to_i32: +-; LA64D: # %bb.0: +-; LA64D-NEXT: movfr2gr.s $a0, $fa0 +-; LA64D-NEXT: jirl $zero, $ra, 0 +- %1 = bitcast float %a to i32 +- ret i32 %1 +-} +- +-define float @bitcast_i32_to_float(i32 %a) nounwind { +-; LA32F-LABEL: bitcast_i32_to_float: +-; LA32F: # %bb.0: +-; LA32F-NEXT: movgr2fr.w $fa0, $a0 +-; LA32F-NEXT: jirl $zero, $ra, 0 +-; +-; LA32D-LABEL: bitcast_i32_to_float: +-; LA32D: # %bb.0: +-; LA32D-NEXT: movgr2fr.w $fa0, $a0 +-; LA32D-NEXT: jirl $zero, $ra, 0 +-; +-; LA64F-LABEL: bitcast_i32_to_float: +-; LA64F: # %bb.0: +-; LA64F-NEXT: movgr2fr.w $fa0, $a0 +-; LA64F-NEXT: jirl $zero, $ra, 0 +-; +-; LA64D-LABEL: bitcast_i32_to_float: +-; LA64D: # %bb.0: +-; LA64D-NEXT: movgr2fr.w $fa0, $a0 +-; LA64D-NEXT: jirl $zero, $ra, 0 +- %1 = bitcast i32 %a to float +- ret float %1 +-} +diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/fmul.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/fmul.ll +deleted file mode 100644 +index 78ee031c1..000000000 +--- a/llvm/test/CodeGen/LoongArch/ir-instruction/fmul.ll ++++ /dev/null +@@ -1,32 +0,0 @@ +-; RUN: llc --mtriple=loongarch32 --mattr=+d < %s | FileCheck %s --check-prefix=LA32 +-; RUN: llc --mtriple=loongarch64 --mattr=+d < %s | FileCheck %s --check-prefix=LA64 +- +-;; Exercise the 'fmul' LLVM IR: https://llvm.org/docs/LangRef.html#fmul-instruction +- +-define float @fmul_s(float %x, float %y) { +-; LA32-LABEL: fmul_s: +-; LA32: # %bb.0: +-; LA32-NEXT: fmul.s $fa0, $fa0, $fa1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: fmul_s: +-; LA64: # %bb.0: +-; LA64-NEXT: fmul.s $fa0, $fa0, $fa1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %mul = fmul float %x, %y +- ret float %mul +-} +- +-define double @fmul_d(double %x, double %y) { +-; LA32-LABEL: fmul_d: +-; LA32: # %bb.0: +-; LA32-NEXT: fmul.d $fa0, $fa0, $fa1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: fmul_d: +-; LA64: # %bb.0: +-; LA64-NEXT: fmul.d $fa0, $fa0, $fa1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %mul = fmul double %x, %y +- ret double %mul +-} +diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/fneg.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/fneg.ll +deleted file mode 100644 +index 3a8a4127d..000000000 +--- a/llvm/test/CodeGen/LoongArch/ir-instruction/fneg.ll ++++ /dev/null +@@ -1,32 +0,0 @@ +-; RUN: llc --mtriple=loongarch32 --mattr=+d < %s | FileCheck %s --check-prefix=LA32 +-; RUN: llc --mtriple=loongarch64 --mattr=+d < %s | FileCheck %s --check-prefix=LA64 +- +-;; Exercise the 'fneg' LLVM IR: https://llvm.org/docs/LangRef.html#fneg-instruction +- +-define float @fneg_s(float %x) { +-; LA32-LABEL: fneg_s: +-; LA32: # %bb.0: +-; LA32-NEXT: fneg.s $fa0, $fa0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: fneg_s: +-; LA64: # %bb.0: 
+-; LA64-NEXT: fneg.s $fa0, $fa0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %neg = fneg float %x +- ret float %neg +-} +- +-define double @fneg_d(double %x) { +-; LA32-LABEL: fneg_d: +-; LA32: # %bb.0: +-; LA32-NEXT: fneg.d $fa0, $fa0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: fneg_d: +-; LA64: # %bb.0: +-; LA64-NEXT: fneg.d $fa0, $fa0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %neg = fneg double %x +- ret double %neg +-} +diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/fsub.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/fsub.ll +deleted file mode 100644 +index 9ddf583d9..000000000 +--- a/llvm/test/CodeGen/LoongArch/ir-instruction/fsub.ll ++++ /dev/null +@@ -1,60 +0,0 @@ +-; RUN: llc --mtriple=loongarch32 --mattr=+d < %s | FileCheck %s --check-prefix=LA32 +-; RUN: llc --mtriple=loongarch64 --mattr=+d < %s | FileCheck %s --check-prefix=LA64 +- +-;; Exercise the 'fsub' LLVM IR: https://llvm.org/docs/LangRef.html#fsub-instruction +- +-define float @fsub_s(float %x, float %y) { +-; LA32-LABEL: fsub_s: +-; LA32: # %bb.0: +-; LA32-NEXT: fsub.s $fa0, $fa0, $fa1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: fsub_s: +-; LA64: # %bb.0: +-; LA64-NEXT: fsub.s $fa0, $fa0, $fa1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %sub = fsub float %x, %y +- ret float %sub +-} +- +-define double @fsub_d(double %x, double %y) { +-; LA32-LABEL: fsub_d: +-; LA32: # %bb.0: +-; LA32-NEXT: fsub.d $fa0, $fa0, $fa1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: fsub_d: +-; LA64: # %bb.0: +-; LA64-NEXT: fsub.d $fa0, $fa0, $fa1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %sub = fsub double %x, %y +- ret double %sub +-} +- +-define float @fneg_s(float %x) { +-; LA32-LABEL: fneg_s: +-; LA32: # %bb.0: +-; LA32-NEXT: fneg.s $fa0, $fa0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: fneg_s: +-; LA64: # %bb.0: +-; LA64-NEXT: fneg.s $fa0, $fa0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %res = fsub float -0.0, %x +- ret float %res +-} +- +-define double @fneg_d(double %x) { +-; LA32-LABEL: fneg_d: +-; LA32: # %bb.0: +-; LA32-NEXT: fneg.d $fa0, $fa0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: fneg_d: +-; LA64: # %bb.0: +-; LA64-NEXT: fneg.d $fa0, $fa0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %res = fsub double -0.0, %x +- ret double %res +-} +diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/icmp.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/icmp.ll +deleted file mode 100644 +index 947886e6b..000000000 +--- a/llvm/test/CodeGen/LoongArch/ir-instruction/icmp.ll ++++ /dev/null +@@ -1,244 +0,0 @@ +-; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32 +-; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64 +- +-;; Exercise the 'icmp' LLVM IR: https://llvm.org/docs/LangRef.html#icmp-instruction +- +-define i1 @icmp_eq(i32 signext %a, i32 signext %b) { +-; LA32-LABEL: icmp_eq: +-; LA32: # %bb.0: +-; LA32-NEXT: xor $a0, $a0, $a1 +-; LA32-NEXT: sltui $a0, $a0, 1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: icmp_eq: +-; LA64: # %bb.0: +-; LA64-NEXT: xor $a0, $a0, $a1 +-; LA64-NEXT: sltui $a0, $a0, 1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %res = icmp eq i32 %a, %b +- ret i1 %res +-} +- +-define i1 @icmp_ne(i32 signext %a, i32 signext %b) { +-; LA32-LABEL: icmp_ne: +-; LA32: # %bb.0: +-; LA32-NEXT: xor $a0, $a0, $a1 +-; LA32-NEXT: sltu $a0, $zero, $a0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: icmp_ne: +-; LA64: # %bb.0: +-; LA64-NEXT: xor $a0, $a0, $a1 +-; LA64-NEXT: sltu $a0, $zero, $a0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %res = icmp ne i32 %a, %b +- ret i1 %res 
+-} +- +-define i1 @icmp_ugt(i32 signext %a, i32 signext %b) { +-; LA32-LABEL: icmp_ugt: +-; LA32: # %bb.0: +-; LA32-NEXT: sltu $a0, $a1, $a0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: icmp_ugt: +-; LA64: # %bb.0: +-; LA64-NEXT: sltu $a0, $a1, $a0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %res = icmp ugt i32 %a, %b +- ret i1 %res +-} +- +-define i1 @icmp_uge(i32 signext %a, i32 signext %b) { +-; LA32-LABEL: icmp_uge: +-; LA32: # %bb.0: +-; LA32-NEXT: sltu $a0, $a0, $a1 +-; LA32-NEXT: xori $a0, $a0, 1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: icmp_uge: +-; LA64: # %bb.0: +-; LA64-NEXT: sltu $a0, $a0, $a1 +-; LA64-NEXT: xori $a0, $a0, 1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %res = icmp uge i32 %a, %b +- ret i1 %res +-} +- +-define i1 @icmp_ult(i32 signext %a, i32 signext %b) { +-; LA32-LABEL: icmp_ult: +-; LA32: # %bb.0: +-; LA32-NEXT: sltu $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: icmp_ult: +-; LA64: # %bb.0: +-; LA64-NEXT: sltu $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %res = icmp ult i32 %a, %b +- ret i1 %res +-} +- +-define i1 @icmp_ule(i32 signext %a, i32 signext %b) { +-; LA32-LABEL: icmp_ule: +-; LA32: # %bb.0: +-; LA32-NEXT: sltu $a0, $a1, $a0 +-; LA32-NEXT: xori $a0, $a0, 1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: icmp_ule: +-; LA64: # %bb.0: +-; LA64-NEXT: sltu $a0, $a1, $a0 +-; LA64-NEXT: xori $a0, $a0, 1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %res = icmp ule i32 %a, %b +- ret i1 %res +-} +- +-define i1 @icmp_sgt(i32 signext %a, i32 signext %b) { +-; LA32-LABEL: icmp_sgt: +-; LA32: # %bb.0: +-; LA32-NEXT: slt $a0, $a1, $a0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: icmp_sgt: +-; LA64: # %bb.0: +-; LA64-NEXT: slt $a0, $a1, $a0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %res = icmp sgt i32 %a, %b +- ret i1 %res +-} +- +-define i1 @icmp_sge(i32 signext %a, i32 signext %b) { +-; LA32-LABEL: icmp_sge: +-; LA32: # %bb.0: +-; LA32-NEXT: slt $a0, $a0, $a1 +-; LA32-NEXT: xori $a0, $a0, 1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: icmp_sge: +-; LA64: # %bb.0: +-; LA64-NEXT: slt $a0, $a0, $a1 +-; LA64-NEXT: xori $a0, $a0, 1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %res = icmp sge i32 %a, %b +- ret i1 %res +-} +- +-define i1 @icmp_slt(i32 signext %a, i32 signext %b) { +-; LA32-LABEL: icmp_slt: +-; LA32: # %bb.0: +-; LA32-NEXT: slt $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: icmp_slt: +-; LA64: # %bb.0: +-; LA64-NEXT: slt $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %res = icmp slt i32 %a, %b +- ret i1 %res +-} +- +-define i1 @icmp_sle(i32 signext %a, i32 signext %b) { +-; LA32-LABEL: icmp_sle: +-; LA32: # %bb.0: +-; LA32-NEXT: slt $a0, $a1, $a0 +-; LA32-NEXT: xori $a0, $a0, 1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: icmp_sle: +-; LA64: # %bb.0: +-; LA64-NEXT: slt $a0, $a1, $a0 +-; LA64-NEXT: xori $a0, $a0, 1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %res = icmp sle i32 %a, %b +- ret i1 %res +-} +- +-define i1 @icmp_slt_3(i32 signext %a) { +-; LA32-LABEL: icmp_slt_3: +-; LA32: # %bb.0: +-; LA32-NEXT: slti $a0, $a0, 3 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: icmp_slt_3: +-; LA64: # %bb.0: +-; LA64-NEXT: slti $a0, $a0, 3 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %res = icmp slt i32 %a, 3 +- ret i1 %res +-} +- +-define i1 @icmp_ult_3(i32 signext %a) { +-; LA32-LABEL: icmp_ult_3: +-; LA32: # %bb.0: +-; LA32-NEXT: sltui $a0, $a0, 3 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: icmp_ult_3: +-; LA64: # %bb.0: +-; LA64-NEXT: sltui $a0, $a0, 3 +-; LA64-NEXT: jirl $zero, $ra, 
0 +- %res = icmp ult i32 %a, 3 +- ret i1 %res +-} +- +-define i1 @icmp_eq_0(i32 signext %a) { +-; LA32-LABEL: icmp_eq_0: +-; LA32: # %bb.0: +-; LA32-NEXT: sltui $a0, $a0, 1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: icmp_eq_0: +-; LA64: # %bb.0: +-; LA64-NEXT: sltui $a0, $a0, 1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %res = icmp eq i32 %a, 0 +- ret i1 %res +-} +- +-define i1 @icmp_eq_3(i32 signext %a) { +-; LA32-LABEL: icmp_eq_3: +-; LA32: # %bb.0: +-; LA32-NEXT: addi.w $a0, $a0, -3 +-; LA32-NEXT: sltui $a0, $a0, 1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: icmp_eq_3: +-; LA64: # %bb.0: +-; LA64-NEXT: addi.d $a0, $a0, -3 +-; LA64-NEXT: sltui $a0, $a0, 1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %res = icmp eq i32 %a, 3 +- ret i1 %res +-} +- +-define i1 @icmp_ne_0(i32 signext %a) { +-; LA32-LABEL: icmp_ne_0: +-; LA32: # %bb.0: +-; LA32-NEXT: sltu $a0, $zero, $a0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: icmp_ne_0: +-; LA64: # %bb.0: +-; LA64-NEXT: sltu $a0, $zero, $a0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %res = icmp ne i32 %a, 0 +- ret i1 %res +-} +- +-define i1 @icmp_ne_3(i32 signext %a) { +-; LA32-LABEL: icmp_ne_3: +-; LA32: # %bb.0: +-; LA32-NEXT: addi.w $a0, $a0, -3 +-; LA32-NEXT: sltu $a0, $zero, $a0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: icmp_ne_3: +-; LA64: # %bb.0: +-; LA64-NEXT: addi.d $a0, $a0, -3 +-; LA64-NEXT: sltu $a0, $zero, $a0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %res = icmp ne i32 %a, 3 +- ret i1 %res +-} +diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/indirectbr.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/indirectbr.ll +deleted file mode 100644 +index abbd700f4..000000000 +--- a/llvm/test/CodeGen/LoongArch/ir-instruction/indirectbr.ll ++++ /dev/null +@@ -1,30 +0,0 @@ +-; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s +- +-define i32 @indirectbr(ptr %target) nounwind { +-; CHECK-LABEL: indirectbr: +-; CHECK: # %bb.0: +-; CHECK-NEXT: jirl $zero, $a0, 0 +-; CHECK-NEXT: .LBB0_1: # %test_label +-; CHECK-NEXT: move $a0, $zero +-; CHECK-NEXT: jirl $zero, $ra, 0 +- indirectbr ptr %target, [label %test_label] +-test_label: +- br label %ret +-ret: +- ret i32 0 +-} +- +-define i32 @indirectbr_with_offset(ptr %a) nounwind { +-; CHECK-LABEL: indirectbr_with_offset: +-; CHECK: # %bb.0: +-; CHECK-NEXT: jirl $zero, $a0, 1380 +-; CHECK-NEXT: .LBB1_1: # %test_label +-; CHECK-NEXT: move $a0, $zero +-; CHECK-NEXT: jirl $zero, $ra, 0 +- %target = getelementptr inbounds i8, ptr %a, i32 1380 +- indirectbr ptr %target, [label %test_label] +-test_label: +- br label %ret +-ret: +- ret i32 0 +-} +diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll +deleted file mode 100644 +index 1f06c818a..000000000 +--- a/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll ++++ /dev/null +@@ -1,143 +0,0 @@ +-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +-; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32 +-; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64 +- +-define i8 @load_acquire_i8(ptr %ptr) { +-; LA32-LABEL: load_acquire_i8: +-; LA32: # %bb.0: +-; LA32-NEXT: ld.b $a0, $a0, 0 +-; LA32-NEXT: dbar 0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: load_acquire_i8: +-; LA64: # %bb.0: +-; LA64-NEXT: ld.b $a0, $a0, 0 +-; LA64-NEXT: dbar 0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %val = load atomic i8, ptr %ptr acquire, align 1 +- ret i8 %val +-} +- +-define i16 
@load_acquire_i16(ptr %ptr) { +-; LA32-LABEL: load_acquire_i16: +-; LA32: # %bb.0: +-; LA32-NEXT: ld.h $a0, $a0, 0 +-; LA32-NEXT: dbar 0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: load_acquire_i16: +-; LA64: # %bb.0: +-; LA64-NEXT: ld.h $a0, $a0, 0 +-; LA64-NEXT: dbar 0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %val = load atomic i16, ptr %ptr acquire, align 2 +- ret i16 %val +-} +- +-define i32 @load_acquire_i32(ptr %ptr) { +-; LA32-LABEL: load_acquire_i32: +-; LA32: # %bb.0: +-; LA32-NEXT: ld.w $a0, $a0, 0 +-; LA32-NEXT: dbar 0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: load_acquire_i32: +-; LA64: # %bb.0: +-; LA64-NEXT: ld.w $a0, $a0, 0 +-; LA64-NEXT: dbar 0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %val = load atomic i32, ptr %ptr acquire, align 4 +- ret i32 %val +-} +- +-define i64 @load_acquire_i64(ptr %ptr) { +-; LA32-LABEL: load_acquire_i64: +-; LA32: # %bb.0: +-; LA32-NEXT: addi.w $sp, $sp, -16 +-; LA32-NEXT: .cfi_def_cfa_offset 16 +-; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +-; LA32-NEXT: .cfi_offset 1, -4 +-; LA32-NEXT: ori $a1, $zero, 2 +-; LA32-NEXT: bl __atomic_load_8 +-; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +-; LA32-NEXT: addi.w $sp, $sp, 16 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: load_acquire_i64: +-; LA64: # %bb.0: +-; LA64-NEXT: ld.d $a0, $a0, 0 +-; LA64-NEXT: dbar 0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %val = load atomic i64, ptr %ptr acquire, align 8 +- ret i64 %val +-} +- +-define void @store_release_i8(ptr %ptr, i8 signext %v) { +-; LA32-LABEL: store_release_i8: +-; LA32: # %bb.0: +-; LA32-NEXT: dbar 0 +-; LA32-NEXT: st.b $a0, $a1, 0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: store_release_i8: +-; LA64: # %bb.0: +-; LA64-NEXT: dbar 0 +-; LA64-NEXT: st.b $a0, $a1, 0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- store atomic i8 %v, ptr %ptr release, align 1 +- ret void +-} +- +-define void @store_release_i16(ptr %ptr, i16 signext %v) { +-; LA32-LABEL: store_release_i16: +-; LA32: # %bb.0: +-; LA32-NEXT: dbar 0 +-; LA32-NEXT: st.h $a0, $a1, 0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: store_release_i16: +-; LA64: # %bb.0: +-; LA64-NEXT: dbar 0 +-; LA64-NEXT: st.h $a0, $a1, 0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- store atomic i16 %v, ptr %ptr release, align 2 +- ret void +-} +- +-define void @store_release_i32(ptr %ptr, i32 signext %v) { +-; LA32-LABEL: store_release_i32: +-; LA32: # %bb.0: +-; LA32-NEXT: dbar 0 +-; LA32-NEXT: st.w $a0, $a1, 0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: store_release_i32: +-; LA64: # %bb.0: +-; LA64-NEXT: dbar 0 +-; LA64-NEXT: st.w $a0, $a1, 0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- store atomic i32 %v, ptr %ptr release, align 4 +- ret void +-} +- +-define void @store_release_i64(ptr %ptr, i64 %v) { +-; LA32-LABEL: store_release_i64: +-; LA32: # %bb.0: +-; LA32-NEXT: addi.w $sp, $sp, -16 +-; LA32-NEXT: .cfi_def_cfa_offset 16 +-; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +-; LA32-NEXT: .cfi_offset 1, -4 +-; LA32-NEXT: ori $a3, $zero, 3 +-; LA32-NEXT: bl __atomic_store_8 +-; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +-; LA32-NEXT: addi.w $sp, $sp, 16 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: store_release_i64: +-; LA64: # %bb.0: +-; LA64-NEXT: dbar 0 +-; LA64-NEXT: st.d $a0, $a1, 0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- store atomic i64 %v, ptr %ptr release, align 8 +- ret void +-} +diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/load-store.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/load-store.ll +deleted file mode 100644 +index 
8894e3cac..000000000 +--- a/llvm/test/CodeGen/LoongArch/ir-instruction/load-store.ll ++++ /dev/null +@@ -1,406 +0,0 @@ +-; RUN: llc --mtriple=loongarch32 --mattr=+d --relocation-model=static < %s | FileCheck %s --check-prefixes=ALL,LA32NOPIC,LA32 +-; RUN: llc --mtriple=loongarch32 --mattr=+d --relocation-model=pic < %s | FileCheck %s --check-prefixes=ALL,LA32PIC,LA32 +-; RUN: llc --mtriple=loongarch64 --mattr=+d --relocation-model=static < %s | FileCheck %s --check-prefixes=ALL,LA64NOPIC,LA64 +-; RUN: llc --mtriple=loongarch64 --mattr=+d --relocation-model=pic < %s | FileCheck %s --check-prefixes=ALL,LA64PIC,LA64 +- +-;; Check load from and store to global variables. +-@G = dso_local global i32 zeroinitializer, align 4 +-@arr = dso_local global [10 x i32] zeroinitializer, align 4 +- +-define i32 @load_store_global() nounwind { +-; ALL-LABEL: load_store_global: +-; ALL: # %bb.0: +- +-; LA32NOPIC-NEXT: pcalau12i $a0, G +-; LA32NOPIC-NEXT: addi.w $a1, $a0, G +-; LA32PIC-NEXT: pcalau12i $a0, .LG$local +-; LA32PIC-NEXT: addi.w $a1, $a0, .LG$local +-; LA32-NEXT: ld.w $a0, $a1, 0 +-; LA32-NEXT: addi.w $a0, $a0, 1 +-; LA32-NEXT: st.w $a0, $a1, 0 +- +-; LA64NOPIC-NEXT: pcalau12i $a0, G +-; LA64NOPIC-NEXT: addi.d $a1, $a0, G +-; LA64PIC-NEXT: pcalau12i $a0, .LG$local +-; LA64PIC-NEXT: addi.d $a1, $a0, .LG$local +-; LA64-NEXT: ld.w $a0, $a1, 0 +-; LA64-NEXT: addi.d $a0, $a0, 1 +-; LA64-NEXT: st.w $a0, $a1, 0 +- +-; ALL-NEXT: jirl $zero, $ra, 0 +- +- %v = load i32, ptr @G +- %sum = add i32 %v, 1 +- store i32 %sum, ptr @G +- ret i32 %sum +-} +- +-define i32 @load_store_global_array(i32 %a) nounwind { +-; ALL-LABEL: load_store_global_array: +-; ALL: # %bb.0: +- +-; LA32NOPIC-NEXT: pcalau12i $a1, arr +-; LA32NOPIC-NEXT: addi.w $a2, $a1, arr +-; LA32PIC-NEXT: pcalau12i $a1, .Larr$local +-; LA32PIC-NEXT: addi.w $a2, $a1, .Larr$local +-; LA32-NEXT: ld.w $a1, $a2, 0 +-; LA32-NEXT: st.w $a0, $a2, 0 +-; LA32NOPIC-NEXT: ld.w $a3, $a2, 0 +-; LA32NOPIC-NEXT: st.w $a0, $a2, 0 +-; LA32PIC-NEXT: ld.w $a3, $a2, 36 +-; LA32PIC-NEXT: st.w $a0, $a2, 36 +- +-; LA64NOPIC-NEXT: pcalau12i $a1, arr +-; LA64NOPIC-NEXT: addi.d $a2, $a1, arr +-; LA64PIC-NEXT: pcalau12i $a1, .Larr$local +-; LA64PIC-NEXT: addi.d $a2, $a1, .Larr$local +-; LA64-NEXT: ld.w $a1, $a2, 0 +-; LA64-NEXT: st.w $a0, $a2, 0 +-; LA64NOPIC-NEXT: ld.w $a3, $a2, 0 +-; LA64NOPIC-NEXT: st.w $a0, $a2, 0 +-; LA64PIC-NEXT: ld.w $a3, $a2, 36 +-; LA64PIC-NEXT: st.w $a0, $a2, 36 +- +-; ALL-NEXT: move $a0, $a1 +-; ALL-NEXT: jirl $zero, $ra, 0 +- +- %1 = load volatile i32, ptr @arr, align 4 +- store i32 %a, ptr @arr, align 4 +- %2 = getelementptr [10 x i32], ptr @arr, i32 0, i32 9 +- %3 = load volatile i32, ptr %2, align 4 +- store i32 %a, ptr %2, align 4 +- ret i32 %1 +-} +- +-;; Check indexed and unindexed, sext, zext and anyext loads. 
+- +-define i64 @ld_b(ptr %a) nounwind { +-; LA32-LABEL: ld_b: +-; LA32: # %bb.0: +-; LA32-NEXT: ld.b $a1, $a0, 0 +-; LA32-NEXT: ld.b $a0, $a0, 1 +-; LA32-NEXT: srai.w $a1, $a0, 31 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: ld_b: +-; LA64: # %bb.0: +-; LA64-NEXT: ld.b $a1, $a0, 0 +-; LA64-NEXT: ld.b $a0, $a0, 1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %1 = getelementptr i8, ptr %a, i64 1 +- %2 = load i8, ptr %1 +- %3 = sext i8 %2 to i64 +- %4 = load volatile i8, ptr %a +- ret i64 %3 +-} +- +-define i64 @ld_h(ptr %a) nounwind { +-; LA32-LABEL: ld_h: +-; LA32: # %bb.0: +-; LA32-NEXT: ld.h $a1, $a0, 0 +-; LA32-NEXT: ld.h $a0, $a0, 4 +-; LA32-NEXT: srai.w $a1, $a0, 31 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: ld_h: +-; LA64: # %bb.0: +-; LA64-NEXT: ld.h $a1, $a0, 0 +-; LA64-NEXT: ld.h $a0, $a0, 4 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %1 = getelementptr i16, ptr %a, i64 2 +- %2 = load i16, ptr %1 +- %3 = sext i16 %2 to i64 +- %4 = load volatile i16, ptr %a +- ret i64 %3 +-} +- +-define i64 @ld_w(ptr %a) nounwind { +-; LA32-LABEL: ld_w: +-; LA32: # %bb.0: +-; LA32-NEXT: ld.w $a1, $a0, 0 +-; LA32-NEXT: ld.w $a0, $a0, 12 +-; LA32-NEXT: srai.w $a1, $a0, 31 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: ld_w: +-; LA64: # %bb.0: +-; LA64-NEXT: ld.w $a1, $a0, 0 +-; LA64-NEXT: ld.w $a0, $a0, 12 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %1 = getelementptr i32, ptr %a, i64 3 +- %2 = load i32, ptr %1 +- %3 = sext i32 %2 to i64 +- %4 = load volatile i32, ptr %a +- ret i64 %3 +-} +- +-define i64 @ld_d(ptr %a) nounwind { +-; LA32-LABEL: ld_d: +-; LA32: # %bb.0: +-; LA32-NEXT: ld.w $a1, $a0, 4 +-; LA32-NEXT: ld.w $a1, $a0, 0 +-; LA32-NEXT: ld.w $a1, $a0, 28 +-; LA32-NEXT: ld.w $a0, $a0, 24 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: ld_d: +-; LA64: # %bb.0: +-; LA64-NEXT: ld.d $a1, $a0, 0 +-; LA64-NEXT: ld.d $a0, $a0, 24 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %1 = getelementptr i64, ptr %a, i64 3 +- %2 = load i64, ptr %1 +- %3 = load volatile i64, ptr %a +- ret i64 %2 +-} +- +-define i64 @ld_bu(ptr %a) nounwind { +-; LA32-LABEL: ld_bu: +-; LA32: # %bb.0: +-; LA32-NEXT: ld.bu $a1, $a0, 0 +-; LA32-NEXT: ld.bu $a2, $a0, 4 +-; LA32-NEXT: add.w $a0, $a2, $a1 +-; LA32-NEXT: sltu $a1, $a0, $a2 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: ld_bu: +-; LA64: # %bb.0: +-; LA64-NEXT: ld.bu $a1, $a0, 0 +-; LA64-NEXT: ld.bu $a0, $a0, 4 +-; LA64-NEXT: add.d $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %1 = getelementptr i8, ptr %a, i64 4 +- %2 = load i8, ptr %1 +- %3 = zext i8 %2 to i64 +- %4 = load volatile i8, ptr %a +- %5 = zext i8 %4 to i64 +- %6 = add i64 %3, %5 +- ret i64 %6 +-} +- +-define i64 @ld_hu(ptr %a) nounwind { +-; LA32-LABEL: ld_hu: +-; LA32: # %bb.0: +-; LA32-NEXT: ld.hu $a1, $a0, 0 +-; LA32-NEXT: ld.hu $a2, $a0, 10 +-; LA32-NEXT: add.w $a0, $a2, $a1 +-; LA32-NEXT: sltu $a1, $a0, $a2 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: ld_hu: +-; LA64: # %bb.0: +-; LA64-NEXT: ld.hu $a1, $a0, 0 +-; LA64-NEXT: ld.hu $a0, $a0, 10 +-; LA64-NEXT: add.d $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %1 = getelementptr i16, ptr %a, i64 5 +- %2 = load i16, ptr %1 +- %3 = zext i16 %2 to i64 +- %4 = load volatile i16, ptr %a +- %5 = zext i16 %4 to i64 +- %6 = add i64 %3, %5 +- ret i64 %6 +-} +- +-define i64 @ld_wu(ptr %a) nounwind { +-; LA32-LABEL: ld_wu: +-; LA32: # %bb.0: +-; LA32-NEXT: ld.w $a1, $a0, 0 +-; LA32-NEXT: ld.w $a2, $a0, 20 +-; LA32-NEXT: add.w $a0, $a2, $a1 +-; LA32-NEXT: sltu $a1, $a0, $a2 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: ld_wu: +-; 
LA64: # %bb.0: +-; LA64-NEXT: ld.wu $a1, $a0, 0 +-; LA64-NEXT: ld.wu $a0, $a0, 20 +-; LA64-NEXT: add.d $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %1 = getelementptr i32, ptr %a, i64 5 +- %2 = load i32, ptr %1 +- %3 = zext i32 %2 to i64 +- %4 = load volatile i32, ptr %a +- %5 = zext i32 %4 to i64 +- %6 = add i64 %3, %5 +- ret i64 %6 +-} +- +-;; Check indexed and unindexed stores. +- +-define void @st_b(ptr %a, i8 %b) nounwind { +-; ALL-LABEL: st_b: +-; ALL: # %bb.0: +-; ALL-NEXT: st.b $a1, $a0, 6 +-; ALL-NEXT: st.b $a1, $a0, 0 +-; ALL-NEXT: jirl $zero, $ra, 0 +- store i8 %b, ptr %a +- %1 = getelementptr i8, ptr %a, i64 6 +- store i8 %b, ptr %1 +- ret void +-} +- +-define void @st_h(ptr %a, i16 %b) nounwind { +-; ALL-LABEL: st_h: +-; ALL: # %bb.0: +-; ALL-NEXT: st.h $a1, $a0, 14 +-; ALL-NEXT: st.h $a1, $a0, 0 +-; ALL-NEXT: jirl $zero, $ra, 0 +- store i16 %b, ptr %a +- %1 = getelementptr i16, ptr %a, i64 7 +- store i16 %b, ptr %1 +- ret void +-} +- +-define void @st_w(ptr %a, i32 %b) nounwind { +-; ALL-LABEL: st_w: +-; ALL: # %bb.0: +-; ALL-NEXT: st.w $a1, $a0, 28 +-; ALL-NEXT: st.w $a1, $a0, 0 +-; ALL-NEXT: jirl $zero, $ra, 0 +- store i32 %b, ptr %a +- %1 = getelementptr i32, ptr %a, i64 7 +- store i32 %b, ptr %1 +- ret void +-} +- +-define void @st_d(ptr %a, i64 %b) nounwind { +-; LA32-LABEL: st_d: +-; LA32: # %bb.0: +-; LA32-NEXT: st.w $a2, $a0, 68 +-; LA32-NEXT: st.w $a2, $a0, 4 +-; LA32-NEXT: st.w $a1, $a0, 64 +-; LA32-NEXT: st.w $a1, $a0, 0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: st_d: +-; LA64: # %bb.0: +-; LA64-NEXT: st.d $a1, $a0, 64 +-; LA64-NEXT: st.d $a1, $a0, 0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- store i64 %b, ptr %a +- %1 = getelementptr i64, ptr %a, i64 8 +- store i64 %b, ptr %1 +- ret void +-} +- +-;; Check load from and store to an i1 location. +-define i64 @load_sext_zext_anyext_i1(ptr %a) nounwind { +- ;; sextload i1 +-; LA32-LABEL: load_sext_zext_anyext_i1: +-; LA32: # %bb.0: +-; LA32-NEXT: ld.b $a1, $a0, 0 +-; LA32-NEXT: ld.bu $a1, $a0, 1 +-; LA32-NEXT: ld.bu $a2, $a0, 2 +-; LA32-NEXT: sub.w $a0, $a2, $a1 +-; LA32-NEXT: sltu $a1, $a2, $a1 +-; LA32-NEXT: sub.w $a1, $zero, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: load_sext_zext_anyext_i1: +-; LA64: # %bb.0: +-; LA64-NEXT: ld.b $a1, $a0, 0 +-; LA64-NEXT: ld.bu $a1, $a0, 1 +-; LA64-NEXT: ld.bu $a0, $a0, 2 +-; LA64-NEXT: sub.d $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %1 = getelementptr i1, ptr %a, i64 1 +- %2 = load i1, ptr %1 +- %3 = sext i1 %2 to i64 +- ;; zextload i1 +- %4 = getelementptr i1, ptr %a, i64 2 +- %5 = load i1, ptr %4 +- %6 = zext i1 %5 to i64 +- %7 = add i64 %3, %6 +- ;; extload i1 (anyext). Produced as the load is unused. +- %8 = load volatile i1, ptr %a +- ret i64 %7 +-} +- +-define i16 @load_sext_zext_anyext_i1_i16(ptr %a) nounwind { +- ;; sextload i1 +-; LA32-LABEL: load_sext_zext_anyext_i1_i16: +-; LA32: # %bb.0: +-; LA32-NEXT: ld.b $a1, $a0, 0 +-; LA32-NEXT: ld.bu $a1, $a0, 1 +-; LA32-NEXT: ld.bu $a0, $a0, 2 +-; LA32-NEXT: sub.w $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: load_sext_zext_anyext_i1_i16: +-; LA64: # %bb.0: +-; LA64-NEXT: ld.b $a1, $a0, 0 +-; LA64-NEXT: ld.bu $a1, $a0, 1 +-; LA64-NEXT: ld.bu $a0, $a0, 2 +-; LA64-NEXT: sub.d $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %1 = getelementptr i1, ptr %a, i64 1 +- %2 = load i1, ptr %1 +- %3 = sext i1 %2 to i16 +- ;; zextload i1 +- %4 = getelementptr i1, ptr %a, i64 2 +- %5 = load i1, ptr %4 +- %6 = zext i1 %5 to i16 +- %7 = add i16 %3, %6 +- ;; extload i1 (anyext). 
Produced as the load is unused. +- %8 = load volatile i1, ptr %a +- ret i16 %7 +-} +- +-define i64 @ld_sd_constant(i64 %a) nounwind { +-; LA32-LABEL: ld_sd_constant: +-; LA32: # %bb.0: +-; LA32-NEXT: lu12i.w $a3, -136485 +-; LA32-NEXT: ori $a4, $a3, 3823 +-; LA32-NEXT: ld.w $a2, $a4, 0 +-; LA32-NEXT: st.w $a0, $a4, 0 +-; LA32-NEXT: ori $a0, $a3, 3827 +-; LA32-NEXT: ld.w $a3, $a0, 0 +-; LA32-NEXT: st.w $a1, $a0, 0 +-; LA32-NEXT: move $a0, $a2 +-; LA32-NEXT: move $a1, $a3 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: ld_sd_constant: +-; LA64: # %bb.0: +-; LA64-NEXT: lu12i.w $a1, -136485 +-; LA64-NEXT: ori $a1, $a1, 3823 +-; LA64-NEXT: lu32i.d $a1, -147729 +-; LA64-NEXT: lu52i.d $a2, $a1, -534 +-; LA64-NEXT: ld.d $a1, $a2, 0 +-; LA64-NEXT: st.d $a0, $a2, 0 +-; LA64-NEXT: move $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %1 = inttoptr i64 16045690984833335023 to ptr +- %2 = load volatile i64, ptr %1 +- store i64 %a, ptr %1 +- ret i64 %2 +-} +- +-;; Check load from and store to a float location. +-define float @load_store_float(ptr %a, float %b) nounwind { +-; ALL-LABEL: load_store_float: +-; ALL: # %bb.0: +-; ALL-NEXT: fld.s $fa1, $a0, 4 +-; ALL-NEXT: fst.s $fa0, $a0, 4 +-; ALL-NEXT: fmov.s $fa0, $fa1 +-; ALL-NEXT: jirl $zero, $ra, 0 +- %1 = getelementptr float, ptr %a, i64 1 +- %2 = load float, ptr %1 +- store float %b, ptr %1 +- ret float %2 +-} +- +-;; Check load from and store to a double location. +-define double @load_store_double(ptr %a, double %b) nounwind { +-; ALL-LABEL: load_store_double: +-; ALL: # %bb.0: +-; ALL-NEXT: fld.d $fa1, $a0, 8 +-; ALL-NEXT: fst.d $fa0, $a0, 8 +-; ALL-NEXT: fmov.d $fa0, $fa1 +-; ALL-NEXT: jirl $zero, $ra, 0 +- %1 = getelementptr double, ptr %a, i64 1 +- %2 = load double, ptr %1 +- store double %b, ptr %1 +- ret double %2 +-} +diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/lshr.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/lshr.ll +deleted file mode 100644 +index 2f63c64de..000000000 +--- a/llvm/test/CodeGen/LoongArch/ir-instruction/lshr.ll ++++ /dev/null +@@ -1,160 +0,0 @@ +-; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32 +-; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64 +- +-;; Exercise the 'lshr' LLVM IR: https://llvm.org/docs/LangRef.html#lshr-instruction +- +-define i1 @lshr_i1(i1 %x, i1 %y) { +-; LA32-LABEL: lshr_i1: +-; LA32: # %bb.0: +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: lshr_i1: +-; LA64: # %bb.0: +-; LA64-NEXT: jirl $zero, $ra, 0 +- %lshr = lshr i1 %x, %y +- ret i1 %lshr +-} +- +-define i8 @lshr_i8(i8 %x, i8 %y) { +-; LA32-LABEL: lshr_i8: +-; LA32: # %bb.0: +-; LA32-NEXT: andi $a0, $a0, 255 +-; LA32-NEXT: srl.w $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: lshr_i8: +-; LA64: # %bb.0: +-; LA64-NEXT: andi $a0, $a0, 255 +-; LA64-NEXT: srl.d $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %lshr = lshr i8 %x, %y +- ret i8 %lshr +-} +- +-define i16 @lshr_i16(i16 %x, i16 %y) { +-; LA32-LABEL: lshr_i16: +-; LA32: # %bb.0: +-; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0 +-; LA32-NEXT: srl.w $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: lshr_i16: +-; LA64: # %bb.0: +-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 +-; LA64-NEXT: srl.d $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %lshr = lshr i16 %x, %y +- ret i16 %lshr +-} +- +-define i32 @lshr_i32(i32 %x, i32 %y) { +-; LA32-LABEL: lshr_i32: +-; LA32: # %bb.0: +-; LA32-NEXT: srl.w $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: lshr_i32: +-; LA64: # %bb.0: +-; 
LA64-NEXT: srl.w $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %lshr = lshr i32 %x, %y +- ret i32 %lshr +-} +- +-define i64 @lshr_i64(i64 %x, i64 %y) { +-; LA32-LABEL: lshr_i64: +-; LA32: # %bb.0: +-; LA32-NEXT: xori $a3, $a2, 31 +-; LA32-NEXT: slli.w $a4, $a1, 1 +-; LA32-NEXT: sll.w $a3, $a4, $a3 +-; LA32-NEXT: srl.w $a0, $a0, $a2 +-; LA32-NEXT: or $a0, $a0, $a3 +-; LA32-NEXT: addi.w $a3, $a2, -32 +-; LA32-NEXT: slti $a4, $a3, 0 +-; LA32-NEXT: maskeqz $a0, $a0, $a4 +-; LA32-NEXT: srl.w $a5, $a1, $a3 +-; LA32-NEXT: masknez $a4, $a5, $a4 +-; LA32-NEXT: or $a0, $a0, $a4 +-; LA32-NEXT: srl.w $a1, $a1, $a2 +-; LA32-NEXT: srai.w $a2, $a3, 31 +-; LA32-NEXT: and $a1, $a2, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: lshr_i64: +-; LA64: # %bb.0: +-; LA64-NEXT: srl.d $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %lshr = lshr i64 %x, %y +- ret i64 %lshr +-} +- +-define i1 @lshr_i1_3(i1 %x) { +-; LA32-LABEL: lshr_i1_3: +-; LA32: # %bb.0: +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: lshr_i1_3: +-; LA64: # %bb.0: +-; LA64-NEXT: jirl $zero, $ra, 0 +- %lshr = lshr i1 %x, 3 +- ret i1 %lshr +-} +- +-define i8 @lshr_i8_3(i8 %x) { +-; LA32-LABEL: lshr_i8_3: +-; LA32: # %bb.0: +-; LA32-NEXT: bstrpick.w $a0, $a0, 7, 3 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: lshr_i8_3: +-; LA64: # %bb.0: +-; LA64-NEXT: bstrpick.d $a0, $a0, 7, 3 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %lshr = lshr i8 %x, 3 +- ret i8 %lshr +-} +- +-define i16 @lshr_i16_3(i16 %x) { +-; LA32-LABEL: lshr_i16_3: +-; LA32: # %bb.0: +-; LA32-NEXT: bstrpick.w $a0, $a0, 15, 3 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: lshr_i16_3: +-; LA64: # %bb.0: +-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 3 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %lshr = lshr i16 %x, 3 +- ret i16 %lshr +-} +- +-define i32 @lshr_i32_3(i32 %x) { +-; LA32-LABEL: lshr_i32_3: +-; LA32: # %bb.0: +-; LA32-NEXT: srli.w $a0, $a0, 3 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: lshr_i32_3: +-; LA64: # %bb.0: +-; LA64-NEXT: bstrpick.d $a0, $a0, 31, 3 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %lshr = lshr i32 %x, 3 +- ret i32 %lshr +-} +- +-define i64 @lshr_i64_3(i64 %x) { +-; LA32-LABEL: lshr_i64_3: +-; LA32: # %bb.0: +-; LA32-NEXT: srli.w $a0, $a0, 3 +-; LA32-NEXT: slli.w $a2, $a1, 29 +-; LA32-NEXT: or $a0, $a0, $a2 +-; LA32-NEXT: srli.w $a1, $a1, 3 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: lshr_i64_3: +-; LA64: # %bb.0: +-; LA64-NEXT: srli.d $a0, $a0, 3 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %lshr = lshr i64 %x, 3 +- ret i64 %lshr +-} +diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/mul.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/mul.ll +deleted file mode 100644 +index 0d31e790c..000000000 +--- a/llvm/test/CodeGen/LoongArch/ir-instruction/mul.ll ++++ /dev/null +@@ -1,287 +0,0 @@ +-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +-; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32 +-; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64 +- +-;; Exercise the 'mul' LLVM IR: https://llvm.org/docs/LangRef.html#mul-instruction +- +-define i1 @mul_i1(i1 %a, i1 %b) { +-; LA32-LABEL: mul_i1: +-; LA32: # %bb.0: # %entry +-; LA32-NEXT: mul.w $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: mul_i1: +-; LA64: # %bb.0: # %entry +-; LA64-NEXT: mul.d $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +-entry: +- %r = mul i1 %a, %b +- ret i1 %r +-} +- +-define i8 @mul_i8(i8 %a, i8 %b) { +-; LA32-LABEL: mul_i8: +-; LA32: # %bb.0: # %entry +-; LA32-NEXT: 
mul.w $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: mul_i8: +-; LA64: # %bb.0: # %entry +-; LA64-NEXT: mul.d $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +-entry: +- %r = mul i8 %a, %b +- ret i8 %r +-} +- +-define i16 @mul_i16(i16 %a, i16 %b) { +-; LA32-LABEL: mul_i16: +-; LA32: # %bb.0: # %entry +-; LA32-NEXT: mul.w $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: mul_i16: +-; LA64: # %bb.0: # %entry +-; LA64-NEXT: mul.d $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +-entry: +- %r = mul i16 %a, %b +- ret i16 %r +-} +- +-define i32 @mul_i32(i32 %a, i32 %b) { +-; LA32-LABEL: mul_i32: +-; LA32: # %bb.0: # %entry +-; LA32-NEXT: mul.w $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: mul_i32: +-; LA64: # %bb.0: # %entry +-; LA64-NEXT: mul.d $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +-entry: +- %r = mul i32 %a, %b +- ret i32 %r +-} +- +-define i64 @mul_i64(i64 %a, i64 %b) { +-; LA32-LABEL: mul_i64: +-; LA32: # %bb.0: # %entry +-; LA32-NEXT: mul.w $a3, $a0, $a3 +-; LA32-NEXT: mulh.wu $a4, $a0, $a2 +-; LA32-NEXT: add.w $a3, $a4, $a3 +-; LA32-NEXT: mul.w $a1, $a1, $a2 +-; LA32-NEXT: add.w $a1, $a3, $a1 +-; LA32-NEXT: mul.w $a0, $a0, $a2 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: mul_i64: +-; LA64: # %bb.0: # %entry +-; LA64-NEXT: mul.d $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +-entry: +- %r = mul i64 %a, %b +- ret i64 %r +-} +- +-define i64 @mul_pow2(i64 %a) { +-; LA32-LABEL: mul_pow2: +-; LA32: # %bb.0: +-; LA32-NEXT: slli.w $a1, $a1, 3 +-; LA32-NEXT: srli.w $a2, $a0, 29 +-; LA32-NEXT: or $a1, $a1, $a2 +-; LA32-NEXT: slli.w $a0, $a0, 3 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: mul_pow2: +-; LA64: # %bb.0: +-; LA64-NEXT: slli.d $a0, $a0, 3 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %1 = mul i64 %a, 8 +- ret i64 %1 +-} +- +-define i64 @mul_p5(i64 %a) { +-; LA32-LABEL: mul_p5: +-; LA32: # %bb.0: +-; LA32-NEXT: ori $a2, $zero, 5 +-; LA32-NEXT: mul.w $a1, $a1, $a2 +-; LA32-NEXT: mulh.wu $a3, $a0, $a2 +-; LA32-NEXT: add.w $a1, $a3, $a1 +-; LA32-NEXT: mul.w $a0, $a0, $a2 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: mul_p5: +-; LA64: # %bb.0: +-; LA64-NEXT: ori $a1, $zero, 5 +-; LA64-NEXT: mul.d $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %1 = mul i64 %a, 5 +- ret i64 %1 +-} +- +-define i32 @mulh_w(i32 %a, i32 %b) { +-; LA32-LABEL: mulh_w: +-; LA32: # %bb.0: +-; LA32-NEXT: mulh.w $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: mulh_w: +-; LA64: # %bb.0: +-; LA64-NEXT: mulw.d.w $a0, $a0, $a1 +-; LA64-NEXT: srli.d $a0, $a0, 32 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %1 = sext i32 %a to i64 +- %2 = sext i32 %b to i64 +- %3 = mul i64 %1, %2 +- %4 = lshr i64 %3, 32 +- %5 = trunc i64 %4 to i32 +- ret i32 %5 +-} +- +-define i32 @mulh_wu(i32 %a, i32 %b) { +-; LA32-LABEL: mulh_wu: +-; LA32: # %bb.0: +-; LA32-NEXT: mulh.wu $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: mulh_wu: +-; LA64: # %bb.0: +-; LA64-NEXT: mulw.d.wu $a0, $a0, $a1 +-; LA64-NEXT: srli.d $a0, $a0, 32 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %1 = zext i32 %a to i64 +- %2 = zext i32 %b to i64 +- %3 = mul i64 %1, %2 +- %4 = lshr i64 %3, 32 +- %5 = trunc i64 %4 to i32 +- ret i32 %5 +-} +- +-define i64 @mulh_d(i64 %a, i64 %b) { +-; LA32-LABEL: mulh_d: +-; LA32: # %bb.0: +-; LA32-NEXT: mulh.wu $a4, $a0, $a2 +-; LA32-NEXT: mul.w $a5, $a1, $a2 +-; LA32-NEXT: add.w $a4, $a5, $a4 +-; LA32-NEXT: sltu $a5, $a4, $a5 +-; LA32-NEXT: mulh.wu $a6, $a1, $a2 +-; LA32-NEXT: add.w $a5, $a6, $a5 +-; LA32-NEXT: mul.w $a6, $a0, 
$a3 +-; LA32-NEXT: add.w $a4, $a6, $a4 +-; LA32-NEXT: sltu $a4, $a4, $a6 +-; LA32-NEXT: mulh.wu $a6, $a0, $a3 +-; LA32-NEXT: add.w $a4, $a6, $a4 +-; LA32-NEXT: add.w $a4, $a5, $a4 +-; LA32-NEXT: sltu $a5, $a4, $a5 +-; LA32-NEXT: mulh.wu $a6, $a1, $a3 +-; LA32-NEXT: add.w $a5, $a6, $a5 +-; LA32-NEXT: mul.w $a6, $a1, $a3 +-; LA32-NEXT: add.w $a4, $a6, $a4 +-; LA32-NEXT: sltu $a6, $a4, $a6 +-; LA32-NEXT: add.w $a5, $a5, $a6 +-; LA32-NEXT: srai.w $a6, $a1, 31 +-; LA32-NEXT: mul.w $a7, $a2, $a6 +-; LA32-NEXT: mulh.wu $a2, $a2, $a6 +-; LA32-NEXT: add.w $a2, $a2, $a7 +-; LA32-NEXT: mul.w $a6, $a3, $a6 +-; LA32-NEXT: add.w $a2, $a2, $a6 +-; LA32-NEXT: srai.w $a3, $a3, 31 +-; LA32-NEXT: mul.w $a1, $a3, $a1 +-; LA32-NEXT: mulh.wu $a6, $a3, $a0 +-; LA32-NEXT: add.w $a1, $a6, $a1 +-; LA32-NEXT: mul.w $a0, $a3, $a0 +-; LA32-NEXT: add.w $a1, $a1, $a0 +-; LA32-NEXT: add.w $a1, $a1, $a2 +-; LA32-NEXT: add.w $a2, $a0, $a7 +-; LA32-NEXT: sltu $a0, $a2, $a0 +-; LA32-NEXT: add.w $a0, $a1, $a0 +-; LA32-NEXT: add.w $a1, $a5, $a0 +-; LA32-NEXT: add.w $a0, $a4, $a2 +-; LA32-NEXT: sltu $a2, $a0, $a4 +-; LA32-NEXT: add.w $a1, $a1, $a2 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: mulh_d: +-; LA64: # %bb.0: +-; LA64-NEXT: mulh.d $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %1 = sext i64 %a to i128 +- %2 = sext i64 %b to i128 +- %3 = mul i128 %1, %2 +- %4 = lshr i128 %3, 64 +- %5 = trunc i128 %4 to i64 +- ret i64 %5 +-} +- +-define i64 @mulh_du(i64 %a, i64 %b) { +-; LA32-LABEL: mulh_du: +-; LA32: # %bb.0: +-; LA32-NEXT: mulh.wu $a4, $a0, $a2 +-; LA32-NEXT: mul.w $a5, $a1, $a2 +-; LA32-NEXT: add.w $a4, $a5, $a4 +-; LA32-NEXT: sltu $a5, $a4, $a5 +-; LA32-NEXT: mulh.wu $a2, $a1, $a2 +-; LA32-NEXT: add.w $a2, $a2, $a5 +-; LA32-NEXT: mul.w $a5, $a0, $a3 +-; LA32-NEXT: add.w $a4, $a5, $a4 +-; LA32-NEXT: sltu $a4, $a4, $a5 +-; LA32-NEXT: mulh.wu $a0, $a0, $a3 +-; LA32-NEXT: add.w $a0, $a0, $a4 +-; LA32-NEXT: mul.w $a4, $a1, $a3 +-; LA32-NEXT: mulh.wu $a1, $a1, $a3 +-; LA32-NEXT: add.w $a0, $a2, $a0 +-; LA32-NEXT: sltu $a2, $a0, $a2 +-; LA32-NEXT: add.w $a1, $a1, $a2 +-; LA32-NEXT: add.w $a0, $a4, $a0 +-; LA32-NEXT: sltu $a2, $a0, $a4 +-; LA32-NEXT: add.w $a1, $a1, $a2 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: mulh_du: +-; LA64: # %bb.0: +-; LA64-NEXT: mulh.du $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %1 = zext i64 %a to i128 +- %2 = zext i64 %b to i128 +- %3 = mul i128 %1, %2 +- %4 = lshr i128 %3, 64 +- %5 = trunc i128 %4 to i64 +- ret i64 %5 +-} +- +-define i64 @mulw_d_w(i32 %a, i32 %b) { +-; LA32-LABEL: mulw_d_w: +-; LA32: # %bb.0: +-; LA32-NEXT: mul.w $a2, $a0, $a1 +-; LA32-NEXT: mulh.w $a1, $a0, $a1 +-; LA32-NEXT: move $a0, $a2 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: mulw_d_w: +-; LA64: # %bb.0: +-; LA64-NEXT: mulw.d.w $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %1 = sext i32 %a to i64 +- %2 = sext i32 %b to i64 +- %3 = mul i64 %1, %2 +- ret i64 %3 +-} +- +-define i64 @mulw_d_wu(i32 %a, i32 %b) { +-; LA32-LABEL: mulw_d_wu: +-; LA32: # %bb.0: +-; LA32-NEXT: mul.w $a2, $a0, $a1 +-; LA32-NEXT: mulh.wu $a1, $a0, $a1 +-; LA32-NEXT: move $a0, $a2 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: mulw_d_wu: +-; LA64: # %bb.0: +-; LA64-NEXT: mulw.d.wu $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %1 = zext i32 %a to i64 +- %2 = zext i32 %b to i64 +- %3 = mul i64 %1, %2 +- ret i64 %3 +-} +diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/or.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/or.ll +deleted file mode 100644 +index 370065732..000000000 +--- 
a/llvm/test/CodeGen/LoongArch/ir-instruction/or.ll ++++ /dev/null +@@ -1,264 +0,0 @@ +-; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32 +-; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64 +- +-;; Exercise the 'or' LLVM IR: https://llvm.org/docs/LangRef.html#or-instruction +- +-define i1 @or_i1(i1 %a, i1 %b) { +-; LA32-LABEL: or_i1: +-; LA32: # %bb.0: # %entry +-; LA32-NEXT: or $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: or_i1: +-; LA64: # %bb.0: # %entry +-; LA64-NEXT: or $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +-entry: +- %r = or i1 %a, %b +- ret i1 %r +-} +- +-define i8 @or_i8(i8 %a, i8 %b) { +-; LA32-LABEL: or_i8: +-; LA32: # %bb.0: # %entry +-; LA32-NEXT: or $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: or_i8: +-; LA64: # %bb.0: # %entry +-; LA64-NEXT: or $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +-entry: +- %r = or i8 %a, %b +- ret i8 %r +-} +- +-define i16 @or_i16(i16 %a, i16 %b) { +-; LA32-LABEL: or_i16: +-; LA32: # %bb.0: # %entry +-; LA32-NEXT: or $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: or_i16: +-; LA64: # %bb.0: # %entry +-; LA64-NEXT: or $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +-entry: +- %r = or i16 %a, %b +- ret i16 %r +-} +- +-define i32 @or_i32(i32 %a, i32 %b) { +-; LA32-LABEL: or_i32: +-; LA32: # %bb.0: # %entry +-; LA32-NEXT: or $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: or_i32: +-; LA64: # %bb.0: # %entry +-; LA64-NEXT: or $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +-entry: +- %r = or i32 %a, %b +- ret i32 %r +-} +- +-define i64 @or_i64(i64 %a, i64 %b) { +-; LA32-LABEL: or_i64: +-; LA32: # %bb.0: # %entry +-; LA32-NEXT: or $a0, $a0, $a2 +-; LA32-NEXT: or $a1, $a1, $a3 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: or_i64: +-; LA64: # %bb.0: # %entry +-; LA64-NEXT: or $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +-entry: +- %r = or i64 %a, %b +- ret i64 %r +-} +- +-define i1 @or_i1_0(i1 %b) { +-; LA32-LABEL: or_i1_0: +-; LA32: # %bb.0: # %entry +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: or_i1_0: +-; LA64: # %bb.0: # %entry +-; LA64-NEXT: jirl $zero, $ra, 0 +-entry: +- %r = or i1 4, %b +- ret i1 %r +-} +- +-define i1 @or_i1_5(i1 %b) { +-; LA32-LABEL: or_i1_5: +-; LA32: # %bb.0: # %entry +-; LA32-NEXT: ori $a0, $zero, 1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: or_i1_5: +-; LA64: # %bb.0: # %entry +-; LA64-NEXT: ori $a0, $zero, 1 +-; LA64-NEXT: jirl $zero, $ra, 0 +-entry: +- %r = or i1 5, %b +- ret i1 %r +-} +- +-define i8 @or_i8_5(i8 %b) { +-; LA32-LABEL: or_i8_5: +-; LA32: # %bb.0: # %entry +-; LA32-NEXT: ori $a0, $a0, 5 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: or_i8_5: +-; LA64: # %bb.0: # %entry +-; LA64-NEXT: ori $a0, $a0, 5 +-; LA64-NEXT: jirl $zero, $ra, 0 +-entry: +- %r = or i8 5, %b +- ret i8 %r +-} +- +-define i8 @or_i8_257(i8 %b) { +-; LA32-LABEL: or_i8_257: +-; LA32: # %bb.0: # %entry +-; LA32-NEXT: ori $a0, $a0, 1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: or_i8_257: +-; LA64: # %bb.0: # %entry +-; LA64-NEXT: ori $a0, $a0, 1 +-; LA64-NEXT: jirl $zero, $ra, 0 +-entry: +- %r = or i8 257, %b +- ret i8 %r +-} +- +-define i16 @or_i16_5(i16 %b) { +-; LA32-LABEL: or_i16_5: +-; LA32: # %bb.0: # %entry +-; LA32-NEXT: ori $a0, $a0, 5 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: or_i16_5: +-; LA64: # %bb.0: # %entry +-; LA64-NEXT: ori $a0, $a0, 5 +-; LA64-NEXT: jirl $zero, $ra, 0 +-entry: +- %r = or i16 5, %b +- ret i16 %r +-} +- +-define 
i16 @or_i16_0x1000(i16 %b) { +-; LA32-LABEL: or_i16_0x1000: +-; LA32: # %bb.0: # %entry +-; LA32-NEXT: lu12i.w $a1, 1 +-; LA32-NEXT: or $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: or_i16_0x1000: +-; LA64: # %bb.0: # %entry +-; LA64-NEXT: lu12i.w $a1, 1 +-; LA64-NEXT: or $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +-entry: +- %r = or i16 4096, %b +- ret i16 %r +-} +- +-define i16 @or_i16_0x10001(i16 %b) { +-; LA32-LABEL: or_i16_0x10001: +-; LA32: # %bb.0: # %entry +-; LA32-NEXT: ori $a0, $a0, 1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: or_i16_0x10001: +-; LA64: # %bb.0: # %entry +-; LA64-NEXT: ori $a0, $a0, 1 +-; LA64-NEXT: jirl $zero, $ra, 0 +-entry: +- %r = or i16 65537, %b +- ret i16 %r +-} +- +-define i32 @or_i32_5(i32 %b) { +-; LA32-LABEL: or_i32_5: +-; LA32: # %bb.0: # %entry +-; LA32-NEXT: ori $a0, $a0, 5 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: or_i32_5: +-; LA64: # %bb.0: # %entry +-; LA64-NEXT: ori $a0, $a0, 5 +-; LA64-NEXT: jirl $zero, $ra, 0 +-entry: +- %r = or i32 5, %b +- ret i32 %r +-} +- +-define i32 @or_i32_0x1000(i32 %b) { +-; LA32-LABEL: or_i32_0x1000: +-; LA32: # %bb.0: # %entry +-; LA32-NEXT: lu12i.w $a1, 1 +-; LA32-NEXT: or $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: or_i32_0x1000: +-; LA64: # %bb.0: # %entry +-; LA64-NEXT: lu12i.w $a1, 1 +-; LA64-NEXT: or $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +-entry: +- %r = or i32 4096, %b +- ret i32 %r +-} +- +-define i32 @or_i32_0x100000001(i32 %b) { +-; LA32-LABEL: or_i32_0x100000001: +-; LA32: # %bb.0: # %entry +-; LA32-NEXT: ori $a0, $a0, 1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: or_i32_0x100000001: +-; LA64: # %bb.0: # %entry +-; LA64-NEXT: ori $a0, $a0, 1 +-; LA64-NEXT: jirl $zero, $ra, 0 +-entry: +- %r = or i32 4294967297, %b +- ret i32 %r +-} +- +-define i64 @or_i64_5(i64 %b) { +-; LA32-LABEL: or_i64_5: +-; LA32: # %bb.0: # %entry +-; LA32-NEXT: ori $a0, $a0, 5 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: or_i64_5: +-; LA64: # %bb.0: # %entry +-; LA64-NEXT: ori $a0, $a0, 5 +-; LA64-NEXT: jirl $zero, $ra, 0 +-entry: +- %r = or i64 5, %b +- ret i64 %r +-} +- +-define i64 @or_i64_0x1000(i64 %b) { +-; LA32-LABEL: or_i64_0x1000: +-; LA32: # %bb.0: # %entry +-; LA32-NEXT: lu12i.w $a2, 1 +-; LA32-NEXT: or $a0, $a0, $a2 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: or_i64_0x1000: +-; LA64: # %bb.0: # %entry +-; LA64-NEXT: lu12i.w $a1, 1 +-; LA64-NEXT: or $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +-entry: +- %r = or i64 4096, %b +- ret i64 %r +-} +diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll +deleted file mode 100644 +index 1f1a5c9b9..000000000 +--- a/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll ++++ /dev/null +@@ -1,685 +0,0 @@ +-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +-; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32 +-; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64 +-; RUN: llc --mtriple=loongarch32 -loongarch-check-zero-division < %s \ +-; RUN: | FileCheck %s --check-prefix=LA32-TRAP +-; RUN: llc --mtriple=loongarch64 -loongarch-check-zero-division < %s \ +-; RUN: | FileCheck %s --check-prefix=LA64-TRAP +- +-;; Test the sdiv/udiv/srem/urem LLVM IR. 
+- +-define i1 @sdiv_i1(i1 %a, i1 %b) { +-; LA32-LABEL: sdiv_i1: +-; LA32: # %bb.0: # %entry +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: sdiv_i1: +-; LA64: # %bb.0: # %entry +-; LA64-NEXT: jirl $zero, $ra, 0 +-; +-; LA32-TRAP-LABEL: sdiv_i1: +-; LA32-TRAP: # %bb.0: # %entry +-; LA32-TRAP-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-TRAP-LABEL: sdiv_i1: +-; LA64-TRAP: # %bb.0: # %entry +-; LA64-TRAP-NEXT: jirl $zero, $ra, 0 +-entry: +- %r = sdiv i1 %a, %b +- ret i1 %r +-} +- +-define i8 @sdiv_i8(i8 %a, i8 %b) { +-; LA32-LABEL: sdiv_i8: +-; LA32: # %bb.0: # %entry +-; LA32-NEXT: ext.w.b $a1, $a1 +-; LA32-NEXT: ext.w.b $a0, $a0 +-; LA32-NEXT: div.w $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: sdiv_i8: +-; LA64: # %bb.0: # %entry +-; LA64-NEXT: ext.w.b $a1, $a1 +-; LA64-NEXT: ext.w.b $a0, $a0 +-; LA64-NEXT: div.d $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +-; +-; LA32-TRAP-LABEL: sdiv_i8: +-; LA32-TRAP: # %bb.0: # %entry +-; LA32-TRAP-NEXT: ext.w.b $a1, $a1 +-; LA32-TRAP-NEXT: ext.w.b $a0, $a0 +-; LA32-TRAP-NEXT: div.w $a0, $a0, $a1 +-; LA32-TRAP-NEXT: bnez $a1, 8 +-; LA32-TRAP-NEXT: break 7 +-; LA32-TRAP-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-TRAP-LABEL: sdiv_i8: +-; LA64-TRAP: # %bb.0: # %entry +-; LA64-TRAP-NEXT: ext.w.b $a1, $a1 +-; LA64-TRAP-NEXT: ext.w.b $a0, $a0 +-; LA64-TRAP-NEXT: div.d $a0, $a0, $a1 +-; LA64-TRAP-NEXT: bnez $a1, 8 +-; LA64-TRAP-NEXT: break 7 +-; LA64-TRAP-NEXT: jirl $zero, $ra, 0 +-entry: +- %r = sdiv i8 %a, %b +- ret i8 %r +-} +- +-define i16 @sdiv_i16(i16 %a, i16 %b) { +-; LA32-LABEL: sdiv_i16: +-; LA32: # %bb.0: # %entry +-; LA32-NEXT: ext.w.h $a1, $a1 +-; LA32-NEXT: ext.w.h $a0, $a0 +-; LA32-NEXT: div.w $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: sdiv_i16: +-; LA64: # %bb.0: # %entry +-; LA64-NEXT: ext.w.h $a1, $a1 +-; LA64-NEXT: ext.w.h $a0, $a0 +-; LA64-NEXT: div.d $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +-; +-; LA32-TRAP-LABEL: sdiv_i16: +-; LA32-TRAP: # %bb.0: # %entry +-; LA32-TRAP-NEXT: ext.w.h $a1, $a1 +-; LA32-TRAP-NEXT: ext.w.h $a0, $a0 +-; LA32-TRAP-NEXT: div.w $a0, $a0, $a1 +-; LA32-TRAP-NEXT: bnez $a1, 8 +-; LA32-TRAP-NEXT: break 7 +-; LA32-TRAP-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-TRAP-LABEL: sdiv_i16: +-; LA64-TRAP: # %bb.0: # %entry +-; LA64-TRAP-NEXT: ext.w.h $a1, $a1 +-; LA64-TRAP-NEXT: ext.w.h $a0, $a0 +-; LA64-TRAP-NEXT: div.d $a0, $a0, $a1 +-; LA64-TRAP-NEXT: bnez $a1, 8 +-; LA64-TRAP-NEXT: break 7 +-; LA64-TRAP-NEXT: jirl $zero, $ra, 0 +-entry: +- %r = sdiv i16 %a, %b +- ret i16 %r +-} +- +-define i32 @sdiv_i32(i32 %a, i32 %b) { +-; LA32-LABEL: sdiv_i32: +-; LA32: # %bb.0: # %entry +-; LA32-NEXT: div.w $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: sdiv_i32: +-; LA64: # %bb.0: # %entry +-; LA64-NEXT: addi.w $a1, $a1, 0 +-; LA64-NEXT: addi.w $a0, $a0, 0 +-; LA64-NEXT: div.d $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +-; +-; LA32-TRAP-LABEL: sdiv_i32: +-; LA32-TRAP: # %bb.0: # %entry +-; LA32-TRAP-NEXT: div.w $a0, $a0, $a1 +-; LA32-TRAP-NEXT: bnez $a1, 8 +-; LA32-TRAP-NEXT: break 7 +-; LA32-TRAP-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-TRAP-LABEL: sdiv_i32: +-; LA64-TRAP: # %bb.0: # %entry +-; LA64-TRAP-NEXT: addi.w $a1, $a1, 0 +-; LA64-TRAP-NEXT: addi.w $a0, $a0, 0 +-; LA64-TRAP-NEXT: div.d $a0, $a0, $a1 +-; LA64-TRAP-NEXT: bnez $a1, 8 +-; LA64-TRAP-NEXT: break 7 +-; LA64-TRAP-NEXT: jirl $zero, $ra, 0 +-entry: +- %r = sdiv i32 %a, %b +- ret i32 %r +-} +- +-define i64 @sdiv_i64(i64 %a, i64 %b) { +-; LA32-LABEL: sdiv_i64: +-; LA32: # %bb.0: # %entry +-; 
LA32-NEXT: addi.w $sp, $sp, -16 +-; LA32-NEXT: .cfi_def_cfa_offset 16 +-; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +-; LA32-NEXT: .cfi_offset 1, -4 +-; LA32-NEXT: bl __divdi3 +-; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +-; LA32-NEXT: addi.w $sp, $sp, 16 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: sdiv_i64: +-; LA64: # %bb.0: # %entry +-; LA64-NEXT: div.d $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +-; +-; LA32-TRAP-LABEL: sdiv_i64: +-; LA32-TRAP: # %bb.0: # %entry +-; LA32-TRAP-NEXT: addi.w $sp, $sp, -16 +-; LA32-TRAP-NEXT: .cfi_def_cfa_offset 16 +-; LA32-TRAP-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +-; LA32-TRAP-NEXT: .cfi_offset 1, -4 +-; LA32-TRAP-NEXT: bl __divdi3 +-; LA32-TRAP-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +-; LA32-TRAP-NEXT: addi.w $sp, $sp, 16 +-; LA32-TRAP-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-TRAP-LABEL: sdiv_i64: +-; LA64-TRAP: # %bb.0: # %entry +-; LA64-TRAP-NEXT: div.d $a0, $a0, $a1 +-; LA64-TRAP-NEXT: bnez $a1, 8 +-; LA64-TRAP-NEXT: break 7 +-; LA64-TRAP-NEXT: jirl $zero, $ra, 0 +-entry: +- %r = sdiv i64 %a, %b +- ret i64 %r +-} +- +-define i1 @udiv_i1(i1 %a, i1 %b) { +-; LA32-LABEL: udiv_i1: +-; LA32: # %bb.0: # %entry +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: udiv_i1: +-; LA64: # %bb.0: # %entry +-; LA64-NEXT: jirl $zero, $ra, 0 +-; +-; LA32-TRAP-LABEL: udiv_i1: +-; LA32-TRAP: # %bb.0: # %entry +-; LA32-TRAP-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-TRAP-LABEL: udiv_i1: +-; LA64-TRAP: # %bb.0: # %entry +-; LA64-TRAP-NEXT: jirl $zero, $ra, 0 +-entry: +- %r = udiv i1 %a, %b +- ret i1 %r +-} +- +-define i8 @udiv_i8(i8 %a, i8 %b) { +-; LA32-LABEL: udiv_i8: +-; LA32: # %bb.0: # %entry +-; LA32-NEXT: andi $a1, $a1, 255 +-; LA32-NEXT: andi $a0, $a0, 255 +-; LA32-NEXT: div.wu $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: udiv_i8: +-; LA64: # %bb.0: # %entry +-; LA64-NEXT: andi $a1, $a1, 255 +-; LA64-NEXT: andi $a0, $a0, 255 +-; LA64-NEXT: div.du $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +-; +-; LA32-TRAP-LABEL: udiv_i8: +-; LA32-TRAP: # %bb.0: # %entry +-; LA32-TRAP-NEXT: andi $a1, $a1, 255 +-; LA32-TRAP-NEXT: andi $a0, $a0, 255 +-; LA32-TRAP-NEXT: div.wu $a0, $a0, $a1 +-; LA32-TRAP-NEXT: bnez $a1, 8 +-; LA32-TRAP-NEXT: break 7 +-; LA32-TRAP-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-TRAP-LABEL: udiv_i8: +-; LA64-TRAP: # %bb.0: # %entry +-; LA64-TRAP-NEXT: andi $a1, $a1, 255 +-; LA64-TRAP-NEXT: andi $a0, $a0, 255 +-; LA64-TRAP-NEXT: div.du $a0, $a0, $a1 +-; LA64-TRAP-NEXT: bnez $a1, 8 +-; LA64-TRAP-NEXT: break 7 +-; LA64-TRAP-NEXT: jirl $zero, $ra, 0 +-entry: +- %r = udiv i8 %a, %b +- ret i8 %r +-} +- +-define i16 @udiv_i16(i16 %a, i16 %b) { +-; LA32-LABEL: udiv_i16: +-; LA32: # %bb.0: # %entry +-; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 +-; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0 +-; LA32-NEXT: div.wu $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: udiv_i16: +-; LA64: # %bb.0: # %entry +-; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 +-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 +-; LA64-NEXT: div.du $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +-; +-; LA32-TRAP-LABEL: udiv_i16: +-; LA32-TRAP: # %bb.0: # %entry +-; LA32-TRAP-NEXT: bstrpick.w $a1, $a1, 15, 0 +-; LA32-TRAP-NEXT: bstrpick.w $a0, $a0, 15, 0 +-; LA32-TRAP-NEXT: div.wu $a0, $a0, $a1 +-; LA32-TRAP-NEXT: bnez $a1, 8 +-; LA32-TRAP-NEXT: break 7 +-; LA32-TRAP-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-TRAP-LABEL: udiv_i16: +-; LA64-TRAP: # %bb.0: # %entry +-; LA64-TRAP-NEXT: bstrpick.d $a1, $a1, 15, 0 +-; LA64-TRAP-NEXT: bstrpick.d 
$a0, $a0, 15, 0 +-; LA64-TRAP-NEXT: div.du $a0, $a0, $a1 +-; LA64-TRAP-NEXT: bnez $a1, 8 +-; LA64-TRAP-NEXT: break 7 +-; LA64-TRAP-NEXT: jirl $zero, $ra, 0 +-entry: +- %r = udiv i16 %a, %b +- ret i16 %r +-} +- +-define i32 @udiv_i32(i32 %a, i32 %b) { +-; LA32-LABEL: udiv_i32: +-; LA32: # %bb.0: # %entry +-; LA32-NEXT: div.wu $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: udiv_i32: +-; LA64: # %bb.0: # %entry +-; LA64-NEXT: bstrpick.d $a1, $a1, 31, 0 +-; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0 +-; LA64-NEXT: div.du $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +-; +-; LA32-TRAP-LABEL: udiv_i32: +-; LA32-TRAP: # %bb.0: # %entry +-; LA32-TRAP-NEXT: div.wu $a0, $a0, $a1 +-; LA32-TRAP-NEXT: bnez $a1, 8 +-; LA32-TRAP-NEXT: break 7 +-; LA32-TRAP-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-TRAP-LABEL: udiv_i32: +-; LA64-TRAP: # %bb.0: # %entry +-; LA64-TRAP-NEXT: bstrpick.d $a1, $a1, 31, 0 +-; LA64-TRAP-NEXT: bstrpick.d $a0, $a0, 31, 0 +-; LA64-TRAP-NEXT: div.du $a0, $a0, $a1 +-; LA64-TRAP-NEXT: bnez $a1, 8 +-; LA64-TRAP-NEXT: break 7 +-; LA64-TRAP-NEXT: jirl $zero, $ra, 0 +-entry: +- %r = udiv i32 %a, %b +- ret i32 %r +-} +- +-define i64 @udiv_i64(i64 %a, i64 %b) { +-; LA32-LABEL: udiv_i64: +-; LA32: # %bb.0: # %entry +-; LA32-NEXT: addi.w $sp, $sp, -16 +-; LA32-NEXT: .cfi_def_cfa_offset 16 +-; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +-; LA32-NEXT: .cfi_offset 1, -4 +-; LA32-NEXT: bl __udivdi3 +-; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +-; LA32-NEXT: addi.w $sp, $sp, 16 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: udiv_i64: +-; LA64: # %bb.0: # %entry +-; LA64-NEXT: div.du $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +-; +-; LA32-TRAP-LABEL: udiv_i64: +-; LA32-TRAP: # %bb.0: # %entry +-; LA32-TRAP-NEXT: addi.w $sp, $sp, -16 +-; LA32-TRAP-NEXT: .cfi_def_cfa_offset 16 +-; LA32-TRAP-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +-; LA32-TRAP-NEXT: .cfi_offset 1, -4 +-; LA32-TRAP-NEXT: bl __udivdi3 +-; LA32-TRAP-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +-; LA32-TRAP-NEXT: addi.w $sp, $sp, 16 +-; LA32-TRAP-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-TRAP-LABEL: udiv_i64: +-; LA64-TRAP: # %bb.0: # %entry +-; LA64-TRAP-NEXT: div.du $a0, $a0, $a1 +-; LA64-TRAP-NEXT: bnez $a1, 8 +-; LA64-TRAP-NEXT: break 7 +-; LA64-TRAP-NEXT: jirl $zero, $ra, 0 +-entry: +- %r = udiv i64 %a, %b +- ret i64 %r +-} +- +-define i1 @srem_i1(i1 %a, i1 %b) { +-; LA32-LABEL: srem_i1: +-; LA32: # %bb.0: # %entry +-; LA32-NEXT: move $a0, $zero +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: srem_i1: +-; LA64: # %bb.0: # %entry +-; LA64-NEXT: move $a0, $zero +-; LA64-NEXT: jirl $zero, $ra, 0 +-; +-; LA32-TRAP-LABEL: srem_i1: +-; LA32-TRAP: # %bb.0: # %entry +-; LA32-TRAP-NEXT: move $a0, $zero +-; LA32-TRAP-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-TRAP-LABEL: srem_i1: +-; LA64-TRAP: # %bb.0: # %entry +-; LA64-TRAP-NEXT: move $a0, $zero +-; LA64-TRAP-NEXT: jirl $zero, $ra, 0 +-entry: +- %r = srem i1 %a, %b +- ret i1 %r +-} +- +-define i8 @srem_i8(i8 %a, i8 %b) { +-; LA32-LABEL: srem_i8: +-; LA32: # %bb.0: # %entry +-; LA32-NEXT: ext.w.b $a1, $a1 +-; LA32-NEXT: ext.w.b $a0, $a0 +-; LA32-NEXT: mod.w $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: srem_i8: +-; LA64: # %bb.0: # %entry +-; LA64-NEXT: ext.w.b $a1, $a1 +-; LA64-NEXT: ext.w.b $a0, $a0 +-; LA64-NEXT: mod.d $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +-; +-; LA32-TRAP-LABEL: srem_i8: +-; LA32-TRAP: # %bb.0: # %entry +-; LA32-TRAP-NEXT: ext.w.b $a1, $a1 +-; LA32-TRAP-NEXT: ext.w.b $a0, $a0 +-; 
LA32-TRAP-NEXT: mod.w $a0, $a0, $a1 +-; LA32-TRAP-NEXT: bnez $a1, 8 +-; LA32-TRAP-NEXT: break 7 +-; LA32-TRAP-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-TRAP-LABEL: srem_i8: +-; LA64-TRAP: # %bb.0: # %entry +-; LA64-TRAP-NEXT: ext.w.b $a1, $a1 +-; LA64-TRAP-NEXT: ext.w.b $a0, $a0 +-; LA64-TRAP-NEXT: mod.d $a0, $a0, $a1 +-; LA64-TRAP-NEXT: bnez $a1, 8 +-; LA64-TRAP-NEXT: break 7 +-; LA64-TRAP-NEXT: jirl $zero, $ra, 0 +-entry: +- %r = srem i8 %a, %b +- ret i8 %r +-} +- +-define i16 @srem_i16(i16 %a, i16 %b) { +-; LA32-LABEL: srem_i16: +-; LA32: # %bb.0: # %entry +-; LA32-NEXT: ext.w.h $a1, $a1 +-; LA32-NEXT: ext.w.h $a0, $a0 +-; LA32-NEXT: mod.w $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: srem_i16: +-; LA64: # %bb.0: # %entry +-; LA64-NEXT: ext.w.h $a1, $a1 +-; LA64-NEXT: ext.w.h $a0, $a0 +-; LA64-NEXT: mod.d $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +-; +-; LA32-TRAP-LABEL: srem_i16: +-; LA32-TRAP: # %bb.0: # %entry +-; LA32-TRAP-NEXT: ext.w.h $a1, $a1 +-; LA32-TRAP-NEXT: ext.w.h $a0, $a0 +-; LA32-TRAP-NEXT: mod.w $a0, $a0, $a1 +-; LA32-TRAP-NEXT: bnez $a1, 8 +-; LA32-TRAP-NEXT: break 7 +-; LA32-TRAP-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-TRAP-LABEL: srem_i16: +-; LA64-TRAP: # %bb.0: # %entry +-; LA64-TRAP-NEXT: ext.w.h $a1, $a1 +-; LA64-TRAP-NEXT: ext.w.h $a0, $a0 +-; LA64-TRAP-NEXT: mod.d $a0, $a0, $a1 +-; LA64-TRAP-NEXT: bnez $a1, 8 +-; LA64-TRAP-NEXT: break 7 +-; LA64-TRAP-NEXT: jirl $zero, $ra, 0 +-entry: +- %r = srem i16 %a, %b +- ret i16 %r +-} +- +-define i32 @srem_i32(i32 %a, i32 %b) { +-; LA32-LABEL: srem_i32: +-; LA32: # %bb.0: # %entry +-; LA32-NEXT: mod.w $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: srem_i32: +-; LA64: # %bb.0: # %entry +-; LA64-NEXT: addi.w $a1, $a1, 0 +-; LA64-NEXT: addi.w $a0, $a0, 0 +-; LA64-NEXT: mod.d $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +-; +-; LA32-TRAP-LABEL: srem_i32: +-; LA32-TRAP: # %bb.0: # %entry +-; LA32-TRAP-NEXT: mod.w $a0, $a0, $a1 +-; LA32-TRAP-NEXT: bnez $a1, 8 +-; LA32-TRAP-NEXT: break 7 +-; LA32-TRAP-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-TRAP-LABEL: srem_i32: +-; LA64-TRAP: # %bb.0: # %entry +-; LA64-TRAP-NEXT: addi.w $a1, $a1, 0 +-; LA64-TRAP-NEXT: addi.w $a0, $a0, 0 +-; LA64-TRAP-NEXT: mod.d $a0, $a0, $a1 +-; LA64-TRAP-NEXT: bnez $a1, 8 +-; LA64-TRAP-NEXT: break 7 +-; LA64-TRAP-NEXT: jirl $zero, $ra, 0 +-entry: +- %r = srem i32 %a, %b +- ret i32 %r +-} +- +-define i64 @srem_i64(i64 %a, i64 %b) { +-; LA32-LABEL: srem_i64: +-; LA32: # %bb.0: # %entry +-; LA32-NEXT: addi.w $sp, $sp, -16 +-; LA32-NEXT: .cfi_def_cfa_offset 16 +-; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +-; LA32-NEXT: .cfi_offset 1, -4 +-; LA32-NEXT: bl __moddi3 +-; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +-; LA32-NEXT: addi.w $sp, $sp, 16 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: srem_i64: +-; LA64: # %bb.0: # %entry +-; LA64-NEXT: mod.d $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +-; +-; LA32-TRAP-LABEL: srem_i64: +-; LA32-TRAP: # %bb.0: # %entry +-; LA32-TRAP-NEXT: addi.w $sp, $sp, -16 +-; LA32-TRAP-NEXT: .cfi_def_cfa_offset 16 +-; LA32-TRAP-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +-; LA32-TRAP-NEXT: .cfi_offset 1, -4 +-; LA32-TRAP-NEXT: bl __moddi3 +-; LA32-TRAP-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +-; LA32-TRAP-NEXT: addi.w $sp, $sp, 16 +-; LA32-TRAP-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-TRAP-LABEL: srem_i64: +-; LA64-TRAP: # %bb.0: # %entry +-; LA64-TRAP-NEXT: mod.d $a0, $a0, $a1 +-; LA64-TRAP-NEXT: bnez $a1, 8 +-; LA64-TRAP-NEXT: break 7 +-; LA64-TRAP-NEXT: 
jirl $zero, $ra, 0 +-entry: +- %r = srem i64 %a, %b +- ret i64 %r +-} +- +-define i1 @urem_i1(i1 %a, i1 %b) { +-; LA32-LABEL: urem_i1: +-; LA32: # %bb.0: # %entry +-; LA32-NEXT: move $a0, $zero +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: urem_i1: +-; LA64: # %bb.0: # %entry +-; LA64-NEXT: move $a0, $zero +-; LA64-NEXT: jirl $zero, $ra, 0 +-; +-; LA32-TRAP-LABEL: urem_i1: +-; LA32-TRAP: # %bb.0: # %entry +-; LA32-TRAP-NEXT: move $a0, $zero +-; LA32-TRAP-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-TRAP-LABEL: urem_i1: +-; LA64-TRAP: # %bb.0: # %entry +-; LA64-TRAP-NEXT: move $a0, $zero +-; LA64-TRAP-NEXT: jirl $zero, $ra, 0 +-entry: +- %r = urem i1 %a, %b +- ret i1 %r +-} +- +-define i8 @urem_i8(i8 %a, i8 %b) { +-; LA32-LABEL: urem_i8: +-; LA32: # %bb.0: # %entry +-; LA32-NEXT: andi $a1, $a1, 255 +-; LA32-NEXT: andi $a0, $a0, 255 +-; LA32-NEXT: mod.wu $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: urem_i8: +-; LA64: # %bb.0: # %entry +-; LA64-NEXT: andi $a1, $a1, 255 +-; LA64-NEXT: andi $a0, $a0, 255 +-; LA64-NEXT: mod.du $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +-; +-; LA32-TRAP-LABEL: urem_i8: +-; LA32-TRAP: # %bb.0: # %entry +-; LA32-TRAP-NEXT: andi $a1, $a1, 255 +-; LA32-TRAP-NEXT: andi $a0, $a0, 255 +-; LA32-TRAP-NEXT: mod.wu $a0, $a0, $a1 +-; LA32-TRAP-NEXT: bnez $a1, 8 +-; LA32-TRAP-NEXT: break 7 +-; LA32-TRAP-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-TRAP-LABEL: urem_i8: +-; LA64-TRAP: # %bb.0: # %entry +-; LA64-TRAP-NEXT: andi $a1, $a1, 255 +-; LA64-TRAP-NEXT: andi $a0, $a0, 255 +-; LA64-TRAP-NEXT: mod.du $a0, $a0, $a1 +-; LA64-TRAP-NEXT: bnez $a1, 8 +-; LA64-TRAP-NEXT: break 7 +-; LA64-TRAP-NEXT: jirl $zero, $ra, 0 +-entry: +- %r = urem i8 %a, %b +- ret i8 %r +-} +- +-define i16 @urem_i16(i16 %a, i16 %b) { +-; LA32-LABEL: urem_i16: +-; LA32: # %bb.0: # %entry +-; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 +-; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0 +-; LA32-NEXT: mod.wu $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: urem_i16: +-; LA64: # %bb.0: # %entry +-; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 +-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 +-; LA64-NEXT: mod.du $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +-; +-; LA32-TRAP-LABEL: urem_i16: +-; LA32-TRAP: # %bb.0: # %entry +-; LA32-TRAP-NEXT: bstrpick.w $a1, $a1, 15, 0 +-; LA32-TRAP-NEXT: bstrpick.w $a0, $a0, 15, 0 +-; LA32-TRAP-NEXT: mod.wu $a0, $a0, $a1 +-; LA32-TRAP-NEXT: bnez $a1, 8 +-; LA32-TRAP-NEXT: break 7 +-; LA32-TRAP-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-TRAP-LABEL: urem_i16: +-; LA64-TRAP: # %bb.0: # %entry +-; LA64-TRAP-NEXT: bstrpick.d $a1, $a1, 15, 0 +-; LA64-TRAP-NEXT: bstrpick.d $a0, $a0, 15, 0 +-; LA64-TRAP-NEXT: mod.du $a0, $a0, $a1 +-; LA64-TRAP-NEXT: bnez $a1, 8 +-; LA64-TRAP-NEXT: break 7 +-; LA64-TRAP-NEXT: jirl $zero, $ra, 0 +-entry: +- %r = urem i16 %a, %b +- ret i16 %r +-} +- +-define i32 @urem_i32(i32 %a, i32 %b) { +-; LA32-LABEL: urem_i32: +-; LA32: # %bb.0: # %entry +-; LA32-NEXT: mod.wu $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: urem_i32: +-; LA64: # %bb.0: # %entry +-; LA64-NEXT: bstrpick.d $a1, $a1, 31, 0 +-; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0 +-; LA64-NEXT: mod.du $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +-; +-; LA32-TRAP-LABEL: urem_i32: +-; LA32-TRAP: # %bb.0: # %entry +-; LA32-TRAP-NEXT: mod.wu $a0, $a0, $a1 +-; LA32-TRAP-NEXT: bnez $a1, 8 +-; LA32-TRAP-NEXT: break 7 +-; LA32-TRAP-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-TRAP-LABEL: urem_i32: +-; LA64-TRAP: # %bb.0: # %entry +-; LA64-TRAP-NEXT: bstrpick.d $a1, $a1, 31, 0 +-; 
LA64-TRAP-NEXT: bstrpick.d $a0, $a0, 31, 0 +-; LA64-TRAP-NEXT: mod.du $a0, $a0, $a1 +-; LA64-TRAP-NEXT: bnez $a1, 8 +-; LA64-TRAP-NEXT: break 7 +-; LA64-TRAP-NEXT: jirl $zero, $ra, 0 +-entry: +- %r = urem i32 %a, %b +- ret i32 %r +-} +- +-define i64 @urem_i64(i64 %a, i64 %b) { +-; LA32-LABEL: urem_i64: +-; LA32: # %bb.0: # %entry +-; LA32-NEXT: addi.w $sp, $sp, -16 +-; LA32-NEXT: .cfi_def_cfa_offset 16 +-; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +-; LA32-NEXT: .cfi_offset 1, -4 +-; LA32-NEXT: bl __umoddi3 +-; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +-; LA32-NEXT: addi.w $sp, $sp, 16 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: urem_i64: +-; LA64: # %bb.0: # %entry +-; LA64-NEXT: mod.du $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +-; +-; LA32-TRAP-LABEL: urem_i64: +-; LA32-TRAP: # %bb.0: # %entry +-; LA32-TRAP-NEXT: addi.w $sp, $sp, -16 +-; LA32-TRAP-NEXT: .cfi_def_cfa_offset 16 +-; LA32-TRAP-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +-; LA32-TRAP-NEXT: .cfi_offset 1, -4 +-; LA32-TRAP-NEXT: bl __umoddi3 +-; LA32-TRAP-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +-; LA32-TRAP-NEXT: addi.w $sp, $sp, 16 +-; LA32-TRAP-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-TRAP-LABEL: urem_i64: +-; LA64-TRAP: # %bb.0: # %entry +-; LA64-TRAP-NEXT: mod.du $a0, $a0, $a1 +-; LA64-TRAP-NEXT: bnez $a1, 8 +-; LA64-TRAP-NEXT: break 7 +-; LA64-TRAP-NEXT: jirl $zero, $ra, 0 +-entry: +- %r = urem i64 %a, %b +- ret i64 %r +-} +diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/select-bare-dbl.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/select-bare-dbl.ll +deleted file mode 100644 +index 4c6026aba..000000000 +--- a/llvm/test/CodeGen/LoongArch/ir-instruction/select-bare-dbl.ll ++++ /dev/null +@@ -1,23 +0,0 @@ +-; RUN: llc --mtriple=loongarch32 --mattr=+d < %s | FileCheck %s --check-prefix=LA32 +-; RUN: llc --mtriple=loongarch64 --mattr=+d < %s | FileCheck %s --check-prefix=LA64 +- +-;; Test the bare double-precision floating-point values selection: +-;; https://llvm.org/docs/LangRef.html#select-instruction +- +-define double @test(i1 %a, double %b, double %c) { +-; LA32-LABEL: test: +-; LA32: # %bb.0: +-; LA32-NEXT: andi $a0, $a0, 1 +-; LA32-NEXT: movgr2cf $fcc0, $a0 +-; LA32-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: test: +-; LA64: # %bb.0: +-; LA64-NEXT: andi $a0, $a0, 1 +-; LA64-NEXT: movgr2cf $fcc0, $a0 +-; LA64-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %res = select i1 %a, double %b, double %c +- ret double %res +-} +diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/select-bare-flt.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/select-bare-flt.ll +deleted file mode 100644 +index af4789b52..000000000 +--- a/llvm/test/CodeGen/LoongArch/ir-instruction/select-bare-flt.ll ++++ /dev/null +@@ -1,23 +0,0 @@ +-; RUN: llc --mtriple=loongarch32 --mattr=+f,-d < %s | FileCheck %s --check-prefix=LA32 +-; RUN: llc --mtriple=loongarch64 --mattr=+f,-d < %s | FileCheck %s --check-prefix=LA64 +- +-;; Test the bare single-precision floating-point values selection: +-;; https://llvm.org/docs/LangRef.html#select-instruction +- +-define float @test(i1 %a, float %b, float %c) { +-; LA32-LABEL: test: +-; LA32: # %bb.0: +-; LA32-NEXT: andi $a0, $a0, 1 +-; LA32-NEXT: movgr2cf $fcc0, $a0 +-; LA32-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: test: +-; LA64: # %bb.0: +-; LA64-NEXT: andi $a0, $a0, 1 +-; LA64-NEXT: movgr2cf $fcc0, $a0 +-; LA64-NEXT: fsel $fa0, $fa1, $fa0, 
$fcc0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %res = select i1 %a, float %b, float %c +- ret float %res +-} +diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/select-bare-int.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/select-bare-int.ll +deleted file mode 100644 +index 3481e79b2..000000000 +--- a/llvm/test/CodeGen/LoongArch/ir-instruction/select-bare-int.ll ++++ /dev/null +@@ -1,107 +0,0 @@ +-; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32 +-; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64 +- +-;; Exercise the bare integers 'select' LLVM IR: https://llvm.org/docs/LangRef.html#select-instruction +- +-define i1 @bare_select_i1(i1 %a, i1 %b, i1 %c) { +-; LA32-LABEL: bare_select_i1: +-; LA32: # %bb.0: +-; LA32-NEXT: andi $a0, $a0, 1 +-; LA32-NEXT: masknez $a2, $a2, $a0 +-; LA32-NEXT: maskeqz $a0, $a1, $a0 +-; LA32-NEXT: or $a0, $a0, $a2 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: bare_select_i1: +-; LA64: # %bb.0: +-; LA64-NEXT: andi $a0, $a0, 1 +-; LA64-NEXT: masknez $a2, $a2, $a0 +-; LA64-NEXT: maskeqz $a0, $a1, $a0 +-; LA64-NEXT: or $a0, $a0, $a2 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %res = select i1 %a, i1 %b, i1 %c +- ret i1 %res +-} +- +-define i8 @bare_select_i8(i1 %a, i8 %b, i8 %c) { +-; LA32-LABEL: bare_select_i8: +-; LA32: # %bb.0: +-; LA32-NEXT: andi $a0, $a0, 1 +-; LA32-NEXT: masknez $a2, $a2, $a0 +-; LA32-NEXT: maskeqz $a0, $a1, $a0 +-; LA32-NEXT: or $a0, $a0, $a2 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: bare_select_i8: +-; LA64: # %bb.0: +-; LA64-NEXT: andi $a0, $a0, 1 +-; LA64-NEXT: masknez $a2, $a2, $a0 +-; LA64-NEXT: maskeqz $a0, $a1, $a0 +-; LA64-NEXT: or $a0, $a0, $a2 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %res = select i1 %a, i8 %b, i8 %c +- ret i8 %res +-} +- +-define i16 @bare_select_i16(i1 %a, i16 %b, i16 %c) { +-; LA32-LABEL: bare_select_i16: +-; LA32: # %bb.0: +-; LA32-NEXT: andi $a0, $a0, 1 +-; LA32-NEXT: masknez $a2, $a2, $a0 +-; LA32-NEXT: maskeqz $a0, $a1, $a0 +-; LA32-NEXT: or $a0, $a0, $a2 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: bare_select_i16: +-; LA64: # %bb.0: +-; LA64-NEXT: andi $a0, $a0, 1 +-; LA64-NEXT: masknez $a2, $a2, $a0 +-; LA64-NEXT: maskeqz $a0, $a1, $a0 +-; LA64-NEXT: or $a0, $a0, $a2 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %res = select i1 %a, i16 %b, i16 %c +- ret i16 %res +-} +- +-define i32 @bare_select_i32(i1 %a, i32 %b, i32 %c) { +-; LA32-LABEL: bare_select_i32: +-; LA32: # %bb.0: +-; LA32-NEXT: andi $a0, $a0, 1 +-; LA32-NEXT: masknez $a2, $a2, $a0 +-; LA32-NEXT: maskeqz $a0, $a1, $a0 +-; LA32-NEXT: or $a0, $a0, $a2 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: bare_select_i32: +-; LA64: # %bb.0: +-; LA64-NEXT: andi $a0, $a0, 1 +-; LA64-NEXT: masknez $a2, $a2, $a0 +-; LA64-NEXT: maskeqz $a0, $a1, $a0 +-; LA64-NEXT: or $a0, $a0, $a2 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %res = select i1 %a, i32 %b, i32 %c +- ret i32 %res +-} +- +-define i64 @bare_select_i64(i1 %a, i64 %b, i64 %c) { +-; LA32-LABEL: bare_select_i64: +-; LA32: # %bb.0: +-; LA32-NEXT: andi $a5, $a0, 1 +-; LA32-NEXT: masknez $a0, $a3, $a5 +-; LA32-NEXT: maskeqz $a1, $a1, $a5 +-; LA32-NEXT: or $a0, $a1, $a0 +-; LA32-NEXT: masknez $a1, $a4, $a5 +-; LA32-NEXT: maskeqz $a2, $a2, $a5 +-; LA32-NEXT: or $a1, $a2, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: bare_select_i64: +-; LA64: # %bb.0: +-; LA64-NEXT: andi $a0, $a0, 1 +-; LA64-NEXT: masknez $a2, $a2, $a0 +-; LA64-NEXT: maskeqz $a0, $a1, $a0 +-; LA64-NEXT: or $a0, $a0, $a2 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %res = select i1 
%a, i64 %b, i64 %c +- ret i64 %res +-} +diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/select-fpcc-dbl.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/select-fpcc-dbl.ll +deleted file mode 100644 +index 4397b64d9..000000000 +--- a/llvm/test/CodeGen/LoongArch/ir-instruction/select-fpcc-dbl.ll ++++ /dev/null +@@ -1,272 +0,0 @@ +-; RUN: llc --mtriple=loongarch32 --mattr=+d < %s | FileCheck %s --check-prefix=LA32 +-; RUN: llc --mtriple=loongarch64 --mattr=+d < %s | FileCheck %s --check-prefix=LA64 +- +-;; Test double-precision floating-point values selection after comparison +- +-define double @fcmp_false(double %a, double %b, double %x, double %y) { +-; LA32-LABEL: fcmp_false: +-; LA32: # %bb.0: +-; LA32-NEXT: fmov.d $fa0, $fa3 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: fcmp_false: +-; LA64: # %bb.0: +-; LA64-NEXT: fmov.d $fa0, $fa3 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cmp = fcmp false double %a, %b +- %res = select i1 %cmp, double %x, double %y +- ret double %res +-} +- +-define double @fcmp_oeq(double %a, double %b, double %x, double %y) { +-; LA32-LABEL: fcmp_oeq: +-; LA32: # %bb.0: +-; LA32-NEXT: fcmp.ceq.d $fcc0, $fa0, $fa1 +-; LA32-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: fcmp_oeq: +-; LA64: # %bb.0: +-; LA64-NEXT: fcmp.ceq.d $fcc0, $fa0, $fa1 +-; LA64-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cmp = fcmp oeq double %a, %b +- %res = select i1 %cmp, double %x, double %y +- ret double %res +-} +- +-define double @fcmp_ogt(double %a, double %b, double %x, double %y) { +-; LA32-LABEL: fcmp_ogt: +-; LA32: # %bb.0: +-; LA32-NEXT: fcmp.clt.d $fcc0, $fa1, $fa0 +-; LA32-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: fcmp_ogt: +-; LA64: # %bb.0: +-; LA64-NEXT: fcmp.clt.d $fcc0, $fa1, $fa0 +-; LA64-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cmp = fcmp ogt double %a, %b +- %res = select i1 %cmp, double %x, double %y +- ret double %res +-} +- +-define double @fcmp_oge(double %a, double %b, double %x, double %y) { +-; LA32-LABEL: fcmp_oge: +-; LA32: # %bb.0: +-; LA32-NEXT: fcmp.cle.d $fcc0, $fa1, $fa0 +-; LA32-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: fcmp_oge: +-; LA64: # %bb.0: +-; LA64-NEXT: fcmp.cle.d $fcc0, $fa1, $fa0 +-; LA64-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cmp = fcmp oge double %a, %b +- %res = select i1 %cmp, double %x, double %y +- ret double %res +-} +- +-define double @fcmp_olt(double %a, double %b, double %x, double %y) { +-; LA32-LABEL: fcmp_olt: +-; LA32: # %bb.0: +-; LA32-NEXT: fcmp.clt.d $fcc0, $fa0, $fa1 +-; LA32-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: fcmp_olt: +-; LA64: # %bb.0: +-; LA64-NEXT: fcmp.clt.d $fcc0, $fa0, $fa1 +-; LA64-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cmp = fcmp olt double %a, %b +- %res = select i1 %cmp, double %x, double %y +- ret double %res +-} +- +-define double @fcmp_ole(double %a, double %b, double %x, double %y) { +-; LA32-LABEL: fcmp_ole: +-; LA32: # %bb.0: +-; LA32-NEXT: fcmp.cle.d $fcc0, $fa0, $fa1 +-; LA32-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: fcmp_ole: +-; LA64: # %bb.0: +-; LA64-NEXT: fcmp.cle.d $fcc0, $fa0, $fa1 +-; LA64-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cmp = fcmp ole double %a, %b +- %res = select i1 %cmp, double %x, double 
%y +- ret double %res +-} +- +-define double @fcmp_one(double %a, double %b, double %x, double %y) { +-; LA32-LABEL: fcmp_one: +-; LA32: # %bb.0: +-; LA32-NEXT: fcmp.cne.d $fcc0, $fa0, $fa1 +-; LA32-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: fcmp_one: +-; LA64: # %bb.0: +-; LA64-NEXT: fcmp.cne.d $fcc0, $fa0, $fa1 +-; LA64-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cmp = fcmp one double %a, %b +- %res = select i1 %cmp, double %x, double %y +- ret double %res +-} +- +-define double @fcmp_ord(double %a, double %b, double %x, double %y) { +-; LA32-LABEL: fcmp_ord: +-; LA32: # %bb.0: +-; LA32-NEXT: fcmp.cor.d $fcc0, $fa0, $fa1 +-; LA32-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: fcmp_ord: +-; LA64: # %bb.0: +-; LA64-NEXT: fcmp.cor.d $fcc0, $fa0, $fa1 +-; LA64-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cmp = fcmp ord double %a, %b +- %res = select i1 %cmp, double %x, double %y +- ret double %res +-} +- +-define double @fcmp_ueq(double %a, double %b, double %x, double %y) { +-; LA32-LABEL: fcmp_ueq: +-; LA32: # %bb.0: +-; LA32-NEXT: fcmp.cueq.d $fcc0, $fa0, $fa1 +-; LA32-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: fcmp_ueq: +-; LA64: # %bb.0: +-; LA64-NEXT: fcmp.cueq.d $fcc0, $fa0, $fa1 +-; LA64-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cmp = fcmp ueq double %a, %b +- %res = select i1 %cmp, double %x, double %y +- ret double %res +-} +- +-define double @fcmp_ugt(double %a, double %b, double %x, double %y) { +-; LA32-LABEL: fcmp_ugt: +-; LA32: # %bb.0: +-; LA32-NEXT: fcmp.cult.d $fcc0, $fa1, $fa0 +-; LA32-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: fcmp_ugt: +-; LA64: # %bb.0: +-; LA64-NEXT: fcmp.cult.d $fcc0, $fa1, $fa0 +-; LA64-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cmp = fcmp ugt double %a, %b +- %res = select i1 %cmp, double %x, double %y +- ret double %res +-} +- +-define double @fcmp_uge(double %a, double %b, double %x, double %y) { +-; LA32-LABEL: fcmp_uge: +-; LA32: # %bb.0: +-; LA32-NEXT: fcmp.cule.d $fcc0, $fa1, $fa0 +-; LA32-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: fcmp_uge: +-; LA64: # %bb.0: +-; LA64-NEXT: fcmp.cule.d $fcc0, $fa1, $fa0 +-; LA64-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cmp = fcmp uge double %a, %b +- %res = select i1 %cmp, double %x, double %y +- ret double %res +-} +- +-define double @fcmp_ult(double %a, double %b, double %x, double %y) { +-; LA32-LABEL: fcmp_ult: +-; LA32: # %bb.0: +-; LA32-NEXT: fcmp.cult.d $fcc0, $fa0, $fa1 +-; LA32-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: fcmp_ult: +-; LA64: # %bb.0: +-; LA64-NEXT: fcmp.cult.d $fcc0, $fa0, $fa1 +-; LA64-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cmp = fcmp ult double %a, %b +- %res = select i1 %cmp, double %x, double %y +- ret double %res +-} +- +-define double @fcmp_ule(double %a, double %b, double %x, double %y) { +-; LA32-LABEL: fcmp_ule: +-; LA32: # %bb.0: +-; LA32-NEXT: fcmp.cule.d $fcc0, $fa0, $fa1 +-; LA32-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: fcmp_ule: +-; LA64: # %bb.0: +-; LA64-NEXT: fcmp.cule.d $fcc0, $fa0, $fa1 +-; LA64-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cmp = 
fcmp ule double %a, %b +- %res = select i1 %cmp, double %x, double %y +- ret double %res +-} +- +-define double @fcmp_une(double %a, double %b, double %x, double %y) { +-; LA32-LABEL: fcmp_une: +-; LA32: # %bb.0: +-; LA32-NEXT: fcmp.cune.d $fcc0, $fa0, $fa1 +-; LA32-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: fcmp_une: +-; LA64: # %bb.0: +-; LA64-NEXT: fcmp.cune.d $fcc0, $fa0, $fa1 +-; LA64-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cmp = fcmp une double %a, %b +- %res = select i1 %cmp, double %x, double %y +- ret double %res +-} +- +-define double @fcmp_uno(double %a, double %b, double %x, double %y) { +-; LA32-LABEL: fcmp_uno: +-; LA32: # %bb.0: +-; LA32-NEXT: fcmp.cun.d $fcc0, $fa0, $fa1 +-; LA32-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: fcmp_uno: +-; LA64: # %bb.0: +-; LA64-NEXT: fcmp.cun.d $fcc0, $fa0, $fa1 +-; LA64-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cmp = fcmp uno double %a, %b +- %res = select i1 %cmp, double %x, double %y +- ret double %res +-} +- +-define double @fcmp_true(double %a, double %b, double %x, double %y) { +-; LA32-LABEL: fcmp_true: +-; LA32: # %bb.0: +-; LA32-NEXT: fmov.d $fa0, $fa2 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: fcmp_true: +-; LA64: # %bb.0: +-; LA64-NEXT: fmov.d $fa0, $fa2 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cmp = fcmp true double %a, %b +- %res = select i1 %cmp, double %x, double %y +- ret double %res +-} +diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/select-fpcc-flt.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/select-fpcc-flt.ll +deleted file mode 100644 +index 23d71493c..000000000 +--- a/llvm/test/CodeGen/LoongArch/ir-instruction/select-fpcc-flt.ll ++++ /dev/null +@@ -1,272 +0,0 @@ +-; RUN: llc --mtriple=loongarch32 --mattr=+f,-d < %s | FileCheck %s --check-prefix=LA32 +-; RUN: llc --mtriple=loongarch64 --mattr=+f,-d < %s | FileCheck %s --check-prefix=LA64 +- +-;; Test single-precision floating-point values selection after comparison +- +-define float @fcmp_false(float %a, float %b, float %x, float %y) { +-; LA32-LABEL: fcmp_false: +-; LA32: # %bb.0: +-; LA32-NEXT: fmov.s $fa0, $fa3 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: fcmp_false: +-; LA64: # %bb.0: +-; LA64-NEXT: fmov.s $fa0, $fa3 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cmp = fcmp false float %a, %b +- %res = select i1 %cmp, float %x, float %y +- ret float %res +-} +- +-define float @fcmp_oeq(float %a, float %b, float %x, float %y) { +-; LA32-LABEL: fcmp_oeq: +-; LA32: # %bb.0: +-; LA32-NEXT: fcmp.ceq.s $fcc0, $fa0, $fa1 +-; LA32-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: fcmp_oeq: +-; LA64: # %bb.0: +-; LA64-NEXT: fcmp.ceq.s $fcc0, $fa0, $fa1 +-; LA64-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cmp = fcmp oeq float %a, %b +- %res = select i1 %cmp, float %x, float %y +- ret float %res +-} +- +-define float @fcmp_ogt(float %a, float %b, float %x, float %y) { +-; LA32-LABEL: fcmp_ogt: +-; LA32: # %bb.0: +-; LA32-NEXT: fcmp.clt.s $fcc0, $fa1, $fa0 +-; LA32-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: fcmp_ogt: +-; LA64: # %bb.0: +-; LA64-NEXT: fcmp.clt.s $fcc0, $fa1, $fa0 +-; LA64-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cmp = fcmp ogt float %a, %b +- %res = select i1 %cmp, float %x, float %y +- ret float %res +-} +- +-define float @fcmp_oge(float %a, float 
%b, float %x, float %y) { +-; LA32-LABEL: fcmp_oge: +-; LA32: # %bb.0: +-; LA32-NEXT: fcmp.cle.s $fcc0, $fa1, $fa0 +-; LA32-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: fcmp_oge: +-; LA64: # %bb.0: +-; LA64-NEXT: fcmp.cle.s $fcc0, $fa1, $fa0 +-; LA64-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cmp = fcmp oge float %a, %b +- %res = select i1 %cmp, float %x, float %y +- ret float %res +-} +- +-define float @fcmp_olt(float %a, float %b, float %x, float %y) { +-; LA32-LABEL: fcmp_olt: +-; LA32: # %bb.0: +-; LA32-NEXT: fcmp.clt.s $fcc0, $fa0, $fa1 +-; LA32-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: fcmp_olt: +-; LA64: # %bb.0: +-; LA64-NEXT: fcmp.clt.s $fcc0, $fa0, $fa1 +-; LA64-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cmp = fcmp olt float %a, %b +- %res = select i1 %cmp, float %x, float %y +- ret float %res +-} +- +-define float @fcmp_ole(float %a, float %b, float %x, float %y) { +-; LA32-LABEL: fcmp_ole: +-; LA32: # %bb.0: +-; LA32-NEXT: fcmp.cle.s $fcc0, $fa0, $fa1 +-; LA32-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: fcmp_ole: +-; LA64: # %bb.0: +-; LA64-NEXT: fcmp.cle.s $fcc0, $fa0, $fa1 +-; LA64-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cmp = fcmp ole float %a, %b +- %res = select i1 %cmp, float %x, float %y +- ret float %res +-} +- +-define float @fcmp_one(float %a, float %b, float %x, float %y) { +-; LA32-LABEL: fcmp_one: +-; LA32: # %bb.0: +-; LA32-NEXT: fcmp.cne.s $fcc0, $fa0, $fa1 +-; LA32-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: fcmp_one: +-; LA64: # %bb.0: +-; LA64-NEXT: fcmp.cne.s $fcc0, $fa0, $fa1 +-; LA64-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cmp = fcmp one float %a, %b +- %res = select i1 %cmp, float %x, float %y +- ret float %res +-} +- +-define float @fcmp_ord(float %a, float %b, float %x, float %y) { +-; LA32-LABEL: fcmp_ord: +-; LA32: # %bb.0: +-; LA32-NEXT: fcmp.cor.s $fcc0, $fa0, $fa1 +-; LA32-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: fcmp_ord: +-; LA64: # %bb.0: +-; LA64-NEXT: fcmp.cor.s $fcc0, $fa0, $fa1 +-; LA64-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cmp = fcmp ord float %a, %b +- %res = select i1 %cmp, float %x, float %y +- ret float %res +-} +- +-define float @fcmp_ueq(float %a, float %b, float %x, float %y) { +-; LA32-LABEL: fcmp_ueq: +-; LA32: # %bb.0: +-; LA32-NEXT: fcmp.cueq.s $fcc0, $fa0, $fa1 +-; LA32-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: fcmp_ueq: +-; LA64: # %bb.0: +-; LA64-NEXT: fcmp.cueq.s $fcc0, $fa0, $fa1 +-; LA64-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cmp = fcmp ueq float %a, %b +- %res = select i1 %cmp, float %x, float %y +- ret float %res +-} +- +-define float @fcmp_ugt(float %a, float %b, float %x, float %y) { +-; LA32-LABEL: fcmp_ugt: +-; LA32: # %bb.0: +-; LA32-NEXT: fcmp.cult.s $fcc0, $fa1, $fa0 +-; LA32-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: fcmp_ugt: +-; LA64: # %bb.0: +-; LA64-NEXT: fcmp.cult.s $fcc0, $fa1, $fa0 +-; LA64-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cmp = fcmp ugt float %a, %b +- %res = select i1 %cmp, float %x, float %y +- ret float %res +-} +- +-define float @fcmp_uge(float %a, float 
%b, float %x, float %y) { +-; LA32-LABEL: fcmp_uge: +-; LA32: # %bb.0: +-; LA32-NEXT: fcmp.cule.s $fcc0, $fa1, $fa0 +-; LA32-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: fcmp_uge: +-; LA64: # %bb.0: +-; LA64-NEXT: fcmp.cule.s $fcc0, $fa1, $fa0 +-; LA64-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cmp = fcmp uge float %a, %b +- %res = select i1 %cmp, float %x, float %y +- ret float %res +-} +- +-define float @fcmp_ult(float %a, float %b, float %x, float %y) { +-; LA32-LABEL: fcmp_ult: +-; LA32: # %bb.0: +-; LA32-NEXT: fcmp.cult.s $fcc0, $fa0, $fa1 +-; LA32-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: fcmp_ult: +-; LA64: # %bb.0: +-; LA64-NEXT: fcmp.cult.s $fcc0, $fa0, $fa1 +-; LA64-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cmp = fcmp ult float %a, %b +- %res = select i1 %cmp, float %x, float %y +- ret float %res +-} +- +-define float @fcmp_ule(float %a, float %b, float %x, float %y) { +-; LA32-LABEL: fcmp_ule: +-; LA32: # %bb.0: +-; LA32-NEXT: fcmp.cule.s $fcc0, $fa0, $fa1 +-; LA32-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: fcmp_ule: +-; LA64: # %bb.0: +-; LA64-NEXT: fcmp.cule.s $fcc0, $fa0, $fa1 +-; LA64-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cmp = fcmp ule float %a, %b +- %res = select i1 %cmp, float %x, float %y +- ret float %res +-} +- +-define float @fcmp_une(float %a, float %b, float %x, float %y) { +-; LA32-LABEL: fcmp_une: +-; LA32: # %bb.0: +-; LA32-NEXT: fcmp.cune.s $fcc0, $fa0, $fa1 +-; LA32-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: fcmp_une: +-; LA64: # %bb.0: +-; LA64-NEXT: fcmp.cune.s $fcc0, $fa0, $fa1 +-; LA64-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cmp = fcmp une float %a, %b +- %res = select i1 %cmp, float %x, float %y +- ret float %res +-} +- +-define float @fcmp_uno(float %a, float %b, float %x, float %y) { +-; LA32-LABEL: fcmp_uno: +-; LA32: # %bb.0: +-; LA32-NEXT: fcmp.cun.s $fcc0, $fa0, $fa1 +-; LA32-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: fcmp_uno: +-; LA64: # %bb.0: +-; LA64-NEXT: fcmp.cun.s $fcc0, $fa0, $fa1 +-; LA64-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cmp = fcmp uno float %a, %b +- %res = select i1 %cmp, float %x, float %y +- ret float %res +-} +- +-define float @fcmp_true(float %a, float %b, float %x, float %y) { +-; LA32-LABEL: fcmp_true: +-; LA32: # %bb.0: +-; LA32-NEXT: fmov.s $fa0, $fa2 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: fcmp_true: +-; LA64: # %bb.0: +-; LA64-NEXT: fmov.s $fa0, $fa2 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cmp = fcmp true float %a, %b +- %res = select i1 %cmp, float %x, float %y +- ret float %res +-} +diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/select-fpcc-int.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/select-fpcc-int.ll +deleted file mode 100644 +index 9e742ee57..000000000 +--- a/llvm/test/CodeGen/LoongArch/ir-instruction/select-fpcc-int.ll ++++ /dev/null +@@ -1,704 +0,0 @@ +-; RUN: llc --mtriple=loongarch32 --mattr=+d < %s | FileCheck %s --check-prefix=LA32 +-; RUN: llc --mtriple=loongarch64 --mattr=+d < %s | FileCheck %s --check-prefix=LA64 +- +-;; Test integers selection after `fcmp` +- +-define i32 @f32_fcmp_false(float %a, float %b, i32 %x, i32 %y) { +-; LA32-LABEL: f32_fcmp_false: +-; LA32: # %bb.0: +-; LA32-NEXT: move 
$a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: f32_fcmp_false: +-; LA64: # %bb.0: +-; LA64-NEXT: move $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cmp = fcmp false float %a, %b +- %res = select i1 %cmp, i32 %x, i32 %y +- ret i32 %res +-} +- +-define i32 @f32_fcmp_oeq(float %a, float %b, i32 %x, i32 %y) { +-; LA32-LABEL: f32_fcmp_oeq: +-; LA32: # %bb.0: +-; LA32-NEXT: fcmp.ceq.s $fcc0, $fa0, $fa1 +-; LA32-NEXT: movcf2gr $a2, $fcc0 +-; LA32-NEXT: masknez $a1, $a1, $a2 +-; LA32-NEXT: maskeqz $a0, $a0, $a2 +-; LA32-NEXT: or $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: f32_fcmp_oeq: +-; LA64: # %bb.0: +-; LA64-NEXT: fcmp.ceq.s $fcc0, $fa0, $fa1 +-; LA64-NEXT: movcf2gr $a2, $fcc0 +-; LA64-NEXT: masknez $a1, $a1, $a2 +-; LA64-NEXT: maskeqz $a0, $a0, $a2 +-; LA64-NEXT: or $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cmp = fcmp oeq float %a, %b +- %res = select i1 %cmp, i32 %x, i32 %y +- ret i32 %res +-} +- +-define i32 @f32_fcmp_ogt(float %a, float %b, i32 %x, i32 %y) { +-; LA32-LABEL: f32_fcmp_ogt: +-; LA32: # %bb.0: +-; LA32-NEXT: fcmp.clt.s $fcc0, $fa1, $fa0 +-; LA32-NEXT: movcf2gr $a2, $fcc0 +-; LA32-NEXT: masknez $a1, $a1, $a2 +-; LA32-NEXT: maskeqz $a0, $a0, $a2 +-; LA32-NEXT: or $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: f32_fcmp_ogt: +-; LA64: # %bb.0: +-; LA64-NEXT: fcmp.clt.s $fcc0, $fa1, $fa0 +-; LA64-NEXT: movcf2gr $a2, $fcc0 +-; LA64-NEXT: masknez $a1, $a1, $a2 +-; LA64-NEXT: maskeqz $a0, $a0, $a2 +-; LA64-NEXT: or $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cmp = fcmp ogt float %a, %b +- %res = select i1 %cmp, i32 %x, i32 %y +- ret i32 %res +-} +- +-define i32 @f32_fcmp_oge(float %a, float %b, i32 %x, i32 %y) { +-; LA32-LABEL: f32_fcmp_oge: +-; LA32: # %bb.0: +-; LA32-NEXT: fcmp.cle.s $fcc0, $fa1, $fa0 +-; LA32-NEXT: movcf2gr $a2, $fcc0 +-; LA32-NEXT: masknez $a1, $a1, $a2 +-; LA32-NEXT: maskeqz $a0, $a0, $a2 +-; LA32-NEXT: or $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: f32_fcmp_oge: +-; LA64: # %bb.0: +-; LA64-NEXT: fcmp.cle.s $fcc0, $fa1, $fa0 +-; LA64-NEXT: movcf2gr $a2, $fcc0 +-; LA64-NEXT: masknez $a1, $a1, $a2 +-; LA64-NEXT: maskeqz $a0, $a0, $a2 +-; LA64-NEXT: or $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cmp = fcmp oge float %a, %b +- %res = select i1 %cmp, i32 %x, i32 %y +- ret i32 %res +-} +- +-define i32 @f32_fcmp_olt(float %a, float %b, i32 %x, i32 %y) { +-; LA32-LABEL: f32_fcmp_olt: +-; LA32: # %bb.0: +-; LA32-NEXT: fcmp.clt.s $fcc0, $fa0, $fa1 +-; LA32-NEXT: movcf2gr $a2, $fcc0 +-; LA32-NEXT: masknez $a1, $a1, $a2 +-; LA32-NEXT: maskeqz $a0, $a0, $a2 +-; LA32-NEXT: or $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: f32_fcmp_olt: +-; LA64: # %bb.0: +-; LA64-NEXT: fcmp.clt.s $fcc0, $fa0, $fa1 +-; LA64-NEXT: movcf2gr $a2, $fcc0 +-; LA64-NEXT: masknez $a1, $a1, $a2 +-; LA64-NEXT: maskeqz $a0, $a0, $a2 +-; LA64-NEXT: or $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cmp = fcmp olt float %a, %b +- %res = select i1 %cmp, i32 %x, i32 %y +- ret i32 %res +-} +- +-define i32 @f32_fcmp_ole(float %a, float %b, i32 %x, i32 %y) { +-; LA32-LABEL: f32_fcmp_ole: +-; LA32: # %bb.0: +-; LA32-NEXT: fcmp.cle.s $fcc0, $fa0, $fa1 +-; LA32-NEXT: movcf2gr $a2, $fcc0 +-; LA32-NEXT: masknez $a1, $a1, $a2 +-; LA32-NEXT: maskeqz $a0, $a0, $a2 +-; LA32-NEXT: or $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: f32_fcmp_ole: +-; LA64: # %bb.0: +-; LA64-NEXT: fcmp.cle.s $fcc0, $fa0, $fa1 +-; LA64-NEXT: movcf2gr $a2, $fcc0 +-; LA64-NEXT: masknez 
$a1, $a1, $a2 +-; LA64-NEXT: maskeqz $a0, $a0, $a2 +-; LA64-NEXT: or $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cmp = fcmp ole float %a, %b +- %res = select i1 %cmp, i32 %x, i32 %y +- ret i32 %res +-} +- +-define i32 @f32_fcmp_one(float %a, float %b, i32 %x, i32 %y) { +-; LA32-LABEL: f32_fcmp_one: +-; LA32: # %bb.0: +-; LA32-NEXT: fcmp.cne.s $fcc0, $fa0, $fa1 +-; LA32-NEXT: movcf2gr $a2, $fcc0 +-; LA32-NEXT: masknez $a1, $a1, $a2 +-; LA32-NEXT: maskeqz $a0, $a0, $a2 +-; LA32-NEXT: or $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: f32_fcmp_one: +-; LA64: # %bb.0: +-; LA64-NEXT: fcmp.cne.s $fcc0, $fa0, $fa1 +-; LA64-NEXT: movcf2gr $a2, $fcc0 +-; LA64-NEXT: masknez $a1, $a1, $a2 +-; LA64-NEXT: maskeqz $a0, $a0, $a2 +-; LA64-NEXT: or $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cmp = fcmp one float %a, %b +- %res = select i1 %cmp, i32 %x, i32 %y +- ret i32 %res +-} +- +-define i32 @f32_fcmp_ord(float %a, float %b, i32 %x, i32 %y) { +-; LA32-LABEL: f32_fcmp_ord: +-; LA32: # %bb.0: +-; LA32-NEXT: fcmp.cor.s $fcc0, $fa0, $fa1 +-; LA32-NEXT: movcf2gr $a2, $fcc0 +-; LA32-NEXT: masknez $a1, $a1, $a2 +-; LA32-NEXT: maskeqz $a0, $a0, $a2 +-; LA32-NEXT: or $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: f32_fcmp_ord: +-; LA64: # %bb.0: +-; LA64-NEXT: fcmp.cor.s $fcc0, $fa0, $fa1 +-; LA64-NEXT: movcf2gr $a2, $fcc0 +-; LA64-NEXT: masknez $a1, $a1, $a2 +-; LA64-NEXT: maskeqz $a0, $a0, $a2 +-; LA64-NEXT: or $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cmp = fcmp ord float %a, %b +- %res = select i1 %cmp, i32 %x, i32 %y +- ret i32 %res +-} +- +-define i32 @f32_fcmp_ueq(float %a, float %b, i32 %x, i32 %y) { +-; LA32-LABEL: f32_fcmp_ueq: +-; LA32: # %bb.0: +-; LA32-NEXT: fcmp.cueq.s $fcc0, $fa0, $fa1 +-; LA32-NEXT: movcf2gr $a2, $fcc0 +-; LA32-NEXT: masknez $a1, $a1, $a2 +-; LA32-NEXT: maskeqz $a0, $a0, $a2 +-; LA32-NEXT: or $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: f32_fcmp_ueq: +-; LA64: # %bb.0: +-; LA64-NEXT: fcmp.cueq.s $fcc0, $fa0, $fa1 +-; LA64-NEXT: movcf2gr $a2, $fcc0 +-; LA64-NEXT: masknez $a1, $a1, $a2 +-; LA64-NEXT: maskeqz $a0, $a0, $a2 +-; LA64-NEXT: or $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cmp = fcmp ueq float %a, %b +- %res = select i1 %cmp, i32 %x, i32 %y +- ret i32 %res +-} +- +-define i32 @f32_fcmp_ugt(float %a, float %b, i32 %x, i32 %y) { +-; LA32-LABEL: f32_fcmp_ugt: +-; LA32: # %bb.0: +-; LA32-NEXT: fcmp.cult.s $fcc0, $fa1, $fa0 +-; LA32-NEXT: movcf2gr $a2, $fcc0 +-; LA32-NEXT: masknez $a1, $a1, $a2 +-; LA32-NEXT: maskeqz $a0, $a0, $a2 +-; LA32-NEXT: or $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: f32_fcmp_ugt: +-; LA64: # %bb.0: +-; LA64-NEXT: fcmp.cult.s $fcc0, $fa1, $fa0 +-; LA64-NEXT: movcf2gr $a2, $fcc0 +-; LA64-NEXT: masknez $a1, $a1, $a2 +-; LA64-NEXT: maskeqz $a0, $a0, $a2 +-; LA64-NEXT: or $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cmp = fcmp ugt float %a, %b +- %res = select i1 %cmp, i32 %x, i32 %y +- ret i32 %res +-} +- +-define i32 @f32_fcmp_uge(float %a, float %b, i32 %x, i32 %y) { +-; LA32-LABEL: f32_fcmp_uge: +-; LA32: # %bb.0: +-; LA32-NEXT: fcmp.cule.s $fcc0, $fa1, $fa0 +-; LA32-NEXT: movcf2gr $a2, $fcc0 +-; LA32-NEXT: masknez $a1, $a1, $a2 +-; LA32-NEXT: maskeqz $a0, $a0, $a2 +-; LA32-NEXT: or $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: f32_fcmp_uge: +-; LA64: # %bb.0: +-; LA64-NEXT: fcmp.cule.s $fcc0, $fa1, $fa0 +-; LA64-NEXT: movcf2gr $a2, $fcc0 +-; LA64-NEXT: masknez $a1, $a1, $a2 +-; LA64-NEXT: maskeqz $a0, 
$a0, $a2 +-; LA64-NEXT: or $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cmp = fcmp uge float %a, %b +- %res = select i1 %cmp, i32 %x, i32 %y +- ret i32 %res +-} +- +-define i32 @f32_fcmp_ult(float %a, float %b, i32 %x, i32 %y) { +-; LA32-LABEL: f32_fcmp_ult: +-; LA32: # %bb.0: +-; LA32-NEXT: fcmp.cult.s $fcc0, $fa0, $fa1 +-; LA32-NEXT: movcf2gr $a2, $fcc0 +-; LA32-NEXT: masknez $a1, $a1, $a2 +-; LA32-NEXT: maskeqz $a0, $a0, $a2 +-; LA32-NEXT: or $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: f32_fcmp_ult: +-; LA64: # %bb.0: +-; LA64-NEXT: fcmp.cult.s $fcc0, $fa0, $fa1 +-; LA64-NEXT: movcf2gr $a2, $fcc0 +-; LA64-NEXT: masknez $a1, $a1, $a2 +-; LA64-NEXT: maskeqz $a0, $a0, $a2 +-; LA64-NEXT: or $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cmp = fcmp ult float %a, %b +- %res = select i1 %cmp, i32 %x, i32 %y +- ret i32 %res +-} +- +-define i32 @f32_fcmp_ule(float %a, float %b, i32 %x, i32 %y) { +-; LA32-LABEL: f32_fcmp_ule: +-; LA32: # %bb.0: +-; LA32-NEXT: fcmp.cule.s $fcc0, $fa0, $fa1 +-; LA32-NEXT: movcf2gr $a2, $fcc0 +-; LA32-NEXT: masknez $a1, $a1, $a2 +-; LA32-NEXT: maskeqz $a0, $a0, $a2 +-; LA32-NEXT: or $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: f32_fcmp_ule: +-; LA64: # %bb.0: +-; LA64-NEXT: fcmp.cule.s $fcc0, $fa0, $fa1 +-; LA64-NEXT: movcf2gr $a2, $fcc0 +-; LA64-NEXT: masknez $a1, $a1, $a2 +-; LA64-NEXT: maskeqz $a0, $a0, $a2 +-; LA64-NEXT: or $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cmp = fcmp ule float %a, %b +- %res = select i1 %cmp, i32 %x, i32 %y +- ret i32 %res +-} +- +-define i32 @f32_fcmp_une(float %a, float %b, i32 %x, i32 %y) { +-; LA32-LABEL: f32_fcmp_une: +-; LA32: # %bb.0: +-; LA32-NEXT: fcmp.cune.s $fcc0, $fa0, $fa1 +-; LA32-NEXT: movcf2gr $a2, $fcc0 +-; LA32-NEXT: masknez $a1, $a1, $a2 +-; LA32-NEXT: maskeqz $a0, $a0, $a2 +-; LA32-NEXT: or $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: f32_fcmp_une: +-; LA64: # %bb.0: +-; LA64-NEXT: fcmp.cune.s $fcc0, $fa0, $fa1 +-; LA64-NEXT: movcf2gr $a2, $fcc0 +-; LA64-NEXT: masknez $a1, $a1, $a2 +-; LA64-NEXT: maskeqz $a0, $a0, $a2 +-; LA64-NEXT: or $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cmp = fcmp une float %a, %b +- %res = select i1 %cmp, i32 %x, i32 %y +- ret i32 %res +-} +- +-define i32 @f32_fcmp_uno(float %a, float %b, i32 %x, i32 %y) { +-; LA32-LABEL: f32_fcmp_uno: +-; LA32: # %bb.0: +-; LA32-NEXT: fcmp.cun.s $fcc0, $fa0, $fa1 +-; LA32-NEXT: movcf2gr $a2, $fcc0 +-; LA32-NEXT: masknez $a1, $a1, $a2 +-; LA32-NEXT: maskeqz $a0, $a0, $a2 +-; LA32-NEXT: or $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: f32_fcmp_uno: +-; LA64: # %bb.0: +-; LA64-NEXT: fcmp.cun.s $fcc0, $fa0, $fa1 +-; LA64-NEXT: movcf2gr $a2, $fcc0 +-; LA64-NEXT: masknez $a1, $a1, $a2 +-; LA64-NEXT: maskeqz $a0, $a0, $a2 +-; LA64-NEXT: or $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cmp = fcmp uno float %a, %b +- %res = select i1 %cmp, i32 %x, i32 %y +- ret i32 %res +-} +- +-define i32 @f32_fcmp_true(float %a, float %b, i32 %x, i32 %y) { +-; LA32-LABEL: f32_fcmp_true: +-; LA32: # %bb.0: +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: f32_fcmp_true: +-; LA64: # %bb.0: +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cmp = fcmp true float %a, %b +- %res = select i1 %cmp, i32 %x, i32 %y +- ret i32 %res +-} +- +-define i32 @f64_fcmp_false(double %a, double %b, i32 %x, i32 %y) { +-; LA32-LABEL: f64_fcmp_false: +-; LA32: # %bb.0: +-; LA32-NEXT: move $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: f64_fcmp_false: +-; LA64: # %bb.0: 
+-; LA64-NEXT: move $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cmp = fcmp false double %a, %b +- %res = select i1 %cmp, i32 %x, i32 %y +- ret i32 %res +-} +- +-define i32 @f64_fcmp_oeq(double %a, double %b, i32 %x, i32 %y) { +-; LA32-LABEL: f64_fcmp_oeq: +-; LA32: # %bb.0: +-; LA32-NEXT: fcmp.ceq.d $fcc0, $fa0, $fa1 +-; LA32-NEXT: movcf2gr $a2, $fcc0 +-; LA32-NEXT: masknez $a1, $a1, $a2 +-; LA32-NEXT: maskeqz $a0, $a0, $a2 +-; LA32-NEXT: or $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: f64_fcmp_oeq: +-; LA64: # %bb.0: +-; LA64-NEXT: fcmp.ceq.d $fcc0, $fa0, $fa1 +-; LA64-NEXT: movcf2gr $a2, $fcc0 +-; LA64-NEXT: masknez $a1, $a1, $a2 +-; LA64-NEXT: maskeqz $a0, $a0, $a2 +-; LA64-NEXT: or $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cmp = fcmp oeq double %a, %b +- %res = select i1 %cmp, i32 %x, i32 %y +- ret i32 %res +-} +- +-define i32 @f64_fcmp_ogt(double %a, double %b, i32 %x, i32 %y) { +-; LA32-LABEL: f64_fcmp_ogt: +-; LA32: # %bb.0: +-; LA32-NEXT: fcmp.clt.d $fcc0, $fa1, $fa0 +-; LA32-NEXT: movcf2gr $a2, $fcc0 +-; LA32-NEXT: masknez $a1, $a1, $a2 +-; LA32-NEXT: maskeqz $a0, $a0, $a2 +-; LA32-NEXT: or $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: f64_fcmp_ogt: +-; LA64: # %bb.0: +-; LA64-NEXT: fcmp.clt.d $fcc0, $fa1, $fa0 +-; LA64-NEXT: movcf2gr $a2, $fcc0 +-; LA64-NEXT: masknez $a1, $a1, $a2 +-; LA64-NEXT: maskeqz $a0, $a0, $a2 +-; LA64-NEXT: or $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cmp = fcmp ogt double %a, %b +- %res = select i1 %cmp, i32 %x, i32 %y +- ret i32 %res +-} +- +-define i32 @f64_fcmp_oge(double %a, double %b, i32 %x, i32 %y) { +-; LA32-LABEL: f64_fcmp_oge: +-; LA32: # %bb.0: +-; LA32-NEXT: fcmp.cle.d $fcc0, $fa1, $fa0 +-; LA32-NEXT: movcf2gr $a2, $fcc0 +-; LA32-NEXT: masknez $a1, $a1, $a2 +-; LA32-NEXT: maskeqz $a0, $a0, $a2 +-; LA32-NEXT: or $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: f64_fcmp_oge: +-; LA64: # %bb.0: +-; LA64-NEXT: fcmp.cle.d $fcc0, $fa1, $fa0 +-; LA64-NEXT: movcf2gr $a2, $fcc0 +-; LA64-NEXT: masknez $a1, $a1, $a2 +-; LA64-NEXT: maskeqz $a0, $a0, $a2 +-; LA64-NEXT: or $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cmp = fcmp oge double %a, %b +- %res = select i1 %cmp, i32 %x, i32 %y +- ret i32 %res +-} +- +-define i32 @f64_fcmp_olt(double %a, double %b, i32 %x, i32 %y) { +-; LA32-LABEL: f64_fcmp_olt: +-; LA32: # %bb.0: +-; LA32-NEXT: fcmp.clt.d $fcc0, $fa0, $fa1 +-; LA32-NEXT: movcf2gr $a2, $fcc0 +-; LA32-NEXT: masknez $a1, $a1, $a2 +-; LA32-NEXT: maskeqz $a0, $a0, $a2 +-; LA32-NEXT: or $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: f64_fcmp_olt: +-; LA64: # %bb.0: +-; LA64-NEXT: fcmp.clt.d $fcc0, $fa0, $fa1 +-; LA64-NEXT: movcf2gr $a2, $fcc0 +-; LA64-NEXT: masknez $a1, $a1, $a2 +-; LA64-NEXT: maskeqz $a0, $a0, $a2 +-; LA64-NEXT: or $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cmp = fcmp olt double %a, %b +- %res = select i1 %cmp, i32 %x, i32 %y +- ret i32 %res +-} +- +-define i32 @f64_fcmp_ole(double %a, double %b, i32 %x, i32 %y) { +-; LA32-LABEL: f64_fcmp_ole: +-; LA32: # %bb.0: +-; LA32-NEXT: fcmp.cle.d $fcc0, $fa0, $fa1 +-; LA32-NEXT: movcf2gr $a2, $fcc0 +-; LA32-NEXT: masknez $a1, $a1, $a2 +-; LA32-NEXT: maskeqz $a0, $a0, $a2 +-; LA32-NEXT: or $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: f64_fcmp_ole: +-; LA64: # %bb.0: +-; LA64-NEXT: fcmp.cle.d $fcc0, $fa0, $fa1 +-; LA64-NEXT: movcf2gr $a2, $fcc0 +-; LA64-NEXT: masknez $a1, $a1, $a2 +-; LA64-NEXT: maskeqz $a0, $a0, $a2 +-; LA64-NEXT: or $a0, $a0, $a1 +-; 
LA64-NEXT: jirl $zero, $ra, 0 +- %cmp = fcmp ole double %a, %b +- %res = select i1 %cmp, i32 %x, i32 %y +- ret i32 %res +-} +- +-define i32 @f64_fcmp_one(double %a, double %b, i32 %x, i32 %y) { +-; LA32-LABEL: f64_fcmp_one: +-; LA32: # %bb.0: +-; LA32-NEXT: fcmp.cne.d $fcc0, $fa0, $fa1 +-; LA32-NEXT: movcf2gr $a2, $fcc0 +-; LA32-NEXT: masknez $a1, $a1, $a2 +-; LA32-NEXT: maskeqz $a0, $a0, $a2 +-; LA32-NEXT: or $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: f64_fcmp_one: +-; LA64: # %bb.0: +-; LA64-NEXT: fcmp.cne.d $fcc0, $fa0, $fa1 +-; LA64-NEXT: movcf2gr $a2, $fcc0 +-; LA64-NEXT: masknez $a1, $a1, $a2 +-; LA64-NEXT: maskeqz $a0, $a0, $a2 +-; LA64-NEXT: or $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cmp = fcmp one double %a, %b +- %res = select i1 %cmp, i32 %x, i32 %y +- ret i32 %res +-} +- +-define i32 @f64_fcmp_ord(double %a, double %b, i32 %x, i32 %y) { +-; LA32-LABEL: f64_fcmp_ord: +-; LA32: # %bb.0: +-; LA32-NEXT: fcmp.cor.d $fcc0, $fa0, $fa1 +-; LA32-NEXT: movcf2gr $a2, $fcc0 +-; LA32-NEXT: masknez $a1, $a1, $a2 +-; LA32-NEXT: maskeqz $a0, $a0, $a2 +-; LA32-NEXT: or $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: f64_fcmp_ord: +-; LA64: # %bb.0: +-; LA64-NEXT: fcmp.cor.d $fcc0, $fa0, $fa1 +-; LA64-NEXT: movcf2gr $a2, $fcc0 +-; LA64-NEXT: masknez $a1, $a1, $a2 +-; LA64-NEXT: maskeqz $a0, $a0, $a2 +-; LA64-NEXT: or $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cmp = fcmp ord double %a, %b +- %res = select i1 %cmp, i32 %x, i32 %y +- ret i32 %res +-} +- +-define i32 @f64_fcmp_ueq(double %a, double %b, i32 %x, i32 %y) { +-; LA32-LABEL: f64_fcmp_ueq: +-; LA32: # %bb.0: +-; LA32-NEXT: fcmp.cueq.d $fcc0, $fa0, $fa1 +-; LA32-NEXT: movcf2gr $a2, $fcc0 +-; LA32-NEXT: masknez $a1, $a1, $a2 +-; LA32-NEXT: maskeqz $a0, $a0, $a2 +-; LA32-NEXT: or $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: f64_fcmp_ueq: +-; LA64: # %bb.0: +-; LA64-NEXT: fcmp.cueq.d $fcc0, $fa0, $fa1 +-; LA64-NEXT: movcf2gr $a2, $fcc0 +-; LA64-NEXT: masknez $a1, $a1, $a2 +-; LA64-NEXT: maskeqz $a0, $a0, $a2 +-; LA64-NEXT: or $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cmp = fcmp ueq double %a, %b +- %res = select i1 %cmp, i32 %x, i32 %y +- ret i32 %res +-} +- +-define i32 @f64_fcmp_ugt(double %a, double %b, i32 %x, i32 %y) { +-; LA32-LABEL: f64_fcmp_ugt: +-; LA32: # %bb.0: +-; LA32-NEXT: fcmp.cult.d $fcc0, $fa1, $fa0 +-; LA32-NEXT: movcf2gr $a2, $fcc0 +-; LA32-NEXT: masknez $a1, $a1, $a2 +-; LA32-NEXT: maskeqz $a0, $a0, $a2 +-; LA32-NEXT: or $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: f64_fcmp_ugt: +-; LA64: # %bb.0: +-; LA64-NEXT: fcmp.cult.d $fcc0, $fa1, $fa0 +-; LA64-NEXT: movcf2gr $a2, $fcc0 +-; LA64-NEXT: masknez $a1, $a1, $a2 +-; LA64-NEXT: maskeqz $a0, $a0, $a2 +-; LA64-NEXT: or $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cmp = fcmp ugt double %a, %b +- %res = select i1 %cmp, i32 %x, i32 %y +- ret i32 %res +-} +- +-define i32 @f64_fcmp_uge(double %a, double %b, i32 %x, i32 %y) { +-; LA32-LABEL: f64_fcmp_uge: +-; LA32: # %bb.0: +-; LA32-NEXT: fcmp.cule.d $fcc0, $fa1, $fa0 +-; LA32-NEXT: movcf2gr $a2, $fcc0 +-; LA32-NEXT: masknez $a1, $a1, $a2 +-; LA32-NEXT: maskeqz $a0, $a0, $a2 +-; LA32-NEXT: or $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: f64_fcmp_uge: +-; LA64: # %bb.0: +-; LA64-NEXT: fcmp.cule.d $fcc0, $fa1, $fa0 +-; LA64-NEXT: movcf2gr $a2, $fcc0 +-; LA64-NEXT: masknez $a1, $a1, $a2 +-; LA64-NEXT: maskeqz $a0, $a0, $a2 +-; LA64-NEXT: or $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 
0 +- %cmp = fcmp uge double %a, %b +- %res = select i1 %cmp, i32 %x, i32 %y +- ret i32 %res +-} +- +-define i32 @f64_fcmp_ult(double %a, double %b, i32 %x, i32 %y) { +-; LA32-LABEL: f64_fcmp_ult: +-; LA32: # %bb.0: +-; LA32-NEXT: fcmp.cult.d $fcc0, $fa0, $fa1 +-; LA32-NEXT: movcf2gr $a2, $fcc0 +-; LA32-NEXT: masknez $a1, $a1, $a2 +-; LA32-NEXT: maskeqz $a0, $a0, $a2 +-; LA32-NEXT: or $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: f64_fcmp_ult: +-; LA64: # %bb.0: +-; LA64-NEXT: fcmp.cult.d $fcc0, $fa0, $fa1 +-; LA64-NEXT: movcf2gr $a2, $fcc0 +-; LA64-NEXT: masknez $a1, $a1, $a2 +-; LA64-NEXT: maskeqz $a0, $a0, $a2 +-; LA64-NEXT: or $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cmp = fcmp ult double %a, %b +- %res = select i1 %cmp, i32 %x, i32 %y +- ret i32 %res +-} +- +-define i32 @f64_fcmp_ule(double %a, double %b, i32 %x, i32 %y) { +-; LA32-LABEL: f64_fcmp_ule: +-; LA32: # %bb.0: +-; LA32-NEXT: fcmp.cule.d $fcc0, $fa0, $fa1 +-; LA32-NEXT: movcf2gr $a2, $fcc0 +-; LA32-NEXT: masknez $a1, $a1, $a2 +-; LA32-NEXT: maskeqz $a0, $a0, $a2 +-; LA32-NEXT: or $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: f64_fcmp_ule: +-; LA64: # %bb.0: +-; LA64-NEXT: fcmp.cule.d $fcc0, $fa0, $fa1 +-; LA64-NEXT: movcf2gr $a2, $fcc0 +-; LA64-NEXT: masknez $a1, $a1, $a2 +-; LA64-NEXT: maskeqz $a0, $a0, $a2 +-; LA64-NEXT: or $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cmp = fcmp ule double %a, %b +- %res = select i1 %cmp, i32 %x, i32 %y +- ret i32 %res +-} +- +-define i32 @f64_fcmp_une(double %a, double %b, i32 %x, i32 %y) { +-; LA32-LABEL: f64_fcmp_une: +-; LA32: # %bb.0: +-; LA32-NEXT: fcmp.cune.d $fcc0, $fa0, $fa1 +-; LA32-NEXT: movcf2gr $a2, $fcc0 +-; LA32-NEXT: masknez $a1, $a1, $a2 +-; LA32-NEXT: maskeqz $a0, $a0, $a2 +-; LA32-NEXT: or $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: f64_fcmp_une: +-; LA64: # %bb.0: +-; LA64-NEXT: fcmp.cune.d $fcc0, $fa0, $fa1 +-; LA64-NEXT: movcf2gr $a2, $fcc0 +-; LA64-NEXT: masknez $a1, $a1, $a2 +-; LA64-NEXT: maskeqz $a0, $a0, $a2 +-; LA64-NEXT: or $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cmp = fcmp une double %a, %b +- %res = select i1 %cmp, i32 %x, i32 %y +- ret i32 %res +-} +- +-define i32 @f64_fcmp_uno(double %a, double %b, i32 %x, i32 %y) { +-; LA32-LABEL: f64_fcmp_uno: +-; LA32: # %bb.0: +-; LA32-NEXT: fcmp.cun.d $fcc0, $fa0, $fa1 +-; LA32-NEXT: movcf2gr $a2, $fcc0 +-; LA32-NEXT: masknez $a1, $a1, $a2 +-; LA32-NEXT: maskeqz $a0, $a0, $a2 +-; LA32-NEXT: or $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: f64_fcmp_uno: +-; LA64: # %bb.0: +-; LA64-NEXT: fcmp.cun.d $fcc0, $fa0, $fa1 +-; LA64-NEXT: movcf2gr $a2, $fcc0 +-; LA64-NEXT: masknez $a1, $a1, $a2 +-; LA64-NEXT: maskeqz $a0, $a0, $a2 +-; LA64-NEXT: or $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cmp = fcmp uno double %a, %b +- %res = select i1 %cmp, i32 %x, i32 %y +- ret i32 %res +-} +- +-define i32 @f64_fcmp_true(double %a, double %b, i32 %x, i32 %y) { +-; LA32-LABEL: f64_fcmp_true: +-; LA32: # %bb.0: +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: f64_fcmp_true: +-; LA64: # %bb.0: +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cmp = fcmp true double %a, %b +- %res = select i1 %cmp, i32 %x, i32 %y +- ret i32 %res +-} +diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/select-icc-dbl.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/select-icc-dbl.ll +deleted file mode 100644 +index 5ccee6b19..000000000 +--- a/llvm/test/CodeGen/LoongArch/ir-instruction/select-icc-dbl.ll ++++ /dev/null +@@ -1,206 +0,0 @@ +-; 
RUN: llc --mtriple=loongarch32 --mattr=+d < %s | FileCheck %s --check-prefix=LA32 +-; RUN: llc --mtriple=loongarch64 --mattr=+d < %s | FileCheck %s --check-prefix=LA64 +- +-;; Test double-precision floating-point values selection after integers comparison +- +-define double @select_eq(i32 signext %a, i32 signext %b, double %x, double %y) { +-; LA32-LABEL: select_eq: +-; LA32: # %bb.0: +-; LA32-NEXT: xor $a0, $a0, $a1 +-; LA32-NEXT: sltui $a0, $a0, 1 +-; LA32-NEXT: movgr2cf $fcc0, $a0 +-; LA32-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: select_eq: +-; LA64: # %bb.0: +-; LA64-NEXT: xor $a0, $a0, $a1 +-; LA64-NEXT: sltui $a0, $a0, 1 +-; LA64-NEXT: movgr2cf $fcc0, $a0 +-; LA64-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cond = icmp eq i32 %a, %b +- %res = select i1 %cond, double %x, double %y +- ret double %res +-} +- +-define double @select_ne(i32 signext %a, i32 signext %b, double %x, double %y) { +-; LA32-LABEL: select_ne: +-; LA32: # %bb.0: +-; LA32-NEXT: xor $a0, $a0, $a1 +-; LA32-NEXT: sltu $a0, $zero, $a0 +-; LA32-NEXT: movgr2cf $fcc0, $a0 +-; LA32-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: select_ne: +-; LA64: # %bb.0: +-; LA64-NEXT: xor $a0, $a0, $a1 +-; LA64-NEXT: sltu $a0, $zero, $a0 +-; LA64-NEXT: movgr2cf $fcc0, $a0 +-; LA64-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cond = icmp ne i32 %a, %b +- %res = select i1 %cond, double %x, double %y +- ret double %res +-} +- +-define double @select_ugt(i32 signext %a, i32 signext %b, double %x, double %y) { +-; LA32-LABEL: select_ugt: +-; LA32: # %bb.0: +-; LA32-NEXT: sltu $a0, $a1, $a0 +-; LA32-NEXT: movgr2cf $fcc0, $a0 +-; LA32-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: select_ugt: +-; LA64: # %bb.0: +-; LA64-NEXT: sltu $a0, $a1, $a0 +-; LA64-NEXT: movgr2cf $fcc0, $a0 +-; LA64-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cond = icmp ugt i32 %a, %b +- %res = select i1 %cond, double %x, double %y +- ret double %res +-} +- +-define double @select_uge(i32 signext %a, i32 signext %b, double %x, double %y) { +-; LA32-LABEL: select_uge: +-; LA32: # %bb.0: +-; LA32-NEXT: sltu $a0, $a0, $a1 +-; LA32-NEXT: xori $a0, $a0, 1 +-; LA32-NEXT: movgr2cf $fcc0, $a0 +-; LA32-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: select_uge: +-; LA64: # %bb.0: +-; LA64-NEXT: sltu $a0, $a0, $a1 +-; LA64-NEXT: xori $a0, $a0, 1 +-; LA64-NEXT: movgr2cf $fcc0, $a0 +-; LA64-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cond = icmp uge i32 %a, %b +- %res = select i1 %cond, double %x, double %y +- ret double %res +-} +- +-define double @select_ult(i32 signext %a, i32 signext %b, double %x, double %y) { +-; LA32-LABEL: select_ult: +-; LA32: # %bb.0: +-; LA32-NEXT: sltu $a0, $a0, $a1 +-; LA32-NEXT: movgr2cf $fcc0, $a0 +-; LA32-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: select_ult: +-; LA64: # %bb.0: +-; LA64-NEXT: sltu $a0, $a0, $a1 +-; LA64-NEXT: movgr2cf $fcc0, $a0 +-; LA64-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cond = icmp ult i32 %a, %b +- %res = select i1 %cond, double %x, double %y +- ret double %res +-} +- +-define double @select_ule(i32 signext %a, i32 signext %b, double %x, double %y) { +-; LA32-LABEL: select_ule: +-; LA32: # %bb.0: +-; LA32-NEXT: sltu $a0, $a1, $a0 +-; LA32-NEXT: xori $a0, $a0, 1 +-; 
LA32-NEXT: movgr2cf $fcc0, $a0 +-; LA32-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: select_ule: +-; LA64: # %bb.0: +-; LA64-NEXT: sltu $a0, $a1, $a0 +-; LA64-NEXT: xori $a0, $a0, 1 +-; LA64-NEXT: movgr2cf $fcc0, $a0 +-; LA64-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cond = icmp ule i32 %a, %b +- %res = select i1 %cond, double %x, double %y +- ret double %res +-} +- +-define double @select_sgt(i32 signext %a, i32 signext %b, double %x, double %y) { +-; LA32-LABEL: select_sgt: +-; LA32: # %bb.0: +-; LA32-NEXT: slt $a0, $a1, $a0 +-; LA32-NEXT: movgr2cf $fcc0, $a0 +-; LA32-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: select_sgt: +-; LA64: # %bb.0: +-; LA64-NEXT: slt $a0, $a1, $a0 +-; LA64-NEXT: movgr2cf $fcc0, $a0 +-; LA64-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cond = icmp sgt i32 %a, %b +- %res = select i1 %cond, double %x, double %y +- ret double %res +-} +- +-define double @select_sge(i32 signext %a, i32 signext %b, double %x, double %y) { +-; LA32-LABEL: select_sge: +-; LA32: # %bb.0: +-; LA32-NEXT: slt $a0, $a0, $a1 +-; LA32-NEXT: xori $a0, $a0, 1 +-; LA32-NEXT: movgr2cf $fcc0, $a0 +-; LA32-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: select_sge: +-; LA64: # %bb.0: +-; LA64-NEXT: slt $a0, $a0, $a1 +-; LA64-NEXT: xori $a0, $a0, 1 +-; LA64-NEXT: movgr2cf $fcc0, $a0 +-; LA64-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cond = icmp sge i32 %a, %b +- %res = select i1 %cond, double %x, double %y +- ret double %res +-} +- +-define double @select_slt(i32 signext %a, i32 signext %b, double %x, double %y) { +-; LA32-LABEL: select_slt: +-; LA32: # %bb.0: +-; LA32-NEXT: slt $a0, $a0, $a1 +-; LA32-NEXT: movgr2cf $fcc0, $a0 +-; LA32-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: select_slt: +-; LA64: # %bb.0: +-; LA64-NEXT: slt $a0, $a0, $a1 +-; LA64-NEXT: movgr2cf $fcc0, $a0 +-; LA64-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cond = icmp slt i32 %a, %b +- %res = select i1 %cond, double %x, double %y +- ret double %res +-} +- +-define double @select_sle(i32 signext %a, i32 signext %b, double %x, double %y) { +-; LA32-LABEL: select_sle: +-; LA32: # %bb.0: +-; LA32-NEXT: slt $a0, $a1, $a0 +-; LA32-NEXT: xori $a0, $a0, 1 +-; LA32-NEXT: movgr2cf $fcc0, $a0 +-; LA32-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: select_sle: +-; LA64: # %bb.0: +-; LA64-NEXT: slt $a0, $a1, $a0 +-; LA64-NEXT: xori $a0, $a0, 1 +-; LA64-NEXT: movgr2cf $fcc0, $a0 +-; LA64-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cond = icmp sle i32 %a, %b +- %res = select i1 %cond, double %x, double %y +- ret double %res +-} +diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/select-icc-flt.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/select-icc-flt.ll +deleted file mode 100644 +index 98b999776..000000000 +--- a/llvm/test/CodeGen/LoongArch/ir-instruction/select-icc-flt.ll ++++ /dev/null +@@ -1,206 +0,0 @@ +-; RUN: llc --mtriple=loongarch32 --mattr=+f,-d < %s | FileCheck %s --check-prefix=LA32 +-; RUN: llc --mtriple=loongarch64 --mattr=+f,-d < %s | FileCheck %s --check-prefix=LA64 +- +-;; Test single-precision floating-point values selection after integers comparison +- +-define float @select_eq(i32 signext %a, i32 signext %b, float %x, float %y) { +-; LA32-LABEL: select_eq: 
+-; LA32: # %bb.0: +-; LA32-NEXT: xor $a0, $a0, $a1 +-; LA32-NEXT: sltui $a0, $a0, 1 +-; LA32-NEXT: movgr2cf $fcc0, $a0 +-; LA32-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: select_eq: +-; LA64: # %bb.0: +-; LA64-NEXT: xor $a0, $a0, $a1 +-; LA64-NEXT: sltui $a0, $a0, 1 +-; LA64-NEXT: movgr2cf $fcc0, $a0 +-; LA64-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cond = icmp eq i32 %a, %b +- %res = select i1 %cond, float %x, float %y +- ret float %res +-} +- +-define float @select_ne(i32 signext %a, i32 signext %b, float %x, float %y) { +-; LA32-LABEL: select_ne: +-; LA32: # %bb.0: +-; LA32-NEXT: xor $a0, $a0, $a1 +-; LA32-NEXT: sltu $a0, $zero, $a0 +-; LA32-NEXT: movgr2cf $fcc0, $a0 +-; LA32-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: select_ne: +-; LA64: # %bb.0: +-; LA64-NEXT: xor $a0, $a0, $a1 +-; LA64-NEXT: sltu $a0, $zero, $a0 +-; LA64-NEXT: movgr2cf $fcc0, $a0 +-; LA64-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cond = icmp ne i32 %a, %b +- %res = select i1 %cond, float %x, float %y +- ret float %res +-} +- +-define float @select_ugt(i32 signext %a, i32 signext %b, float %x, float %y) { +-; LA32-LABEL: select_ugt: +-; LA32: # %bb.0: +-; LA32-NEXT: sltu $a0, $a1, $a0 +-; LA32-NEXT: movgr2cf $fcc0, $a0 +-; LA32-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: select_ugt: +-; LA64: # %bb.0: +-; LA64-NEXT: sltu $a0, $a1, $a0 +-; LA64-NEXT: movgr2cf $fcc0, $a0 +-; LA64-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cond = icmp ugt i32 %a, %b +- %res = select i1 %cond, float %x, float %y +- ret float %res +-} +- +-define float @select_uge(i32 signext %a, i32 signext %b, float %x, float %y) { +-; LA32-LABEL: select_uge: +-; LA32: # %bb.0: +-; LA32-NEXT: sltu $a0, $a0, $a1 +-; LA32-NEXT: xori $a0, $a0, 1 +-; LA32-NEXT: movgr2cf $fcc0, $a0 +-; LA32-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: select_uge: +-; LA64: # %bb.0: +-; LA64-NEXT: sltu $a0, $a0, $a1 +-; LA64-NEXT: xori $a0, $a0, 1 +-; LA64-NEXT: movgr2cf $fcc0, $a0 +-; LA64-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cond = icmp uge i32 %a, %b +- %res = select i1 %cond, float %x, float %y +- ret float %res +-} +- +-define float @select_ult(i32 signext %a, i32 signext %b, float %x, float %y) { +-; LA32-LABEL: select_ult: +-; LA32: # %bb.0: +-; LA32-NEXT: sltu $a0, $a0, $a1 +-; LA32-NEXT: movgr2cf $fcc0, $a0 +-; LA32-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: select_ult: +-; LA64: # %bb.0: +-; LA64-NEXT: sltu $a0, $a0, $a1 +-; LA64-NEXT: movgr2cf $fcc0, $a0 +-; LA64-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cond = icmp ult i32 %a, %b +- %res = select i1 %cond, float %x, float %y +- ret float %res +-} +- +-define float @select_ule(i32 signext %a, i32 signext %b, float %x, float %y) { +-; LA32-LABEL: select_ule: +-; LA32: # %bb.0: +-; LA32-NEXT: sltu $a0, $a1, $a0 +-; LA32-NEXT: xori $a0, $a0, 1 +-; LA32-NEXT: movgr2cf $fcc0, $a0 +-; LA32-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: select_ule: +-; LA64: # %bb.0: +-; LA64-NEXT: sltu $a0, $a1, $a0 +-; LA64-NEXT: xori $a0, $a0, 1 +-; LA64-NEXT: movgr2cf $fcc0, $a0 +-; LA64-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cond = icmp ule i32 %a, %b +- %res = select i1 %cond, 
float %x, float %y +- ret float %res +-} +- +-define float @select_sgt(i32 signext %a, i32 signext %b, float %x, float %y) { +-; LA32-LABEL: select_sgt: +-; LA32: # %bb.0: +-; LA32-NEXT: slt $a0, $a1, $a0 +-; LA32-NEXT: movgr2cf $fcc0, $a0 +-; LA32-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: select_sgt: +-; LA64: # %bb.0: +-; LA64-NEXT: slt $a0, $a1, $a0 +-; LA64-NEXT: movgr2cf $fcc0, $a0 +-; LA64-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cond = icmp sgt i32 %a, %b +- %res = select i1 %cond, float %x, float %y +- ret float %res +-} +- +-define float @select_sge(i32 signext %a, i32 signext %b, float %x, float %y) { +-; LA32-LABEL: select_sge: +-; LA32: # %bb.0: +-; LA32-NEXT: slt $a0, $a0, $a1 +-; LA32-NEXT: xori $a0, $a0, 1 +-; LA32-NEXT: movgr2cf $fcc0, $a0 +-; LA32-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: select_sge: +-; LA64: # %bb.0: +-; LA64-NEXT: slt $a0, $a0, $a1 +-; LA64-NEXT: xori $a0, $a0, 1 +-; LA64-NEXT: movgr2cf $fcc0, $a0 +-; LA64-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cond = icmp sge i32 %a, %b +- %res = select i1 %cond, float %x, float %y +- ret float %res +-} +- +-define float @select_slt(i32 signext %a, i32 signext %b, float %x, float %y) { +-; LA32-LABEL: select_slt: +-; LA32: # %bb.0: +-; LA32-NEXT: slt $a0, $a0, $a1 +-; LA32-NEXT: movgr2cf $fcc0, $a0 +-; LA32-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: select_slt: +-; LA64: # %bb.0: +-; LA64-NEXT: slt $a0, $a0, $a1 +-; LA64-NEXT: movgr2cf $fcc0, $a0 +-; LA64-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cond = icmp slt i32 %a, %b +- %res = select i1 %cond, float %x, float %y +- ret float %res +-} +- +-define float @select_sle(i32 signext %a, i32 signext %b, float %x, float %y) { +-; LA32-LABEL: select_sle: +-; LA32: # %bb.0: +-; LA32-NEXT: slt $a0, $a1, $a0 +-; LA32-NEXT: xori $a0, $a0, 1 +-; LA32-NEXT: movgr2cf $fcc0, $a0 +-; LA32-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: select_sle: +-; LA64: # %bb.0: +-; LA64-NEXT: slt $a0, $a1, $a0 +-; LA64-NEXT: xori $a0, $a0, 1 +-; LA64-NEXT: movgr2cf $fcc0, $a0 +-; LA64-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cond = icmp sle i32 %a, %b +- %res = select i1 %cond, float %x, float %y +- ret float %res +-} +diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/select-icc-int.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/select-icc-int.ll +deleted file mode 100644 +index 3b7c2adfb..000000000 +--- a/llvm/test/CodeGen/LoongArch/ir-instruction/select-icc-int.ll ++++ /dev/null +@@ -1,226 +0,0 @@ +-; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32 +-; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64 +- +-;; Test integers selection after integers comparison +- +-define i32 @select_eq(i32 signext %a, i32 signext %b, i32 %x, i32 %y) { +-; LA32-LABEL: select_eq: +-; LA32: # %bb.0: +-; LA32-NEXT: xor $a0, $a0, $a1 +-; LA32-NEXT: sltui $a0, $a0, 1 +-; LA32-NEXT: masknez $a1, $a3, $a0 +-; LA32-NEXT: maskeqz $a0, $a2, $a0 +-; LA32-NEXT: or $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: select_eq: +-; LA64: # %bb.0: +-; LA64-NEXT: xor $a0, $a0, $a1 +-; LA64-NEXT: sltui $a0, $a0, 1 +-; LA64-NEXT: masknez $a1, $a3, $a0 +-; LA64-NEXT: maskeqz $a0, $a2, $a0 +-; LA64-NEXT: or $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- 
%cond = icmp eq i32 %a, %b +- %res = select i1 %cond, i32 %x, i32 %y +- ret i32 %res +-} +- +-define i32 @select_ne(i32 signext %a, i32 signext %b, i32 %x, i32 %y) { +-; LA32-LABEL: select_ne: +-; LA32: # %bb.0: +-; LA32-NEXT: xor $a0, $a0, $a1 +-; LA32-NEXT: sltu $a0, $zero, $a0 +-; LA32-NEXT: masknez $a1, $a3, $a0 +-; LA32-NEXT: maskeqz $a0, $a2, $a0 +-; LA32-NEXT: or $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: select_ne: +-; LA64: # %bb.0: +-; LA64-NEXT: xor $a0, $a0, $a1 +-; LA64-NEXT: sltu $a0, $zero, $a0 +-; LA64-NEXT: masknez $a1, $a3, $a0 +-; LA64-NEXT: maskeqz $a0, $a2, $a0 +-; LA64-NEXT: or $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cond = icmp ne i32 %a, %b +- %res = select i1 %cond, i32 %x, i32 %y +- ret i32 %res +-} +- +-define i32 @select_ugt(i32 signext %a, i32 signext %b, i32 %x, i32 %y) { +-; LA32-LABEL: select_ugt: +-; LA32: # %bb.0: +-; LA32-NEXT: sltu $a0, $a1, $a0 +-; LA32-NEXT: masknez $a1, $a3, $a0 +-; LA32-NEXT: maskeqz $a0, $a2, $a0 +-; LA32-NEXT: or $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: select_ugt: +-; LA64: # %bb.0: +-; LA64-NEXT: sltu $a0, $a1, $a0 +-; LA64-NEXT: masknez $a1, $a3, $a0 +-; LA64-NEXT: maskeqz $a0, $a2, $a0 +-; LA64-NEXT: or $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cond = icmp ugt i32 %a, %b +- %res = select i1 %cond, i32 %x, i32 %y +- ret i32 %res +-} +- +-define i32 @select_uge(i32 signext %a, i32 signext %b, i32 %x, i32 %y) { +-; LA32-LABEL: select_uge: +-; LA32: # %bb.0: +-; LA32-NEXT: sltu $a0, $a0, $a1 +-; LA32-NEXT: xori $a0, $a0, 1 +-; LA32-NEXT: masknez $a1, $a3, $a0 +-; LA32-NEXT: maskeqz $a0, $a2, $a0 +-; LA32-NEXT: or $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: select_uge: +-; LA64: # %bb.0: +-; LA64-NEXT: sltu $a0, $a0, $a1 +-; LA64-NEXT: xori $a0, $a0, 1 +-; LA64-NEXT: masknez $a1, $a3, $a0 +-; LA64-NEXT: maskeqz $a0, $a2, $a0 +-; LA64-NEXT: or $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cond = icmp uge i32 %a, %b +- %res = select i1 %cond, i32 %x, i32 %y +- ret i32 %res +-} +- +-define i32 @select_ult(i32 signext %a, i32 signext %b, i32 %x, i32 %y) { +-; LA32-LABEL: select_ult: +-; LA32: # %bb.0: +-; LA32-NEXT: sltu $a0, $a0, $a1 +-; LA32-NEXT: masknez $a1, $a3, $a0 +-; LA32-NEXT: maskeqz $a0, $a2, $a0 +-; LA32-NEXT: or $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: select_ult: +-; LA64: # %bb.0: +-; LA64-NEXT: sltu $a0, $a0, $a1 +-; LA64-NEXT: masknez $a1, $a3, $a0 +-; LA64-NEXT: maskeqz $a0, $a2, $a0 +-; LA64-NEXT: or $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cond = icmp ult i32 %a, %b +- %res = select i1 %cond, i32 %x, i32 %y +- ret i32 %res +-} +- +-define i32 @select_ule(i32 signext %a, i32 signext %b, i32 %x, i32 %y) { +-; LA32-LABEL: select_ule: +-; LA32: # %bb.0: +-; LA32-NEXT: sltu $a0, $a1, $a0 +-; LA32-NEXT: xori $a0, $a0, 1 +-; LA32-NEXT: masknez $a1, $a3, $a0 +-; LA32-NEXT: maskeqz $a0, $a2, $a0 +-; LA32-NEXT: or $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: select_ule: +-; LA64: # %bb.0: +-; LA64-NEXT: sltu $a0, $a1, $a0 +-; LA64-NEXT: xori $a0, $a0, 1 +-; LA64-NEXT: masknez $a1, $a3, $a0 +-; LA64-NEXT: maskeqz $a0, $a2, $a0 +-; LA64-NEXT: or $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cond = icmp ule i32 %a, %b +- %res = select i1 %cond, i32 %x, i32 %y +- ret i32 %res +-} +- +-define i32 @select_sgt(i32 signext %a, i32 signext %b, i32 %x, i32 %y) { +-; LA32-LABEL: select_sgt: +-; LA32: # %bb.0: +-; LA32-NEXT: slt $a0, $a1, $a0 +-; LA32-NEXT: masknez $a1, 
$a3, $a0 +-; LA32-NEXT: maskeqz $a0, $a2, $a0 +-; LA32-NEXT: or $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: select_sgt: +-; LA64: # %bb.0: +-; LA64-NEXT: slt $a0, $a1, $a0 +-; LA64-NEXT: masknez $a1, $a3, $a0 +-; LA64-NEXT: maskeqz $a0, $a2, $a0 +-; LA64-NEXT: or $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cond = icmp sgt i32 %a, %b +- %res = select i1 %cond, i32 %x, i32 %y +- ret i32 %res +-} +- +-define i32 @select_sge(i32 signext %a, i32 signext %b, i32 %x, i32 %y) { +-; LA32-LABEL: select_sge: +-; LA32: # %bb.0: +-; LA32-NEXT: slt $a0, $a0, $a1 +-; LA32-NEXT: xori $a0, $a0, 1 +-; LA32-NEXT: masknez $a1, $a3, $a0 +-; LA32-NEXT: maskeqz $a0, $a2, $a0 +-; LA32-NEXT: or $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: select_sge: +-; LA64: # %bb.0: +-; LA64-NEXT: slt $a0, $a0, $a1 +-; LA64-NEXT: xori $a0, $a0, 1 +-; LA64-NEXT: masknez $a1, $a3, $a0 +-; LA64-NEXT: maskeqz $a0, $a2, $a0 +-; LA64-NEXT: or $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cond = icmp sge i32 %a, %b +- %res = select i1 %cond, i32 %x, i32 %y +- ret i32 %res +-} +- +-define i32 @select_slt(i32 signext %a, i32 signext %b, i32 %x, i32 %y) { +-; LA32-LABEL: select_slt: +-; LA32: # %bb.0: +-; LA32-NEXT: slt $a0, $a0, $a1 +-; LA32-NEXT: masknez $a1, $a3, $a0 +-; LA32-NEXT: maskeqz $a0, $a2, $a0 +-; LA32-NEXT: or $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: select_slt: +-; LA64: # %bb.0: +-; LA64-NEXT: slt $a0, $a0, $a1 +-; LA64-NEXT: masknez $a1, $a3, $a0 +-; LA64-NEXT: maskeqz $a0, $a2, $a0 +-; LA64-NEXT: or $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cond = icmp slt i32 %a, %b +- %res = select i1 %cond, i32 %x, i32 %y +- ret i32 %res +-} +- +-define i32 @select_sle(i32 signext %a, i32 signext %b, i32 %x, i32 %y) { +-; LA32-LABEL: select_sle: +-; LA32: # %bb.0: +-; LA32-NEXT: slt $a0, $a1, $a0 +-; LA32-NEXT: xori $a0, $a0, 1 +-; LA32-NEXT: masknez $a1, $a3, $a0 +-; LA32-NEXT: maskeqz $a0, $a2, $a0 +-; LA32-NEXT: or $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: select_sle: +-; LA64: # %bb.0: +-; LA64-NEXT: slt $a0, $a1, $a0 +-; LA64-NEXT: xori $a0, $a0, 1 +-; LA64-NEXT: masknez $a1, $a3, $a0 +-; LA64-NEXT: maskeqz $a0, $a2, $a0 +-; LA64-NEXT: or $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %cond = icmp sle i32 %a, %b +- %res = select i1 %cond, i32 %x, i32 %y +- ret i32 %res +-} +diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/sext-zext-trunc.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/sext-zext-trunc.ll +deleted file mode 100644 +index 911751bc6..000000000 +--- a/llvm/test/CodeGen/LoongArch/ir-instruction/sext-zext-trunc.ll ++++ /dev/null +@@ -1,418 +0,0 @@ +-; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32 +-; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64 +- +-;; Test sext/zext/trunc +- +-define i8 @sext_i1_to_i8(i1 %a) { +-; LA32-LABEL: sext_i1_to_i8: +-; LA32: # %bb.0: +-; LA32-NEXT: andi $a0, $a0, 1 +-; LA32-NEXT: sub.w $a0, $zero, $a0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: sext_i1_to_i8: +-; LA64: # %bb.0: +-; LA64-NEXT: andi $a0, $a0, 1 +-; LA64-NEXT: sub.d $a0, $zero, $a0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %1 = sext i1 %a to i8 +- ret i8 %1 +-} +- +-define i16 @sext_i1_to_i16(i1 %a) { +-; LA32-LABEL: sext_i1_to_i16: +-; LA32: # %bb.0: +-; LA32-NEXT: andi $a0, $a0, 1 +-; LA32-NEXT: sub.w $a0, $zero, $a0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: sext_i1_to_i16: +-; LA64: # %bb.0: +-; LA64-NEXT: andi $a0, $a0, 1 +-; 
LA64-NEXT: sub.d $a0, $zero, $a0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %1 = sext i1 %a to i16 +- ret i16 %1 +-} +- +-define i32 @sext_i1_to_i32(i1 %a) { +-; LA32-LABEL: sext_i1_to_i32: +-; LA32: # %bb.0: +-; LA32-NEXT: andi $a0, $a0, 1 +-; LA32-NEXT: sub.w $a0, $zero, $a0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: sext_i1_to_i32: +-; LA64: # %bb.0: +-; LA64-NEXT: andi $a0, $a0, 1 +-; LA64-NEXT: sub.d $a0, $zero, $a0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %1 = sext i1 %a to i32 +- ret i32 %1 +-} +- +-define i64 @sext_i1_to_i64(i1 %a) { +-; LA32-LABEL: sext_i1_to_i64: +-; LA32: # %bb.0: +-; LA32-NEXT: andi $a0, $a0, 1 +-; LA32-NEXT: sub.w $a0, $zero, $a0 +-; LA32-NEXT: move $a1, $a0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: sext_i1_to_i64: +-; LA64: # %bb.0: +-; LA64-NEXT: andi $a0, $a0, 1 +-; LA64-NEXT: sub.d $a0, $zero, $a0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %1 = sext i1 %a to i64 +- ret i64 %1 +-} +- +-define i16 @sext_i8_to_i16(i8 %a) { +-; LA32-LABEL: sext_i8_to_i16: +-; LA32: # %bb.0: +-; LA32-NEXT: ext.w.b $a0, $a0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: sext_i8_to_i16: +-; LA64: # %bb.0: +-; LA64-NEXT: ext.w.b $a0, $a0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %1 = sext i8 %a to i16 +- ret i16 %1 +-} +- +-define i32 @sext_i8_to_i32(i8 %a) { +-; LA32-LABEL: sext_i8_to_i32: +-; LA32: # %bb.0: +-; LA32-NEXT: ext.w.b $a0, $a0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: sext_i8_to_i32: +-; LA64: # %bb.0: +-; LA64-NEXT: ext.w.b $a0, $a0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %1 = sext i8 %a to i32 +- ret i32 %1 +-} +- +-define i64 @sext_i8_to_i64(i8 %a) { +-; LA32-LABEL: sext_i8_to_i64: +-; LA32: # %bb.0: +-; LA32-NEXT: ext.w.b $a0, $a0 +-; LA32-NEXT: srai.w $a1, $a0, 31 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: sext_i8_to_i64: +-; LA64: # %bb.0: +-; LA64-NEXT: ext.w.b $a0, $a0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %1 = sext i8 %a to i64 +- ret i64 %1 +-} +- +-define i32 @sext_i16_to_i32(i16 %a) { +-; LA32-LABEL: sext_i16_to_i32: +-; LA32: # %bb.0: +-; LA32-NEXT: ext.w.h $a0, $a0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: sext_i16_to_i32: +-; LA64: # %bb.0: +-; LA64-NEXT: ext.w.h $a0, $a0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %1 = sext i16 %a to i32 +- ret i32 %1 +-} +- +-define i64 @sext_i16_to_i64(i16 %a) { +-; LA32-LABEL: sext_i16_to_i64: +-; LA32: # %bb.0: +-; LA32-NEXT: ext.w.h $a0, $a0 +-; LA32-NEXT: srai.w $a1, $a0, 31 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: sext_i16_to_i64: +-; LA64: # %bb.0: +-; LA64-NEXT: ext.w.h $a0, $a0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %1 = sext i16 %a to i64 +- ret i64 %1 +-} +- +-define i64 @sext_i32_to_i64(i32 %a) { +-; LA32-LABEL: sext_i32_to_i64: +-; LA32: # %bb.0: +-; LA32-NEXT: srai.w $a1, $a0, 31 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: sext_i32_to_i64: +-; LA64: # %bb.0: +-; LA64-NEXT: addi.w $a0, $a0, 0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %1 = sext i32 %a to i64 +- ret i64 %1 +-} +- +-define i8 @zext_i1_to_i8(i1 %a) { +-; LA32-LABEL: zext_i1_to_i8: +-; LA32: # %bb.0: +-; LA32-NEXT: andi $a0, $a0, 1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: zext_i1_to_i8: +-; LA64: # %bb.0: +-; LA64-NEXT: andi $a0, $a0, 1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %1 = zext i1 %a to i8 +- ret i8 %1 +-} +- +-define i16 @zext_i1_to_i16(i1 %a) { +-; LA32-LABEL: zext_i1_to_i16: +-; LA32: # %bb.0: +-; LA32-NEXT: andi $a0, $a0, 1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: zext_i1_to_i16: +-; LA64: # %bb.0: +-; LA64-NEXT: andi $a0, $a0, 1 +-; LA64-NEXT: jirl $zero, 
$ra, 0 +- %1 = zext i1 %a to i16 +- ret i16 %1 +-} +- +-define i32 @zext_i1_to_i32(i1 %a) { +-; LA32-LABEL: zext_i1_to_i32: +-; LA32: # %bb.0: +-; LA32-NEXT: andi $a0, $a0, 1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: zext_i1_to_i32: +-; LA64: # %bb.0: +-; LA64-NEXT: andi $a0, $a0, 1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %1 = zext i1 %a to i32 +- ret i32 %1 +-} +- +-define i64 @zext_i1_to_i64(i1 %a) { +-; LA32-LABEL: zext_i1_to_i64: +-; LA32: # %bb.0: +-; LA32-NEXT: andi $a0, $a0, 1 +-; LA32-NEXT: move $a1, $zero +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: zext_i1_to_i64: +-; LA64: # %bb.0: +-; LA64-NEXT: andi $a0, $a0, 1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %1 = zext i1 %a to i64 +- ret i64 %1 +-} +- +-define i16 @zext_i8_to_i16(i8 %a) { +-; LA32-LABEL: zext_i8_to_i16: +-; LA32: # %bb.0: +-; LA32-NEXT: andi $a0, $a0, 255 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: zext_i8_to_i16: +-; LA64: # %bb.0: +-; LA64-NEXT: andi $a0, $a0, 255 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %1 = zext i8 %a to i16 +- ret i16 %1 +-} +- +-define i32 @zext_i8_to_i32(i8 %a) { +-; LA32-LABEL: zext_i8_to_i32: +-; LA32: # %bb.0: +-; LA32-NEXT: andi $a0, $a0, 255 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: zext_i8_to_i32: +-; LA64: # %bb.0: +-; LA64-NEXT: andi $a0, $a0, 255 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %1 = zext i8 %a to i32 +- ret i32 %1 +-} +- +-define i64 @zext_i8_to_i64(i8 %a) { +-; LA32-LABEL: zext_i8_to_i64: +-; LA32: # %bb.0: +-; LA32-NEXT: andi $a0, $a0, 255 +-; LA32-NEXT: move $a1, $zero +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: zext_i8_to_i64: +-; LA64: # %bb.0: +-; LA64-NEXT: andi $a0, $a0, 255 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %1 = zext i8 %a to i64 +- ret i64 %1 +-} +- +-define i32 @zext_i16_to_i32(i16 %a) { +-; LA32-LABEL: zext_i16_to_i32: +-; LA32: # %bb.0: +-; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: zext_i16_to_i32: +-; LA64: # %bb.0: +-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %1 = zext i16 %a to i32 +- ret i32 %1 +-} +- +-define i64 @zext_i16_to_i64(i16 %a) { +-; LA32-LABEL: zext_i16_to_i64: +-; LA32: # %bb.0: +-; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0 +-; LA32-NEXT: move $a1, $zero +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: zext_i16_to_i64: +-; LA64: # %bb.0: +-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %1 = zext i16 %a to i64 +- ret i64 %1 +-} +- +-define i64 @zext_i32_to_i64(i32 %a) { +-; LA32-LABEL: zext_i32_to_i64: +-; LA32: # %bb.0: +-; LA32-NEXT: move $a1, $zero +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: zext_i32_to_i64: +-; LA64: # %bb.0: +-; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %1 = zext i32 %a to i64 +- ret i64 %1 +-} +- +-define i1 @trunc_i8_to_i1(i8 %a) { +-; LA32-LABEL: trunc_i8_to_i1: +-; LA32: # %bb.0: +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: trunc_i8_to_i1: +-; LA64: # %bb.0: +-; LA64-NEXT: jirl $zero, $ra, 0 +- %1 = trunc i8 %a to i1 +- ret i1 %1 +-} +- +-define i1 @trunc_i16_to_i1(i16 %a) { +-; LA32-LABEL: trunc_i16_to_i1: +-; LA32: # %bb.0: +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: trunc_i16_to_i1: +-; LA64: # %bb.0: +-; LA64-NEXT: jirl $zero, $ra, 0 +- %1 = trunc i16 %a to i1 +- ret i1 %1 +-} +- +-define i1 @trunc_i32_to_i1(i32 %a) { +-; LA32-LABEL: trunc_i32_to_i1: +-; LA32: # %bb.0: +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: trunc_i32_to_i1: +-; LA64: # %bb.0: +-; LA64-NEXT: jirl $zero, $ra, 0 +- %1 = trunc 
i32 %a to i1 +- ret i1 %1 +-} +- +-define i1 @trunc_i64_to_i1(i64 %a) { +-; LA32-LABEL: trunc_i64_to_i1: +-; LA32: # %bb.0: +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: trunc_i64_to_i1: +-; LA64: # %bb.0: +-; LA64-NEXT: jirl $zero, $ra, 0 +- %1 = trunc i64 %a to i1 +- ret i1 %1 +-} +- +-define i8 @trunc_i16_to_i8(i16 %a) { +-; LA32-LABEL: trunc_i16_to_i8: +-; LA32: # %bb.0: +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: trunc_i16_to_i8: +-; LA64: # %bb.0: +-; LA64-NEXT: jirl $zero, $ra, 0 +- %1 = trunc i16 %a to i8 +- ret i8 %1 +-} +- +-define i8 @trunc_i32_to_i8(i32 %a) { +-; LA32-LABEL: trunc_i32_to_i8: +-; LA32: # %bb.0: +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: trunc_i32_to_i8: +-; LA64: # %bb.0: +-; LA64-NEXT: jirl $zero, $ra, 0 +- %1 = trunc i32 %a to i8 +- ret i8 %1 +-} +- +-define i8 @trunc_i64_to_i8(i64 %a) { +-; LA32-LABEL: trunc_i64_to_i8: +-; LA32: # %bb.0: +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: trunc_i64_to_i8: +-; LA64: # %bb.0: +-; LA64-NEXT: jirl $zero, $ra, 0 +- %1 = trunc i64 %a to i8 +- ret i8 %1 +-} +- +-define i16 @trunc_i32_to_i16(i32 %a) { +-; LA32-LABEL: trunc_i32_to_i16: +-; LA32: # %bb.0: +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: trunc_i32_to_i16: +-; LA64: # %bb.0: +-; LA64-NEXT: jirl $zero, $ra, 0 +- %1 = trunc i32 %a to i16 +- ret i16 %1 +-} +- +-define i16 @trunc_i64_to_i16(i64 %a) { +-; LA32-LABEL: trunc_i64_to_i16: +-; LA32: # %bb.0: +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: trunc_i64_to_i16: +-; LA64: # %bb.0: +-; LA64-NEXT: jirl $zero, $ra, 0 +- %1 = trunc i64 %a to i16 +- ret i16 %1 +-} +- +-define i32 @trunc_i64_to_i32(i64 %a) { +-; LA32-LABEL: trunc_i64_to_i32: +-; LA32: # %bb.0: +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: trunc_i64_to_i32: +-; LA64: # %bb.0: +-; LA64-NEXT: jirl $zero, $ra, 0 +- %1 = trunc i64 %a to i32 +- ret i32 %1 +-} +diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/shl.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/shl.ll +deleted file mode 100644 +index de2504045..000000000 +--- a/llvm/test/CodeGen/LoongArch/ir-instruction/shl.ll ++++ /dev/null +@@ -1,156 +0,0 @@ +-; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32 +-; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64 +- +-;; Exercise the 'shl' LLVM IR: https://llvm.org/docs/LangRef.html#shl-instruction +- +-define i1 @shl_i1(i1 %x, i1 %y) { +-; LA32-LABEL: shl_i1: +-; LA32: # %bb.0: +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: shl_i1: +-; LA64: # %bb.0: +-; LA64-NEXT: jirl $zero, $ra, 0 +- %shl = shl i1 %x, %y +- ret i1 %shl +-} +- +-define i8 @shl_i8(i8 %x, i8 %y) { +-; LA32-LABEL: shl_i8: +-; LA32: # %bb.0: +-; LA32-NEXT: sll.w $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: shl_i8: +-; LA64: # %bb.0: +-; LA64-NEXT: sll.d $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %shl = shl i8 %x, %y +- ret i8 %shl +-} +- +-define i16 @shl_i16(i16 %x, i16 %y) { +-; LA32-LABEL: shl_i16: +-; LA32: # %bb.0: +-; LA32-NEXT: sll.w $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: shl_i16: +-; LA64: # %bb.0: +-; LA64-NEXT: sll.d $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %shl = shl i16 %x, %y +- ret i16 %shl +-} +- +-define i32 @shl_i32(i32 %x, i32 %y) { +-; LA32-LABEL: shl_i32: +-; LA32: # %bb.0: +-; LA32-NEXT: sll.w $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: shl_i32: +-; LA64: # %bb.0: +-; LA64-NEXT: sll.w $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %shl = shl i32 %x, %y +- ret 
i32 %shl +-} +- +-define i64 @shl_i64(i64 %x, i64 %y) { +-; LA32-LABEL: shl_i64: +-; LA32: # %bb.0: +-; LA32-NEXT: xori $a3, $a2, 31 +-; LA32-NEXT: srli.w $a4, $a0, 1 +-; LA32-NEXT: srl.w $a3, $a4, $a3 +-; LA32-NEXT: sll.w $a1, $a1, $a2 +-; LA32-NEXT: or $a1, $a1, $a3 +-; LA32-NEXT: addi.w $a3, $a2, -32 +-; LA32-NEXT: slti $a4, $a3, 0 +-; LA32-NEXT: maskeqz $a1, $a1, $a4 +-; LA32-NEXT: sll.w $a5, $a0, $a3 +-; LA32-NEXT: masknez $a4, $a5, $a4 +-; LA32-NEXT: or $a1, $a1, $a4 +-; LA32-NEXT: sll.w $a0, $a0, $a2 +-; LA32-NEXT: srai.w $a2, $a3, 31 +-; LA32-NEXT: and $a0, $a2, $a0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: shl_i64: +-; LA64: # %bb.0: +-; LA64-NEXT: sll.d $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %shl = shl i64 %x, %y +- ret i64 %shl +-} +- +-define i1 @shl_i1_3(i1 %x) { +-; LA32-LABEL: shl_i1_3: +-; LA32: # %bb.0: +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: shl_i1_3: +-; LA64: # %bb.0: +-; LA64-NEXT: jirl $zero, $ra, 0 +- %shl = shl i1 %x, 3 +- ret i1 %shl +-} +- +-define i8 @shl_i8_3(i8 %x) { +-; LA32-LABEL: shl_i8_3: +-; LA32: # %bb.0: +-; LA32-NEXT: slli.w $a0, $a0, 3 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: shl_i8_3: +-; LA64: # %bb.0: +-; LA64-NEXT: slli.d $a0, $a0, 3 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %shl = shl i8 %x, 3 +- ret i8 %shl +-} +- +-define i16 @shl_i16_3(i16 %x) { +-; LA32-LABEL: shl_i16_3: +-; LA32: # %bb.0: +-; LA32-NEXT: slli.w $a0, $a0, 3 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: shl_i16_3: +-; LA64: # %bb.0: +-; LA64-NEXT: slli.d $a0, $a0, 3 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %shl = shl i16 %x, 3 +- ret i16 %shl +-} +- +-define i32 @shl_i32_3(i32 %x) { +-; LA32-LABEL: shl_i32_3: +-; LA32: # %bb.0: +-; LA32-NEXT: slli.w $a0, $a0, 3 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: shl_i32_3: +-; LA64: # %bb.0: +-; LA64-NEXT: slli.d $a0, $a0, 3 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %shl = shl i32 %x, 3 +- ret i32 %shl +-} +- +-define i64 @shl_i64_3(i64 %x) { +-; LA32-LABEL: shl_i64_3: +-; LA32: # %bb.0: +-; LA32-NEXT: slli.w $a1, $a1, 3 +-; LA32-NEXT: srli.w $a2, $a0, 29 +-; LA32-NEXT: or $a1, $a1, $a2 +-; LA32-NEXT: slli.w $a0, $a0, 3 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: shl_i64_3: +-; LA64: # %bb.0: +-; LA64-NEXT: slli.d $a0, $a0, 3 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %shl = shl i64 %x, 3 +- ret i64 %shl +-} +diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/sub.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/sub.ll +deleted file mode 100644 +index dfa55c29e..000000000 +--- a/llvm/test/CodeGen/LoongArch/ir-instruction/sub.ll ++++ /dev/null +@@ -1,93 +0,0 @@ +-; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32 +-; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64 +- +-;; Exercise the 'sub' LLVM IR: https://llvm.org/docs/LangRef.html#sub-instruction +- +-define i1 @sub_i1(i1 %x, i1 %y) { +-; LA32-LABEL: sub_i1: +-; LA32: # %bb.0: +-; LA32-NEXT: sub.w $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: sub_i1: +-; LA64: # %bb.0: +-; LA64-NEXT: sub.d $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %sub = sub i1 %x, %y +- ret i1 %sub +-} +- +-define i8 @sub_i8(i8 %x, i8 %y) { +-; LA32-LABEL: sub_i8: +-; LA32: # %bb.0: +-; LA32-NEXT: sub.w $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: sub_i8: +-; LA64: # %bb.0: +-; LA64-NEXT: sub.d $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %sub = sub i8 %x, %y +- ret i8 %sub +-} +- +-define i16 @sub_i16(i16 %x, i16 %y) { +-; LA32-LABEL: sub_i16: +-; 
LA32: # %bb.0: +-; LA32-NEXT: sub.w $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: sub_i16: +-; LA64: # %bb.0: +-; LA64-NEXT: sub.d $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %sub = sub i16 %x, %y +- ret i16 %sub +-} +- +-define i32 @sub_i32(i32 %x, i32 %y) { +-; LA32-LABEL: sub_i32: +-; LA32: # %bb.0: +-; LA32-NEXT: sub.w $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: sub_i32: +-; LA64: # %bb.0: +-; LA64-NEXT: sub.d $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %sub = sub i32 %x, %y +- ret i32 %sub +-} +- +-;; Match the pattern: +-;; def : PatGprGpr_32; +-define signext i32 @sub_i32_sext(i32 %x, i32 %y) { +-; LA32-LABEL: sub_i32_sext: +-; LA32: # %bb.0: +-; LA32-NEXT: sub.w $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: sub_i32_sext: +-; LA64: # %bb.0: +-; LA64-NEXT: sub.w $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %sub = sub i32 %x, %y +- ret i32 %sub +-} +- +-define i64 @sub_i64(i64 %x, i64 %y) { +-; LA32-LABEL: sub_i64: +-; LA32: # %bb.0: +-; LA32-NEXT: sub.w $a1, $a1, $a3 +-; LA32-NEXT: sltu $a3, $a0, $a2 +-; LA32-NEXT: sub.w $a1, $a1, $a3 +-; LA32-NEXT: sub.w $a0, $a0, $a2 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: sub_i64: +-; LA64: # %bb.0: +-; LA64-NEXT: sub.d $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %sub = sub i64 %x, %y +- ret i64 %sub +-} +diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/xor.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/xor.ll +deleted file mode 100644 +index 2f85e645c..000000000 +--- a/llvm/test/CodeGen/LoongArch/ir-instruction/xor.ll ++++ /dev/null +@@ -1,264 +0,0 @@ +-; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32 +-; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64 +- +-;; Exercise the 'xor' LLVM IR: https://llvm.org/docs/LangRef.html#xor-instruction +- +-define i1 @xor_i1(i1 %a, i1 %b) { +-; LA32-LABEL: xor_i1: +-; LA32: # %bb.0: # %entry +-; LA32-NEXT: xor $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: xor_i1: +-; LA64: # %bb.0: # %entry +-; LA64-NEXT: xor $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +-entry: +- %r = xor i1 %a, %b +- ret i1 %r +-} +- +-define i8 @xor_i8(i8 %a, i8 %b) { +-; LA32-LABEL: xor_i8: +-; LA32: # %bb.0: # %entry +-; LA32-NEXT: xor $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: xor_i8: +-; LA64: # %bb.0: # %entry +-; LA64-NEXT: xor $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +-entry: +- %r = xor i8 %a, %b +- ret i8 %r +-} +- +-define i16 @xor_i16(i16 %a, i16 %b) { +-; LA32-LABEL: xor_i16: +-; LA32: # %bb.0: # %entry +-; LA32-NEXT: xor $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: xor_i16: +-; LA64: # %bb.0: # %entry +-; LA64-NEXT: xor $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +-entry: +- %r = xor i16 %a, %b +- ret i16 %r +-} +- +-define i32 @xor_i32(i32 %a, i32 %b) { +-; LA32-LABEL: xor_i32: +-; LA32: # %bb.0: # %entry +-; LA32-NEXT: xor $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: xor_i32: +-; LA64: # %bb.0: # %entry +-; LA64-NEXT: xor $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +-entry: +- %r = xor i32 %a, %b +- ret i32 %r +-} +- +-define i64 @xor_i64(i64 %a, i64 %b) { +-; LA32-LABEL: xor_i64: +-; LA32: # %bb.0: # %entry +-; LA32-NEXT: xor $a0, $a0, $a2 +-; LA32-NEXT: xor $a1, $a1, $a3 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: xor_i64: +-; LA64: # %bb.0: # %entry +-; LA64-NEXT: xor $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +-entry: +- %r = xor i64 %a, 
%b +- ret i64 %r +-} +- +-define i1 @xor_i1_0(i1 %b) { +-; LA32-LABEL: xor_i1_0: +-; LA32: # %bb.0: # %entry +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: xor_i1_0: +-; LA64: # %bb.0: # %entry +-; LA64-NEXT: jirl $zero, $ra, 0 +-entry: +- %r = xor i1 4, %b +- ret i1 %r +-} +- +-define i1 @xor_i1_5(i1 %b) { +-; LA32-LABEL: xor_i1_5: +-; LA32: # %bb.0: # %entry +-; LA32-NEXT: xori $a0, $a0, 1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: xor_i1_5: +-; LA64: # %bb.0: # %entry +-; LA64-NEXT: xori $a0, $a0, 1 +-; LA64-NEXT: jirl $zero, $ra, 0 +-entry: +- %r = xor i1 5, %b +- ret i1 %r +-} +- +-define i8 @xor_i8_5(i8 %b) { +-; LA32-LABEL: xor_i8_5: +-; LA32: # %bb.0: # %entry +-; LA32-NEXT: xori $a0, $a0, 5 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: xor_i8_5: +-; LA64: # %bb.0: # %entry +-; LA64-NEXT: xori $a0, $a0, 5 +-; LA64-NEXT: jirl $zero, $ra, 0 +-entry: +- %r = xor i8 5, %b +- ret i8 %r +-} +- +-define i8 @xor_i8_257(i8 %b) { +-; LA32-LABEL: xor_i8_257: +-; LA32: # %bb.0: # %entry +-; LA32-NEXT: xori $a0, $a0, 1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: xor_i8_257: +-; LA64: # %bb.0: # %entry +-; LA64-NEXT: xori $a0, $a0, 1 +-; LA64-NEXT: jirl $zero, $ra, 0 +-entry: +- %r = xor i8 257, %b +- ret i8 %r +-} +- +-define i16 @xor_i16_5(i16 %b) { +-; LA32-LABEL: xor_i16_5: +-; LA32: # %bb.0: # %entry +-; LA32-NEXT: xori $a0, $a0, 5 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: xor_i16_5: +-; LA64: # %bb.0: # %entry +-; LA64-NEXT: xori $a0, $a0, 5 +-; LA64-NEXT: jirl $zero, $ra, 0 +-entry: +- %r = xor i16 5, %b +- ret i16 %r +-} +- +-define i16 @xor_i16_0x1000(i16 %b) { +-; LA32-LABEL: xor_i16_0x1000: +-; LA32: # %bb.0: # %entry +-; LA32-NEXT: lu12i.w $a1, 1 +-; LA32-NEXT: xor $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: xor_i16_0x1000: +-; LA64: # %bb.0: # %entry +-; LA64-NEXT: lu12i.w $a1, 1 +-; LA64-NEXT: xor $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +-entry: +- %r = xor i16 4096, %b +- ret i16 %r +-} +- +-define i16 @xor_i16_0x10001(i16 %b) { +-; LA32-LABEL: xor_i16_0x10001: +-; LA32: # %bb.0: # %entry +-; LA32-NEXT: xori $a0, $a0, 1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: xor_i16_0x10001: +-; LA64: # %bb.0: # %entry +-; LA64-NEXT: xori $a0, $a0, 1 +-; LA64-NEXT: jirl $zero, $ra, 0 +-entry: +- %r = xor i16 65537, %b +- ret i16 %r +-} +- +-define i32 @xor_i32_5(i32 %b) { +-; LA32-LABEL: xor_i32_5: +-; LA32: # %bb.0: # %entry +-; LA32-NEXT: xori $a0, $a0, 5 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: xor_i32_5: +-; LA64: # %bb.0: # %entry +-; LA64-NEXT: xori $a0, $a0, 5 +-; LA64-NEXT: jirl $zero, $ra, 0 +-entry: +- %r = xor i32 5, %b +- ret i32 %r +-} +- +-define i32 @xor_i32_0x1000(i32 %b) { +-; LA32-LABEL: xor_i32_0x1000: +-; LA32: # %bb.0: # %entry +-; LA32-NEXT: lu12i.w $a1, 1 +-; LA32-NEXT: xor $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: xor_i32_0x1000: +-; LA64: # %bb.0: # %entry +-; LA64-NEXT: lu12i.w $a1, 1 +-; LA64-NEXT: xor $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +-entry: +- %r = xor i32 4096, %b +- ret i32 %r +-} +- +-define i32 @xor_i32_0x100000001(i32 %b) { +-; LA32-LABEL: xor_i32_0x100000001: +-; LA32: # %bb.0: # %entry +-; LA32-NEXT: xori $a0, $a0, 1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: xor_i32_0x100000001: +-; LA64: # %bb.0: # %entry +-; LA64-NEXT: xori $a0, $a0, 1 +-; LA64-NEXT: jirl $zero, $ra, 0 +-entry: +- %r = xor i32 4294967297, %b +- ret i32 %r +-} +- +-define i64 @xor_i64_5(i64 %b) { +-; LA32-LABEL: xor_i64_5: +-; LA32: # 
%bb.0: # %entry +-; LA32-NEXT: xori $a0, $a0, 5 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: xor_i64_5: +-; LA64: # %bb.0: # %entry +-; LA64-NEXT: xori $a0, $a0, 5 +-; LA64-NEXT: jirl $zero, $ra, 0 +-entry: +- %r = xor i64 5, %b +- ret i64 %r +-} +- +-define i64 @xor_i64_0x1000(i64 %b) { +-; LA32-LABEL: xor_i64_0x1000: +-; LA32: # %bb.0: # %entry +-; LA32-NEXT: lu12i.w $a2, 1 +-; LA32-NEXT: xor $a0, $a0, $a2 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: xor_i64_0x1000: +-; LA64: # %bb.0: # %entry +-; LA64-NEXT: lu12i.w $a1, 1 +-; LA64-NEXT: xor $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +-entry: +- %r = xor i64 4096, %b +- ret i64 %r +-} +diff --git a/llvm/test/CodeGen/LoongArch/jirl-verify.ll b/llvm/test/CodeGen/LoongArch/jirl-verify.ll +new file mode 100644 +index 000000000..70b588bea +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/jirl-verify.ll +@@ -0,0 +1,34 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc -march=loongarch64 -verify-machineinstrs -o - %s \ ++; RUN: | FileCheck %s --check-prefix=STATIC ++; RUN: llc -march=loongarch64 -verify-machineinstrs -relocation-model=pic --code-model=large -o - %s \ ++; RUN: | FileCheck %s --check-prefix=LARGE ++ ++define void @test() nounwind { ++; STATIC-LABEL: test: ++; STATIC: # %bb.0: ++; STATIC-NEXT: addi.d $sp, $sp, -16 ++; STATIC-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill ++; STATIC-NEXT: lu12i.w $ra, foo ++; STATIC-NEXT: ori $ra, $ra, foo ++; STATIC-NEXT: lu32i.d $ra, foo ++; STATIC-NEXT: lu52i.d $ra, $ra, foo ++; STATIC-NEXT: jirl $ra, $ra, 0 ++; STATIC-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload ++; STATIC-NEXT: addi.d $sp, $sp, 16 ++; STATIC-NEXT: jr $ra ++; ++; LARGE-LABEL: test: ++; LARGE: # %bb.0: ++; LARGE-NEXT: addi.d $sp, $sp, -16 ++; LARGE-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill ++; LARGE-NEXT: pcaddu18i $ra, foo ++; LARGE-NEXT: jirl $ra, $ra, foo ++; LARGE-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload ++; LARGE-NEXT: addi.d $sp, $sp, 16 ++; LARGE-NEXT: jr $ra ++ call void @foo() nounwind ++ ret void ++} ++ ++declare void @foo() +diff --git a/llvm/test/CodeGen/LoongArch/lasx/VExtend.ll b/llvm/test/CodeGen/LoongArch/lasx/VExtend.ll +new file mode 100644 +index 000000000..1b4b52c7a +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/VExtend.ll +@@ -0,0 +1,54 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc -march=loongarch64 -mattr=+lasx < %s | FileCheck %s ++ ++define <4 x i64> @uvadd(<8 x i32> %b, <8 x i32> %c) { ++; CHECK-LABEL: uvadd: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvhaddw.du.wu $xr0, $xr0, $xr1 ++; CHECK-NEXT: jr $ra ++entry: ++ %0 = shufflevector <8 x i32> %c, <8 x i32> undef, <4 x i32> ++ %1 = shufflevector <8 x i32> %b, <8 x i32> undef, <4 x i32> ++ %2 = add <4 x i32> %0, %1 ++ %3 = zext <4 x i32> %2 to <4 x i64> ++ ret <4 x i64> %3 ++} ++ ++define <4 x i64> @svadd(<8 x i32> %b, <8 x i32> %c) { ++; CHECK-LABEL: svadd: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvhaddw.d.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: jr $ra ++entry: ++ %0 = shufflevector <8 x i32> %c, <8 x i32> undef, <4 x i32> ++ %1 = shufflevector <8 x i32> %b, <8 x i32> undef, <4 x i32> ++ %2 = add nsw <4 x i32> %0, %1 ++ %3 = sext <4 x i32> %2 to <4 x i64> ++ ret <4 x i64> %3 ++} ++ ++define <4 x i64> @uvsub(<8 x i32> %b, <8 x i32> %c) { ++; CHECK-LABEL: uvsub: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvhsubw.du.wu $xr0, $xr0, $xr1 ++; CHECK-NEXT: jr $ra ++entry: ++ %0 = shufflevector <8 x i32> %b, <8 x i32> undef, 
<4 x i32> ++ %1 = shufflevector <8 x i32> %c, <8 x i32> undef, <4 x i32> ++ %2 = sub <4 x i32> %0, %1 ++ %3 = zext <4 x i32> %2 to <4 x i64> ++ ret <4 x i64> %3 ++} ++ ++define <4 x i64> @svsub(<8 x i32> %b, <8 x i32> %c) { ++; CHECK-LABEL: svsub: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvhsubw.d.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: jr $ra ++entry: ++ %0 = shufflevector <8 x i32> %b, <8 x i32> undef, <4 x i32> ++ %1 = shufflevector <8 x i32> %c, <8 x i32> undef, <4 x i32> ++ %2 = sub nsw <4 x i32> %0, %1 ++ %3 = sext <4 x i32> %2 to <4 x i64> ++ ret <4 x i64> %3 ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/imm_vector_lasx.ll b/llvm/test/CodeGen/LoongArch/lasx/imm_vector_lasx.ll +new file mode 100644 +index 000000000..07b80895b +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/imm_vector_lasx.ll +@@ -0,0 +1,176 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc -march=loongarch64 -mattr=+lasx < %s | FileCheck %s ++ ++define <4 x i64> @build_lasx0(<4 x i64> %a) { ++; CHECK-LABEL: build_lasx0: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.w $r4, $zero, -1 ++; CHECK-NEXT: lu32i.d $r4, 0 ++; CHECK-NEXT: xvreplgr2vr.d $xr1, $r4 ++; CHECK-NEXT: xvadd.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: jr $ra ++ %b = add <4 x i64> %a, ++ ret <4 x i64> %b ++} ++ ++define <4 x i64> @build_lasx1(<4 x i64> %a) { ++; CHECK-LABEL: build_lasx1: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu52i.d $r4, $zero, 2047 ++; CHECK-NEXT: xvreplgr2vr.d $xr1, $r4 ++; CHECK-NEXT: xvadd.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: jr $ra ++ %b = add <4 x i64> %a, ++ ret <4 x i64> %b ++} ++ ++define <4 x i64> @build_lasx2(<4 x i64> %a) { ++; CHECK-LABEL: build_lasx2: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r4, $zero, 2048 ++; CHECK-NEXT: lu32i.d $r4, 524287 ++; CHECK-NEXT: xvreplgr2vr.d $xr1, $r4 ++; CHECK-NEXT: xvadd.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: jr $ra ++ %b = add <4 x i64> %a, ++ ret <4 x i64> %b ++} ++ ++define <4 x i64> @build_lasx3(<4 x i64> %a) { ++; CHECK-LABEL: build_lasx3: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r4, $zero, 4095 ++; CHECK-NEXT: lu52i.d $r4, $r4, -1 ++; CHECK-NEXT: xvreplgr2vr.d $xr1, $r4 ++; CHECK-NEXT: xvadd.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: jr $ra ++ %b = add <4 x i64> %a, ++ ret <4 x i64> %b ++} ++ ++define <4 x i64> @build_lasx4(<4 x i64> %a) { ++; CHECK-LABEL: build_lasx4: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r4, $zero, 4095 ++; CHECK-NEXT: lu32i.d $r4, -524288 ++; CHECK-NEXT: lu52i.d $r4, $r4, 0 ++; CHECK-NEXT: xvreplgr2vr.d $xr1, $r4 ++; CHECK-NEXT: xvadd.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: jr $ra ++ %b = add <4 x i64> %a, ++ ret <4 x i64> %b ++} ++ ++define <4 x i64> @build_lasx5(<4 x i64> %a) { ++; CHECK-LABEL: build_lasx5: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.w $r4, $zero, -2048 ++; CHECK-NEXT: lu32i.d $r4, 0 ++; CHECK-NEXT: xvreplgr2vr.d $xr1, $r4 ++; CHECK-NEXT: xvadd.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: jr $ra ++ %b = add <4 x i64> %a, ++ ret <4 x i64> %b ++} ++ ++define <4 x i64> @build_lasx6(<4 x i64> %a) { ++; CHECK-LABEL: build_lasx6: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.w $r4, $zero, -2048 ++; CHECK-NEXT: lu52i.d $r4, $r4, 0 ++; CHECK-NEXT: xvreplgr2vr.d $xr1, $r4 ++; CHECK-NEXT: xvadd.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: jr $ra ++ %b = add <4 x i64> %a, ++ ret <4 x i64> %b ++} ++ ++define <4 x i64> @build_lasx7(<4 x i64> %a) { ++; CHECK-LABEL: build_lasx7: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.w $r4, $zero, -2048 ++; CHECK-NEXT: lu32i.d $r4, -524288 ++; CHECK-NEXT: lu52i.d $r4, $r4, 0 ++; CHECK-NEXT: xvreplgr2vr.d $xr1, $r4 ++; 
CHECK-NEXT: xvadd.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: jr $ra ++ %b = add <4 x i64> %a, ++ ret <4 x i64> %b ++} ++ ++define <4 x i64> @build_lasx8(<4 x i64> %a) { ++; CHECK-LABEL: build_lasx8: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -524288 ++; CHECK-NEXT: lu32i.d $r4, 0 ++; CHECK-NEXT: xvreplgr2vr.d $xr1, $r4 ++; CHECK-NEXT: xvadd.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: jr $ra ++ %b = add <4 x i64> %a, ++ ret <4 x i64> %b ++} ++ ++define <4 x i64> @build_lasx9(<4 x i64> %a) { ++; CHECK-LABEL: build_lasx9: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -1 ++; CHECK-NEXT: lu52i.d $r4, $r4, 0 ++; CHECK-NEXT: xvreplgr2vr.d $xr1, $r4 ++; CHECK-NEXT: xvadd.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: jr $ra ++ %b = add <4 x i64> %a, ++ ret <4 x i64> %b ++} ++ ++define <4 x i64> @build_lasx10(<4 x i64> %a) { ++; CHECK-LABEL: build_lasx10: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -524288 ++; CHECK-NEXT: lu32i.d $r4, 0 ++; CHECK-NEXT: lu52i.d $r4, $r4, 2047 ++; CHECK-NEXT: xvreplgr2vr.d $xr1, $r4 ++; CHECK-NEXT: xvadd.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: jr $ra ++ %b = add <4 x i64> %a, ++ ret <4 x i64> %b ++} ++ ++define <4 x i64> @build_lasx11(<4 x i64> %a) { ++; CHECK-LABEL: build_lasx11: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, 524287 ++; CHECK-NEXT: ori $r4, $r4, 2048 ++; CHECK-NEXT: lu32i.d $r4, -1 ++; CHECK-NEXT: xvreplgr2vr.d $xr1, $r4 ++; CHECK-NEXT: xvadd.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: jr $ra ++ %b = add <4 x i64> %a, ++ ret <4 x i64> %b ++} ++ ++define <4 x i64> @build_lasx12(<4 x i64> %a) { ++; CHECK-LABEL: build_lasx12: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -524288 ++; CHECK-NEXT: ori $r4, $r4, 2047 ++; CHECK-NEXT: lu52i.d $r4, $r4, 2047 ++; CHECK-NEXT: xvreplgr2vr.d $xr1, $r4 ++; CHECK-NEXT: xvadd.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: jr $ra ++ %b = add <4 x i64> %a, ++ ret <4 x i64> %b ++} ++ ++define <4 x i64> @build_lasx13(<4 x i64> %a) { ++; CHECK-LABEL: build_lasx13: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -524288 ++; CHECK-NEXT: ori $r4, $r4, 2048 ++; CHECK-NEXT: lu32i.d $r4, -524288 ++; CHECK-NEXT: lu52i.d $r4, $r4, 0 ++; CHECK-NEXT: xvreplgr2vr.d $xr1, $r4 ++; CHECK-NEXT: xvadd.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: jr $ra ++ %b = add <4 x i64> %a, ++ ret <4 x i64> %b ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/inline-asm.ll b/llvm/test/CodeGen/LoongArch/lasx/inline-asm.ll +new file mode 100644 +index 000000000..337632491 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/inline-asm.ll +@@ -0,0 +1,55 @@ ++; A basic inline assembly test ++ ++; RUN: llc -march=loongarch64 -mattr=+lasx < %s | FileCheck %s ++ ++@v4i64_r = global <4 x i64> zeroinitializer, align 32 ++@v8i32_r = global <8 x i32> zeroinitializer, align 32 ++ ++define void @test1() nounwind { ++entry: ++ ; CHECK-LABEL: test1: ++ %0 = call <4 x i64> asm "xvldi ${0:u}, 1", "=f"() ++ ; CHECK: xvldi $xr{{[1-3]?[0-9]}}, 1 ++ store <4 x i64> %0, <4 x i64>* @v4i64_r ++ ret void ++} ++ ++define void @test2() nounwind { ++entry: ++ ; CHECK-LABEL: test2: ++ %0 = load <8 x i32>, <8 x i32>* @v8i32_r ++ %1 = call <8 x i32> asm "xvaddi.wu ${0:u}, ${1:u}, 1", "=f,f"(<8 x i32> %0) ++ ; CHECK: xvaddi.wu $xr{{[1-3]?[0-9]}}, $xr{{[1-3]?[0-9]}}, 1 ++ store <8 x i32> %1, <8 x i32>* @v8i32_r ++ ret void ++} ++ ++define void @test2_d() nounwind { ++entry: ++ ; CHECK-LABEL: test2_d: ++ %0 = load < 4 x i64>, < 4 x i64>* @v4i64_r ++ %1 = call < 4 x i64> asm "xvaddi.wu ${0:u}, ${1:u}, 1", "=f,f"(< 4 x i64> %0) ++ ; CHECK: xvaddi.wu $xr{{[1-3]?[0-9]}}, $xr{{[1-3]?[0-9]}}, 1 ++ store < 4 x i64> %1, < 4 x i64>* 
@v4i64_r ++ ret void ++} ++ ++define void @test3() nounwind { ++entry: ++ ; CHECK-LABEL: test3: ++ %0 = load <8 x i32>, <8 x i32>* @v8i32_r ++ %1 = call <8 x i32> asm sideeffect "xvaddi.wu ${0:u}, ${1:u}, 1", "=f,f,~{$xr0}"(<8 x i32> %0) ++ ; CHECK: xvaddi.wu $xr{{([1-9]|[1-3][0-9])}}, $xr{{([1-9]|[1-3][0-9])}}, 1 ++ store <8 x i32> %1, <8 x i32>* @v8i32_r ++ ret void ++} ++ ++define void @test3_d() nounwind { ++entry: ++ ; CHECK-LABEL: test3_d: ++ %0 = load <4 x i64>, <4 x i64>* @v4i64_r ++ %1 = call <4 x i64> asm sideeffect "xvaddi.wu ${0:u}, ${1:u}, 1", "=f,f,~{$xr0}"(<4 x i64> %0) ++ ; CHECK: xvaddi.wu $xr{{([1-9]|[1-3][0-9])}}, $xr{{([1-9]|[1-3][0-9])}}, 1 ++ store <4 x i64> %1, <4 x i64>* @v4i64_r ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/insert-lasx.ll b/llvm/test/CodeGen/LoongArch/lasx/insert-lasx.ll +new file mode 100644 +index 000000000..51fa34606 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/insert-lasx.ll +@@ -0,0 +1,38 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc -march=loongarch64 -mattr=+lasx < %s | FileCheck %s ++ ++define <16 x i16> @lasxH(<16 x i16> %d, <8 x i16> %s1) { ++; CHECK-LABEL: lasxH: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: # kill: def $vr1 killed $vr1 def $xr1 ++; CHECK-NEXT: xvpermi.q $xr0, $xr1, 48 ++; CHECK-NEXT: jr $ra ++entry: ++ %r1 = shufflevector <8 x i16> %s1, <8 x i16> poison, <16 x i32> ++ %r2 = shufflevector <16 x i16> %r1, <16 x i16> %d, <16 x i32> ++ ret <16 x i16> %r2 ++} ++ ++define <8 x i32> @lasxW(<8 x i32> %d, <4 x i32> %s1) { ++; CHECK-LABEL: lasxW: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: # kill: def $vr1 killed $vr1 def $xr1 ++; CHECK-NEXT: xvpermi.q $xr0, $xr1, 48 ++; CHECK-NEXT: jr $ra ++entry: ++ %r1 = shufflevector <4 x i32> %s1, <4 x i32> poison, <8 x i32> ++ %r2 = shufflevector <8 x i32> %r1, <8 x i32> %d, <8 x i32> ++ ret <8 x i32> %r2 ++} ++ ++define <4 x i64> @lasxD(<4 x i64> %d, <2 x i64> %s1) { ++; CHECK-LABEL: lasxD: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: # kill: def $vr1 killed $vr1 def $xr1 ++; CHECK-NEXT: xvpermi.q $xr0, $xr1, 48 ++; CHECK-NEXT: jr $ra ++entry: ++ %r1 = shufflevector <2 x i64> %s1, <2 x i64> poison, <4 x i32> ++ %r2 = shufflevector <4 x i64> %r1, <4 x i64> %d, <4 x i32> ++ ret <4 x i64> %r2 ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-lasx.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-lasx.ll +new file mode 100644 +index 000000000..05b720077 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-lasx.ll +@@ -0,0 +1,70 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc -march=loongarch64 -mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvrepli.b(i32) ++declare <16 x i16> @llvm.loongarch.lasx.xvrepli.h(i32) ++declare <8 x i32> @llvm.loongarch.lasx.xvrepli.w(i32) ++declare <4 x i64> @llvm.loongarch.lasx.xvrepli.d(i32) ++ ++declare <4 x double> @llvm.loongarch.lasx.xvpickve.d.f(<4 x double>, i32) ++declare <8 x float> @llvm.loongarch.lasx.xvpickve.w.f(<8 x float>, i32) ++ ++define <32 x i8> @lasx_xvrepli_b() { ++; CHECK-LABEL: lasx_xvrepli_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvldi $xr0, 2 ++; CHECK-NEXT: jr $ra ++entry: ++ %0 = tail call <32 x i8> @llvm.loongarch.lasx.xvrepli.b(i32 2) ++ ret <32 x i8> %0 ++} ++ ++define <16 x i16> @lasx_xvrepli_h() { ++; CHECK-LABEL: lasx_xvrepli_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvldi $xr0, 2 ++; CHECK-NEXT: jr $ra ++entry: ++ %0 = tail call <16 x i16> 
@llvm.loongarch.lasx.xvrepli.h(i32 2) ++ ret <16 x i16> %0 ++} ++ ++define <8 x i32> @lasx_xvrepli_w() { ++; CHECK-LABEL: lasx_xvrepli_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvldi $xr0, 2 ++; CHECK-NEXT: jr $ra ++entry: ++ %0 = tail call <8 x i32> @llvm.loongarch.lasx.xvrepli.w(i32 2) ++ ret <8 x i32> %0 ++} ++ ++define <4 x i64> @lasx_xvrepli_d() { ++; CHECK-LABEL: lasx_xvrepli_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvldi $xr0, 2 ++; CHECK-NEXT: jr $ra ++entry: ++ %0 = tail call <4 x i64> @llvm.loongarch.lasx.xvrepli.d(i32 2) ++ ret <4 x i64> %0 ++} ++ ++define <4 x double> @lasx_xvpickve_d_f(<4 x double> %a) { ++; CHECK-LABEL: lasx_xvpickve_d_f: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvpickve.d $xr0, $xr0, 2 ++; CHECK-NEXT: jr $ra ++entry: ++ %0 = tail call <4 x double> @llvm.loongarch.lasx.xvpickve.d.f(<4 x double> %a, i32 2) ++ ret <4 x double> %0 ++} ++ ++define <8 x float> @lasx_xvpickve_w_f(<8 x float> %a) { ++; CHECK-LABEL: lasx_xvpickve_w_f: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvpickve.w $xr0, $xr0, 2 ++; CHECK-NEXT: jr $ra ++entry: ++ %0 = tail call <8 x float> @llvm.loongarch.lasx.xvpickve.w.f(<8 x float> %a, i32 2) ++ ret <8 x float> %0 ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/lasxvclr.ll b/llvm/test/CodeGen/LoongArch/lasx/lasxvclr.ll +new file mode 100644 +index 000000000..b40698104 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/lasxvclr.ll +@@ -0,0 +1,46 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc -march=loongarch64 -mattr=+lasx < %s | FileCheck %s ++ ++define <32 x i8> @clri8(<32 x i8> %0, <32 x i8> %1) { ++; CHECK-LABEL: clri8: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvbitclr.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: jr $ra ++ %3 = shl <32 x i8> , %1 ++ %4 = xor <32 x i8> %3, ++ %5 = and <32 x i8> %4, %0 ++ ret <32 x i8> %5 ++} ++ ++define <16 x i16> @clri16(<16 x i16> %0, <16 x i16> %1) { ++; CHECK-LABEL: clri16: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvbitclr.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: jr $ra ++ %3 = shl <16 x i16> , %1 ++ %4 = xor <16 x i16> %3, ++ %5 = and <16 x i16> %4, %0 ++ ret <16 x i16> %5 ++} ++ ++define <8 x i32> @clri32(<8 x i32> %0, <8 x i32> %1) { ++; CHECK-LABEL: clri32: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvbitclr.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: jr $ra ++ %3 = shl <8 x i32> , %1 ++ %4 = xor <8 x i32> %3, ++ %5 = and <8 x i32> %4, %0 ++ ret <8 x i32> %5 ++} ++ ++define <4 x i64> @clri64(<4 x i64> %0, <4 x i64> %1) { ++; CHECK-LABEL: clri64: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvbitclr.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: jr $ra ++ %3 = shl <4 x i64> , %1 ++ %4 = xor <4 x i64> %3, ++ %5 = and <4 x i64> %4, %0 ++ ret <4 x i64> %5 ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/logic-lasx.ll b/llvm/test/CodeGen/LoongArch/lasx/logic-lasx.ll +new file mode 100644 +index 000000000..ff28569a1 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/logic-lasx.ll +@@ -0,0 +1,130 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc -march=loongarch64 -mattr=+lasx < %s | FileCheck %s ++ ++define <4 x i64> @not_v4i64(<4 x i64> %a) { ++; CHECK-LABEL: not_v4i64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvnor.v $xr0, $xr0, $xr0 ++; CHECK-NEXT: jr $ra ++entry: ++ %not = xor <4 x i64> %a, ++ ret <4 x i64> %not ++} ++ ++define <8 x i32> @not_v8i32(<8 x i32> %a) { ++; CHECK-LABEL: not_v8i32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvnor.v $xr0, $xr0, $xr0 ++; CHECK-NEXT: jr $ra ++entry: ++ %not = xor <8 x i32> %a, ++ ret <8 x 
i32> %not ++} ++ ++define <16 x i16> @not_v16i16(<16 x i16> %a) { ++; CHECK-LABEL: not_v16i16: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvnor.v $xr0, $xr0, $xr0 ++; CHECK-NEXT: jr $ra ++entry: ++ %not = xor <16 x i16> %a, ++ ret <16 x i16> %not ++} ++ ++define <32 x i8> @not_v32i8(<32 x i8> %a) { ++; CHECK-LABEL: not_v32i8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvxori.b $xr0, $xr0, 255 ++; CHECK-NEXT: jr $ra ++entry: ++ %not = xor <32 x i8> %a, ++ ret <32 x i8> %not ++} ++ ++define <4 x i64> @andn_v4i64(<4 x i64> %a, <4 x i64> %b) { ++; CHECK-LABEL: andn_v4i64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvandn.v $xr0, $xr1, $xr0 ++; CHECK-NEXT: jr $ra ++entry: ++ %not = xor <4 x i64> %b, ++ %and = and <4 x i64> %not, %a ++ ret <4 x i64> %and ++} ++ ++define <8 x i32> @andn_v8i32(<8 x i32> %a, <8 x i32> %b) { ++; CHECK-LABEL: andn_v8i32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvandn.v $xr0, $xr1, $xr0 ++; CHECK-NEXT: jr $ra ++entry: ++ %not = xor <8 x i32> %b, ++ %and = and <8 x i32> %not, %a ++ ret <8 x i32> %and ++} ++ ++define <16 x i16> @andn_v16i16(<16 x i16> %a, <16 x i16> %b) { ++; CHECK-LABEL: andn_v16i16: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvandn.v $xr0, $xr1, $xr0 ++; CHECK-NEXT: jr $ra ++entry: ++ %not = xor <16 x i16> %b, ++ %and = and <16 x i16> %not, %a ++ ret <16 x i16> %and ++} ++ ++define <32 x i8> @andn_v32i8(<32 x i8> %a, <32 x i8> %b) { ++; CHECK-LABEL: andn_v32i8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvandn.v $xr0, $xr1, $xr0 ++; CHECK-NEXT: jr $ra ++entry: ++ %not = xor <32 x i8> %b, ++ %and = and <32 x i8> %not, %a ++ ret <32 x i8> %and ++} ++ ++define <4 x i64> @orn_v4i64(<4 x i64> %a, <4 x i64> %b) { ++; CHECK-LABEL: orn_v4i64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvorn.v $xr0, $xr0, $xr1 ++; CHECK-NEXT: jr $ra ++entry: ++ %not = xor <4 x i64> %b, ++ %or = or <4 x i64> %not, %a ++ ret <4 x i64> %or ++} ++ ++define <8 x i32> @orn_v8i32(<8 x i32> %a, <8 x i32> %b) { ++; CHECK-LABEL: orn_v8i32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvorn.v $xr0, $xr0, $xr1 ++; CHECK-NEXT: jr $ra ++entry: ++ %not = xor <8 x i32> %b, ++ %or = or <8 x i32> %not, %a ++ ret <8 x i32> %or ++} ++ ++define <16 x i16> @orn_v16i16(<16 x i16> %a, <16 x i16> %b) { ++; CHECK-LABEL: orn_v16i16: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvorn.v $xr0, $xr0, $xr1 ++; CHECK-NEXT: jr $ra ++entry: ++ %not = xor <16 x i16> %b, ++ %or = or <16 x i16> %not, %a ++ ret <16 x i16> %or ++} ++ ++define <32 x i8> @orn_v32i8(<32 x i8> %a, <32 x i8> %b) { ++; CHECK-LABEL: orn_v32i8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvorn.v $xr0, $xr0, $xr1 ++; CHECK-NEXT: jr $ra ++entry: ++ %not = xor <32 x i8> %b, ++ %or = or <32 x i8> %not, %a ++ ret <32 x i8> %or ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/set-lasx.ll b/llvm/test/CodeGen/LoongArch/lasx/set-lasx.ll +new file mode 100644 +index 000000000..443262eac +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/set-lasx.ll +@@ -0,0 +1,38 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc -march=loongarch64 -mattr=+lasx < %s | FileCheck %s ++ ++define <32 x i8> @seti8(<32 x i8>) { ++; CHECK-LABEL: seti8: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvbitseti.b $xr0, $xr0, 6 ++; CHECK-NEXT: jr $ra ++ %2 = or <32 x i8> %0, ++ ret <32 x i8> %2 ++} ++ ++define <16 x i16> @seti16(<16 x i16>) { ++; CHECK-LABEL: seti16: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvbitseti.h $xr0, $xr0, 6 ++; CHECK-NEXT: jr $ra ++ %2 = or <16 x i16> %0, ++ ret <16 x i16> %2 ++} ++ ++define <8 x 
i32> @seti32(<8 x i32>) { ++; CHECK-LABEL: seti32: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvbitseti.w $xr0, $xr0, 6 ++; CHECK-NEXT: jr $ra ++ %2 = or <8 x i32> %0, ++ ret <8 x i32> %2 ++} ++ ++define <4 x i64> @seti64(<4 x i64>) { ++; CHECK-LABEL: seti64: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvbitseti.d $xr0, $xr0, 6 ++; CHECK-NEXT: jr $ra ++ %2 = or <4 x i64> %0, ++ ret <4 x i64> %2 ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/shuffle_v4i64_1032.ll b/llvm/test/CodeGen/LoongArch/lasx/shuffle_v4i64_1032.ll +new file mode 100644 +index 000000000..965cfe94c +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/shuffle_v4i64_1032.ll +@@ -0,0 +1,19 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc -march=loongarch64 -mattr=+lasx < %s | FileCheck %s ++ ++define <4 x i64> @shuffle_v4i64_1032(<4 x i64> %vj, <4 x i64> %vk) { ++; CHECK-LABEL: shuffle_v4i64_1032: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvpickve2gr.d $r4, $xr0, 2 ++; CHECK-NEXT: xvpickve2gr.d $r5, $xr0, 3 ++; CHECK-NEXT: xvpickve2gr.d $r6, $xr0, 0 ++; CHECK-NEXT: xvpickve2gr.d $r7, $xr0, 1 ++; CHECK-NEXT: xvinsgr2vr.d $xr0, $r7, 0 ++; CHECK-NEXT: xvinsgr2vr.d $xr0, $r6, 1 ++; CHECK-NEXT: xvinsgr2vr.d $xr0, $r5, 2 ++; CHECK-NEXT: xvinsgr2vr.d $xr0, $r4, 3 ++; CHECK-NEXT: jr $ra ++entry: ++ %vd = shufflevector <4 x i64> %vj, <4 x i64> %vk, <4 x i32> ++ ret <4 x i64> %vd ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/vext2xv.ll b/llvm/test/CodeGen/LoongArch/lasx/vext2xv.ll +new file mode 100644 +index 000000000..7bd3dca73 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/vext2xv.ll +@@ -0,0 +1,65 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc -march=loongarch64 -mattr=+lasx < %s | FileCheck %s ++ ++define <4 x i64> @s_v4i32_v4i64(<4 x i32> %a0) { ++; CHECK-LABEL: s_v4i32_v4i64: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # kill: def $vr0 killed $vr0 def $xr0 ++; CHECK-NEXT: vext2xv.d.w $xr0, $xr0 ++; CHECK-NEXT: jr $ra ++ %1 = sext <4 x i32> %a0 to <4 x i64> ++ ret <4 x i64> %1 ++} ++ ++define <4 x i64> @z_v4i32_v4i64(<4 x i32> %a0) { ++; CHECK-LABEL: z_v4i32_v4i64: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # kill: def $vr0 killed $vr0 def $xr0 ++; CHECK-NEXT: vext2xv.du.wu $xr0, $xr0 ++; CHECK-NEXT: jr $ra ++ %1 = zext <4 x i32> %a0 to <4 x i64> ++ ret <4 x i64> %1 ++} ++ ++define <16 x i16> @s_v16i8_v16i16(<16 x i8> %A) { ++; CHECK-LABEL: s_v16i8_v16i16: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: # kill: def $vr0 killed $vr0 def $xr0 ++; CHECK-NEXT: vext2xv.h.b $xr0, $xr0 ++; CHECK-NEXT: jr $ra ++ entry: ++ %B = sext <16 x i8> %A to <16 x i16> ++ ret <16 x i16> %B ++} ++ ++define <16 x i16> @z_v16i8_v16i16(<16 x i8> %A) { ++; CHECK-LABEL: z_v16i8_v16i16: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: # kill: def $vr0 killed $vr0 def $xr0 ++; CHECK-NEXT: vext2xv.hu.bu $xr0, $xr0 ++; CHECK-NEXT: jr $ra ++ entry: ++ %B = zext <16 x i8> %A to <16 x i16> ++ ret <16 x i16> %B ++} ++ ++define <8 x i32> @s_v8i16_v8i32(<8 x i16> %x) { ++; CHECK-LABEL: s_v8i16_v8i32: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # kill: def $vr0 killed $vr0 def $xr0 ++; CHECK-NEXT: vext2xv.w.h $xr0, $xr0 ++; CHECK-NEXT: jr $ra ++ %1 = sext <8 x i16> %x to <8 x i32> ++ ret <8 x i32> %1 ++} ++ ++define <8 x i32> @z_v8i16_v8i32(<8 x i16> %x) { ++; CHECK-LABEL: z_v8i16_v8i32: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # kill: def $vr0 killed $vr0 def $xr0 ++; CHECK-NEXT: vext2xv.wu.hu $xr0, $xr0 ++; CHECK-NEXT: jr $ra ++ %1 = zext <8 x i16> %x to <8 x i32> ++ ret <8 x i32> %1 ++} ++ 
+diff --git a/llvm/test/CodeGen/LoongArch/lasx/xvabsd.ll b/llvm/test/CodeGen/LoongArch/lasx/xvabsd.ll +new file mode 100644 +index 000000000..4c4e31f0f +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/xvabsd.ll +@@ -0,0 +1,106 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc -march=loongarch64 -mattr=+lasx < %s | FileCheck %s ++ ++define <32 x i8> @xvabsd_b(<32 x i8> %a, <32 x i8> %b) { ++; CHECK-LABEL: xvabsd_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvabsd.b $xr0, $xr1, $xr0 ++; CHECK-NEXT: jr $ra ++entry: ++ %icmp = icmp sgt <32 x i8> %b, %a ++ %subba = sub <32 x i8> %b, %a ++ %subab = sub <32 x i8> %a, %b ++ %select = select <32 x i1> %icmp, <32 x i8> %subba, <32 x i8> %subab ++ ret <32 x i8> %select ++} ++ ++define <16 x i16> @xvabsd_h(<16 x i16> %a, <16 x i16> %b) { ++; CHECK-LABEL: xvabsd_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvabsd.h $xr0, $xr1, $xr0 ++; CHECK-NEXT: jr $ra ++entry: ++ %icmp = icmp sgt <16 x i16> %b, %a ++ %subba = sub <16 x i16> %b, %a ++ %subab = sub <16 x i16> %a, %b ++ %select = select <16 x i1> %icmp, <16 x i16> %subba, <16 x i16> %subab ++ ret <16 x i16> %select ++} ++ ++define <8 x i32> @xvabsd_w(<8 x i32> %a, <8 x i32> %b) { ++; CHECK-LABEL: xvabsd_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvabsd.w $xr0, $xr1, $xr0 ++; CHECK-NEXT: jr $ra ++entry: ++ %icmp = icmp sgt <8 x i32> %b, %a ++ %subba = sub <8 x i32> %b, %a ++ %subab = sub <8 x i32> %a, %b ++ %select = select <8 x i1> %icmp, <8 x i32> %subba, <8 x i32> %subab ++ ret <8 x i32> %select ++} ++ ++define <4 x i64> @xvabsd_d(<4 x i64> %a, <4 x i64> %b) { ++; CHECK-LABEL: xvabsd_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvabsd.d $xr0, $xr1, $xr0 ++; CHECK-NEXT: jr $ra ++entry: ++ %icmp = icmp sgt <4 x i64> %b, %a ++ %subba = sub <4 x i64> %b, %a ++ %subab = sub <4 x i64> %a, %b ++ %select = select <4 x i1> %icmp, <4 x i64> %subba, <4 x i64> %subab ++ ret <4 x i64> %select ++} ++ ++define <32 x i8> @xvabsd_bu(<32 x i8> %a, <32 x i8> %b) { ++; CHECK-LABEL: xvabsd_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvabsd.bu $xr0, $xr1, $xr0 ++; CHECK-NEXT: jr $ra ++entry: ++ %icmp = icmp ugt <32 x i8> %b, %a ++ %subba = sub <32 x i8> %b, %a ++ %subab = sub <32 x i8> %a, %b ++ %select = select <32 x i1> %icmp, <32 x i8> %subba, <32 x i8> %subab ++ ret <32 x i8> %select ++} ++ ++define <16 x i16> @xvabsd_hu(<16 x i16> %a, <16 x i16> %b) { ++; CHECK-LABEL: xvabsd_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvabsd.hu $xr0, $xr1, $xr0 ++; CHECK-NEXT: jr $ra ++entry: ++ %icmp = icmp ugt <16 x i16> %b, %a ++ %subba = sub <16 x i16> %b, %a ++ %subab = sub <16 x i16> %a, %b ++ %select = select <16 x i1> %icmp, <16 x i16> %subba, <16 x i16> %subab ++ ret <16 x i16> %select ++} ++ ++define <8 x i32> @xvabsd_wu(<8 x i32> %a, <8 x i32> %b) { ++; CHECK-LABEL: xvabsd_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvabsd.wu $xr0, $xr1, $xr0 ++; CHECK-NEXT: jr $ra ++entry: ++ %icmp = icmp ugt <8 x i32> %b, %a ++ %subba = sub <8 x i32> %b, %a ++ %subab = sub <8 x i32> %a, %b ++ %select = select <8 x i1> %icmp, <8 x i32> %subba, <8 x i32> %subab ++ ret <8 x i32> %select ++} ++ ++define <4 x i64> @xvabsd_du(<4 x i64> %a, <4 x i64> %b) { ++; CHECK-LABEL: xvabsd_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvabsd.du $xr0, $xr1, $xr0 ++; CHECK-NEXT: jr $ra ++entry: ++ %icmp = icmp ugt <4 x i64> %b, %a ++ %subba = sub <4 x i64> %b, %a ++ %subab = sub <4 x i64> %a, %b ++ %select = select <4 x i1> %icmp, <4 x i64> %subba, <4 x i64> %subab ++ ret 
<4 x i64> %select ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/xvadda.ll b/llvm/test/CodeGen/LoongArch/lasx/xvadda.ll +new file mode 100644 +index 000000000..a849fef3e +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/xvadda.ll +@@ -0,0 +1,62 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc -march=loongarch64 -mattr=+lasx < %s | FileCheck %s ++ ++define <32 x i8> @xvaddab(<32 x i8>, <32 x i8>) { ++; CHECK-LABEL: xvaddab: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvadda.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: jr $ra ++ %3 = icmp slt <32 x i8> %0, zeroinitializer ++ %4 = sub <32 x i8> zeroinitializer, %0 ++ %5 = select <32 x i1> %3, <32 x i8> %4, <32 x i8> %0 ++ %6 = icmp slt <32 x i8> %1, zeroinitializer ++ %7 = sub <32 x i8> zeroinitializer, %1 ++ %8 = select <32 x i1> %6, <32 x i8> %7, <32 x i8> %1 ++ %9 = add <32 x i8> %5, %8 ++ ret <32 x i8> %9 ++} ++ ++define <16 x i16> @xvaddah(<16 x i16>, <16 x i16>) { ++; CHECK-LABEL: xvaddah: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvadda.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: jr $ra ++ %3 = icmp slt <16 x i16> %0, zeroinitializer ++ %4 = sub <16 x i16> zeroinitializer, %0 ++ %5 = select <16 x i1> %3, <16 x i16> %4, <16 x i16> %0 ++ %6 = icmp slt <16 x i16> %1, zeroinitializer ++ %7 = sub <16 x i16> zeroinitializer, %1 ++ %8 = select <16 x i1> %6, <16 x i16> %7, <16 x i16> %1 ++ %9 = add <16 x i16> %5, %8 ++ ret <16 x i16> %9 ++} ++ ++define <8 x i32> @xvaddaw(<8 x i32>, <8 x i32>) { ++; CHECK-LABEL: xvaddaw: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvadda.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: jr $ra ++ %3 = icmp slt <8 x i32> %0, zeroinitializer ++ %4 = sub nsw <8 x i32> zeroinitializer, %0 ++ %5 = select <8 x i1> %3, <8 x i32> %4, <8 x i32> %0 ++ %6 = icmp slt <8 x i32> %1, zeroinitializer ++ %7 = sub nsw <8 x i32> zeroinitializer, %1 ++ %8 = select <8 x i1> %6, <8 x i32> %7, <8 x i32> %1 ++ %9 = add nuw nsw <8 x i32> %5, %8 ++ ret <8 x i32> %9 ++} ++ ++define <4 x i64> @xvaddad(<4 x i64>, <4 x i64>) { ++; CHECK-LABEL: xvaddad: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvadda.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: jr $ra ++ %3 = icmp slt <4 x i64> %0, zeroinitializer ++ %4 = sub nsw <4 x i64> zeroinitializer, %0 ++ %5 = select <4 x i1> %3, <4 x i64> %4, <4 x i64> %0 ++ %6 = icmp slt <4 x i64> %1, zeroinitializer ++ %7 = sub nsw <4 x i64> zeroinitializer, %1 ++ %8 = select <4 x i1> %6, <4 x i64> %7, <4 x i64> %1 ++ %9 = add nuw nsw <4 x i64> %5, %8 ++ ret <4 x i64> %9 ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/xvaddsub.ll b/llvm/test/CodeGen/LoongArch/lasx/xvaddsub.ll +new file mode 100644 +index 000000000..eb2c493d2 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/xvaddsub.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc -march=loongarch64 -mattr=+lasx < %s | FileCheck %s ++ ++define <4 x i64> @svaddev(<8 x i32> %b, <8 x i32> %c) { ++; CHECK-LABEL: svaddev: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvaddwev.d.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: jr $ra ++entry: ++ %0 = add nsw <8 x i32> %c, %b ++ %1 = shufflevector <8 x i32> %0, <8 x i32> undef, <4 x i32> ++ %2 = sext <4 x i32> %1 to <4 x i64> ++ ret <4 x i64> %2 ++} ++ ++define <4 x i64> @uvaddev(<8 x i32> %b, <8 x i32> %c) { ++; CHECK-LABEL: uvaddev: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvaddwev.d.wu $xr0, $xr0, $xr1 ++; CHECK-NEXT: jr $ra ++entry: ++ %0 = add <8 x i32> %c, %b ++ %1 = shufflevector <8 x i32> %0, <8 x i32> undef, <4 x i32> ++ %2 = zext <4 x i32> %1 to <4 x i64> ++ ret <4 
x i64> %2 ++} ++ ++define <4 x i64> @uvsubev(<8 x i32> %b, <8 x i32> %c) { ++; CHECK-LABEL: uvsubev: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsubwev.d.wu $xr0, $xr0, $xr1 ++; CHECK-NEXT: jr $ra ++entry: ++ %0 = sub <8 x i32> %b, %c ++ %1 = shufflevector <8 x i32> %0, <8 x i32> undef, <4 x i32> ++ %2 = zext <4 x i32> %1 to <4 x i64> ++ ret <4 x i64> %2 ++} ++ ++define <4 x i64> @svsubev(<8 x i32> %b, <8 x i32> %c) { ++; CHECK-LABEL: svsubev: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsubwev.d.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: jr $ra ++entry: ++ %0 = sub nsw <8 x i32> %b, %c ++ %1 = shufflevector <8 x i32> %0, <8 x i32> undef, <4 x i32> ++ %2 = sext <4 x i32> %1 to <4 x i64> ++ ret <4 x i64> %2 ++} ++ ++define <4 x i64> @uvaddod(<8 x i32> %b, <8 x i32> %c) { ++; CHECK-LABEL: uvaddod: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvaddwod.d.wu $xr0, $xr0, $xr1 ++; CHECK-NEXT: jr $ra ++entry: ++ %0 = add <8 x i32> %c, %b ++ %1 = shufflevector <8 x i32> %0, <8 x i32> undef, <4 x i32> ++ %2 = zext <4 x i32> %1 to <4 x i64> ++ ret <4 x i64> %2 ++} ++ ++define <4 x i64> @svaddod(<8 x i32> %b, <8 x i32> %c) { ++; CHECK-LABEL: svaddod: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvaddwod.d.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: jr $ra ++entry: ++ %0 = add nsw <8 x i32> %c, %b ++ %1 = shufflevector <8 x i32> %0, <8 x i32> undef, <4 x i32> ++ %2 = sext <4 x i32> %1 to <4 x i64> ++ ret <4 x i64> %2 ++} ++ ++define <4 x i64> @uvsubod(<8 x i32> %b, <8 x i32> %c) { ++; CHECK-LABEL: uvsubod: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsubwod.d.wu $xr0, $xr0, $xr1 ++; CHECK-NEXT: jr $ra ++entry: ++ %0 = sub <8 x i32> %b, %c ++ %1 = shufflevector <8 x i32> %0, <8 x i32> undef, <4 x i32> ++ %2 = zext <4 x i32> %1 to <4 x i64> ++ ret <4 x i64> %2 ++} ++ ++define <4 x i64> @svsubod(<8 x i32> %b, <8 x i32> %c) { ++; CHECK-LABEL: svsubod: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsubwod.d.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: jr $ra ++entry: ++ %0 = sub nsw <8 x i32> %b, %c ++ %1 = shufflevector <8 x i32> %0, <8 x i32> undef, <4 x i32> ++ %2 = sext <4 x i32> %1 to <4 x i64> ++ ret <4 x i64> %2 ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/xvhadd.ll b/llvm/test/CodeGen/LoongArch/lasx/xvhadd.ll +new file mode 100644 +index 000000000..5b452c5eb +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/xvhadd.ll +@@ -0,0 +1,21 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc -march=loongarch64 -mattr=+lasx < %s | FileCheck %s ++ ++define <4 x i64> @mul(<4 x i64> %a, <8 x i32> %m, <8 x i32> %n) { ++; CHECK-LABEL: mul: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvhaddw.d.w $xr0, $xr1, $xr2 ++; CHECK-NEXT: jr $ra ++entry: ++ %0 = shufflevector <8 x i32> %n, <8 x i32> undef, <2 x i32> ++ %1 = shufflevector <8 x i32> %m, <8 x i32> undef, <2 x i32> ++ %2 = add nsw <2 x i32> %0, %1 ++ %3 = sext <2 x i32> %2 to <2 x i64> ++ %4 = shufflevector <8 x i32> %n, <8 x i32> undef, <2 x i32> ++ %5 = shufflevector <8 x i32> %m, <8 x i32> undef, <2 x i32> ++ %6 = add nsw <2 x i32> %4, %5 ++ %7 = sext <2 x i32> %6 to <2 x i64> ++ %vecins16 = shufflevector <2 x i64> %3, <2 x i64> %7, <4 x i32> ++ ret <4 x i64> %vecins16 ++} ++ +diff --git a/llvm/test/CodeGen/LoongArch/lasx/xvilvh.ll b/llvm/test/CodeGen/LoongArch/lasx/xvilvh.ll +new file mode 100644 +index 000000000..11f96f435 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/xvilvh.ll +@@ -0,0 +1,32 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc -march=loongarch64 
-mattr=+lasx < %s | FileCheck %s ++ ++define <32 x i8> @xvilvhb(<32 x i8> %vj, <32 x i8> %vk) { ++; CHECK-LABEL: xvilvhb: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvilvh.b $xr0, $xr1, $xr0 ++; CHECK-NEXT: jr $ra ++entry: ++ %vd = shufflevector <32 x i8> %vj, <32 x i8> %vk, <32 x i32> ++ ret <32 x i8> %vd ++} ++ ++define <16 x i16> @xvilvhh(<16 x i16> %vj, <16 x i16> %vk) { ++; CHECK-LABEL: xvilvhh: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvilvh.h $xr0, $xr1, $xr0 ++; CHECK-NEXT: jr $ra ++entry: ++ %vd = shufflevector <16 x i16> %vj, <16 x i16> %vk, <16 x i32> ++ ret <16 x i16> %vd ++} ++ ++define <8 x i32> @xvilvhw(<8 x i32> %vj, <8 x i32> %vk) { ++; CHECK-LABEL: xvilvhw: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvilvh.w $xr0, $xr1, $xr0 ++; CHECK-NEXT: jr $ra ++entry: ++ %vd = shufflevector <8 x i32> %vj, <8 x i32> %vk, <8 x i32> ++ ret <8 x i32> %vd ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/xvilvl.ll b/llvm/test/CodeGen/LoongArch/lasx/xvilvl.ll +new file mode 100644 +index 000000000..7249bc76c +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/xvilvl.ll +@@ -0,0 +1,32 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc -march=loongarch64 -mattr=+lasx < %s | FileCheck %s ++ ++define <32 x i8> @xvilvlb(<32 x i8> %vj, <32 x i8> %vk) { ++; CHECK-LABEL: xvilvlb: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvilvl.b $xr0, $xr1, $xr0 ++; CHECK-NEXT: jr $ra ++entry: ++ %vd = shufflevector <32 x i8> %vj, <32 x i8> %vk, <32 x i32> ++ ret <32 x i8> %vd ++} ++ ++define <16 x i16> @xvilvlh(<16 x i16> %vj, <16 x i16> %vk) { ++; CHECK-LABEL: xvilvlh: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvilvl.h $xr0, $xr1, $xr0 ++; CHECK-NEXT: jr $ra ++entry: ++ %vd = shufflevector <16 x i16> %vj, <16 x i16> %vk, <16 x i32> ++ ret <16 x i16> %vd ++} ++ ++define <8 x i32> @xvilvlw(<8 x i32> %vj, <8 x i32> %vk) { ++; CHECK-LABEL: xvilvlw: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvilvl.w $xr0, $xr1, $xr0 ++; CHECK-NEXT: jr $ra ++entry: ++ %vd = shufflevector <8 x i32> %vj, <8 x i32> %vk, <8 x i32> ++ ret <8 x i32> %vd ++} +diff --git a/llvm/test/CodeGen/LoongArch/ldptr.ll b/llvm/test/CodeGen/LoongArch/ldptr.ll +new file mode 100644 +index 000000000..8395b264f +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/ldptr.ll +@@ -0,0 +1,70 @@ ++; Check whether ld.w/ld.d/ldptr.w/ldptr.d/ldx.w/ldx.d instructions are properly generated ++; RUN: llc -march=loongarch64 -o - %s | FileCheck %s ++ ++define signext i32 @ld_w(i32* %p) { ++; CHECK-LABEL: ld_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: ld.w $r4, $r4, 2044 ++; CHECK-NEXT: jr $ra ++entry: ++ %addr = getelementptr inbounds i32, i32* %p, i64 511 ++ %val = load i32, i32* %addr, align 4 ++ ret i32 %val ++} ++ ++define signext i32 @ldptr_w(i32* %p) { ++; CHECK-LABEL: ldptr_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: ldptr.w $r4, $r4, 2048 ++; CHECK-NEXT: jr $ra ++entry: ++ %addr = getelementptr inbounds i32, i32* %p, i64 512 ++ %val = load i32, i32* %addr, align 4 ++ ret i32 %val ++} ++ ++define signext i32 @ldx_w(i32* %p) { ++; CHECK-LABEL: ldx_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: lu12i.w $r[[REG:[0-9]+]], 8 ++; CHECK-NEXT: ldx.w $r4, $r4, $r[[REG:[0-9]+]] ++; CHECK-NEXT: jr $ra ++entry: ++ %addr = getelementptr inbounds i32, i32* %p, i64 8192 ++ %val = load i32, i32* %addr, align 4 ++ ret i32 %val ++} ++ ++define i64 @ld_d(i64* %p) { ++; CHECK-LABEL: ld_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: ld.d $r4, $r4, 2040 ++; CHECK-NEXT: jr $ra ++entry: ++ %addr = getelementptr 
inbounds i64, i64* %p, i64 255 ++ %val = load i64, i64* %addr, align 8 ++ ret i64 %val ++} ++ ++define i64 @ldptr_d(i64* %p) { ++; CHECK-LABEL: ldptr_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: ldptr.d $r4, $r4, 2048 ++; CHECK-NEXT: jr $ra ++entry: ++ %addr = getelementptr inbounds i64, i64* %p, i64 256 ++ %val = load i64, i64* %addr, align 8 ++ ret i64 %val ++} ++ ++define i64 @ldx_d(i64* %p) { ++; CHECK-LABEL: ldx_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: lu12i.w $r[[REG:[0-9]+]], 8 ++; CHECK-NEXT: ldx.d $r4, $r4, $r[[REG:[0-9]+]] ++; CHECK-NEXT: jr $ra ++entry: ++ %addr = getelementptr inbounds i64, i64* %p, i64 4096 ++ %val = load i64, i64* %addr, align 8 ++ ret i64 %val ++} +diff --git a/llvm/test/CodeGen/LoongArch/lit.local.cfg b/llvm/test/CodeGen/LoongArch/lit.local.cfg +index a54f5aeca..6223fc691 100644 +--- a/llvm/test/CodeGen/LoongArch/lit.local.cfg ++++ b/llvm/test/CodeGen/LoongArch/lit.local.cfg +@@ -1,13 +1,3 @@ +-import os +- +-config.suffixes = ['.ll', '.mir', '.test', '.txt'] +- +-extract_section_path = os.path.join(config.llvm_src_root, +- 'utils', 'extract-section.py') +- +-config.substitutions.append(('extract-section', +- "'%s' %s %s" % (config.python_executable, +- extract_section_path, '--bits-endian little'))) +- + if not 'LoongArch' in config.root.targets: + config.unsupported = True ++ +diff --git a/llvm/test/CodeGen/LoongArch/logic-op.ll b/llvm/test/CodeGen/LoongArch/logic-op.ll +new file mode 100644 +index 000000000..c1029c1ff +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/logic-op.ll +@@ -0,0 +1,171 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc -march=loongarch64 < %s | FileCheck %s ++ ++define signext i32 @foo32(i32 signext %a) { ++; CHECK-LABEL: foo32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: sltui $r4, $r4, 1 ++; CHECK-NEXT: jr $ra ++entry: ++ %tobool = icmp eq i32 %a, 0 ++ %conv = zext i1 %tobool to i32 ++ ret i32 %conv ++} ++ ++define i64 @foo(i64 %a) { ++; CHECK-LABEL: foo: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: sltui $r4, $r4, 1 ++; CHECK-NEXT: jr $ra ++entry: ++ %tobool = icmp eq i64 %a, 0 ++ %conv = zext i1 %tobool to i64 ++ ret i64 %conv ++} ++ ++define i64 @not(i64 %a) { ++; CHECK-LABEL: not: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: nor $r4, $zero, $r4 ++; CHECK-NEXT: jr $ra ++entry: ++ %not = xor i64 %a, -1 ++ ret i64 %not ++} ++ ++define i64 @and(i64 %a, i64 %b) { ++; CHECK-LABEL: and: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: and $r4, $r5, $r4 ++; CHECK-NEXT: jr $ra ++entry: ++ %and = and i64 %b, %a ++ ret i64 %and ++} ++ ++define i64 @or(i64 %a, i64 %b) { ++; CHECK-LABEL: or: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: or $r4, $r5, $r4 ++; CHECK-NEXT: jr $ra ++entry: ++ %or = or i64 %b, %a ++ ret i64 %or ++} ++ ++define i64 @xor(i64 %a, i64 %b) { ++; CHECK-LABEL: xor: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xor $r4, $r5, $r4 ++; CHECK-NEXT: jr $ra ++entry: ++ %xor = xor i64 %b, %a ++ ret i64 %xor ++} ++ ++define i64 @nor(i64 %a, i64 %b) { ++; CHECK-LABEL: nor: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: nor $r4, $r5, $r4 ++; CHECK-NEXT: jr $ra ++entry: ++ %or = or i64 %b, %a ++ %not = xor i64 %or, -1 ++ ret i64 %not ++} ++ ++define i64 @andn(i64 %a, i64 %b) { ++; CHECK-LABEL: andn: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: andn $r4, $r4, $r5 ++; CHECK-NEXT: jr $ra ++entry: ++ %not = xor i64 %b, -1 ++ %and = and i64 %not, %a ++ ret i64 %and ++} ++ ++define signext i32 @andn32(i32 signext %a, i32 signext %b) { ++; CHECK-LABEL: andn32: ++; CHECK: # %bb.0: # 
%entry ++; CHECK-NEXT: andn $r4, $r4, $r5 ++; CHECK-NEXT: jr $ra ++entry: ++ %not = xor i32 %b, -1 ++ %and = and i32 %not, %a ++ ret i32 %and ++} ++ ++define i64 @orn(i64 %a, i64 %b) { ++; CHECK-LABEL: orn: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: orn $r4, $r4, $r5 ++; CHECK-NEXT: jr $ra ++entry: ++ %not = xor i64 %b, -1 ++ %or = or i64 %not, %a ++ ret i64 %or ++} ++ ++define signext i32 @orn32(i32 signext %a, i32 signext %b) { ++; CHECK-LABEL: orn32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: orn $r4, $r4, $r5 ++; CHECK-NEXT: jr $ra ++entry: ++ %not = xor i32 %b, -1 ++ %or = or i32 %not, %a ++ ret i32 %or ++} ++ ++define signext i32 @and32(i32 signext %a, i32 signext %b) { ++; CHECK-LABEL: and32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: and $r4, $r5, $r4 ++; CHECK-NEXT: jr $ra ++entry: ++ %and = and i32 %b, %a ++ ret i32 %and ++} ++ ++define signext i32 @or32(i32 signext %a, i32 signext %b) { ++; CHECK-LABEL: or32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: or $r4, $r5, $r4 ++; CHECK-NEXT: jr $ra ++entry: ++ %or = or i32 %b, %a ++ ret i32 %or ++} ++ ++define signext i32 @xor32(i32 signext %a, i32 signext %b) { ++; CHECK-LABEL: xor32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xor $r4, $r5, $r4 ++; CHECK-NEXT: jr $ra ++entry: ++ %xor = xor i32 %b, %a ++ ret i32 %xor ++} ++ ++define signext i32 @nor32(i32 signext %a, i32 signext %b) { ++; CHECK-LABEL: nor32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: nor $r4, $r4, $r5 ++; CHECK-NEXT: jr $ra ++entry: ++ %or = or i32 %b, %a ++ %not = xor i32 %or, -1 ++ ret i32 %not ++} ++ ++define signext i32 @not32(i32 signext %a) { ++; CHECK-LABEL: not32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: nor $r4, $zero, $r4 ++; CHECK-NEXT: jr $ra ++entry: ++ %not = xor i32 %a, -1 ++ ret i32 %not ++} ++ +diff --git a/llvm/test/CodeGen/LoongArch/lshr.ll b/llvm/test/CodeGen/LoongArch/lshr.ll +new file mode 100644 +index 000000000..54e4a5f2d +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lshr.ll +@@ -0,0 +1,12 @@ ++; RUN: llc -march=loongarch64 < %s | FileCheck %s ++ ++define signext i32 @foo(i32 %a) { ++; CHECK-LABEL: foo: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: slli.w $r4, $r4, 0 ++; CHECK-NEXT: bstrpick.d $r4, $r4, 31, 1 ++; CHECK-NEXT: jr $ra ++entry: ++ %b = lshr i32 %a, 1 ++ ret i32 %b ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/imm_vector_lsx.ll b/llvm/test/CodeGen/LoongArch/lsx/imm_vector_lsx.ll +new file mode 100644 +index 000000000..97b23be80 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/imm_vector_lsx.ll +@@ -0,0 +1,176 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc -march=loongarch64 -mattr=+lsx < %s | FileCheck %s ++ ++define <2 x i64> @build_lsx0(<2 x i64> %a) { ++; CHECK-LABEL: build_lsx0: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.w $r4, $zero, -1 ++; CHECK-NEXT: lu32i.d $r4, 0 ++; CHECK-NEXT: vreplgr2vr.d $vr1, $r4 ++; CHECK-NEXT: vadd.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: jr $ra ++ %b = add <2 x i64> %a, ++ ret <2 x i64> %b ++} ++ ++define <2 x i64> @build_lsx1(<2 x i64> %a) { ++; CHECK-LABEL: build_lsx1: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu52i.d $r4, $zero, 2047 ++; CHECK-NEXT: vreplgr2vr.d $vr1, $r4 ++; CHECK-NEXT: vadd.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: jr $ra ++ %b = add <2 x i64> %a, ++ ret <2 x i64> %b ++} ++ ++define <2 x i64> @build_lsx2(<2 x i64> %a) { ++; CHECK-LABEL: build_lsx2: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r4, $zero, 2048 ++; CHECK-NEXT: lu32i.d $r4, 524287 ++; CHECK-NEXT: vreplgr2vr.d $vr1, $r4 ++; CHECK-NEXT: vadd.d $vr0, $vr0, $vr1 ++; 
CHECK-NEXT: jr $ra ++ %b = add <2 x i64> %a, ++ ret <2 x i64> %b ++} ++ ++define <2 x i64> @build_lsx3(<2 x i64> %a) { ++; CHECK-LABEL: build_lsx3: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r4, $zero, 4095 ++; CHECK-NEXT: lu52i.d $r4, $r4, -1 ++; CHECK-NEXT: vreplgr2vr.d $vr1, $r4 ++; CHECK-NEXT: vadd.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: jr $ra ++ %b = add <2 x i64> %a, ++ ret <2 x i64> %b ++} ++ ++define <2 x i64> @build_lsx4(<2 x i64> %a) { ++; CHECK-LABEL: build_lsx4: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r4, $zero, 4095 ++; CHECK-NEXT: lu32i.d $r4, -524288 ++; CHECK-NEXT: lu52i.d $r4, $r4, 0 ++; CHECK-NEXT: vreplgr2vr.d $vr1, $r4 ++; CHECK-NEXT: vadd.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: jr $ra ++ %b = add <2 x i64> %a, ++ ret <2 x i64> %b ++} ++ ++define <2 x i64> @build_lsx5(<2 x i64> %a) { ++; CHECK-LABEL: build_lsx5: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.w $r4, $zero, -2048 ++; CHECK-NEXT: lu32i.d $r4, 0 ++; CHECK-NEXT: vreplgr2vr.d $vr1, $r4 ++; CHECK-NEXT: vadd.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: jr $ra ++ %b = add <2 x i64> %a, ++ ret <2 x i64> %b ++} ++ ++define <2 x i64> @build_lsx6(<2 x i64> %a) { ++; CHECK-LABEL: build_lsx6: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.w $r4, $zero, -2048 ++; CHECK-NEXT: lu52i.d $r4, $r4, 0 ++; CHECK-NEXT: vreplgr2vr.d $vr1, $r4 ++; CHECK-NEXT: vadd.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: jr $ra ++ %b = add <2 x i64> %a, ++ ret <2 x i64> %b ++} ++ ++define <2 x i64> @build_lsx7(<2 x i64> %a) { ++; CHECK-LABEL: build_lsx7: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.w $r4, $zero, -2048 ++; CHECK-NEXT: lu32i.d $r4, -524288 ++; CHECK-NEXT: lu52i.d $r4, $r4, 0 ++; CHECK-NEXT: vreplgr2vr.d $vr1, $r4 ++; CHECK-NEXT: vadd.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: jr $ra ++ %b = add <2 x i64> %a, ++ ret <2 x i64> %b ++} ++ ++define <2 x i64> @build_lsx8(<2 x i64> %a) { ++; CHECK-LABEL: build_lsx8: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -524288 ++; CHECK-NEXT: lu32i.d $r4, 0 ++; CHECK-NEXT: vreplgr2vr.d $vr1, $r4 ++; CHECK-NEXT: vadd.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: jr $ra ++ %b = add <2 x i64> %a, ++ ret <2 x i64> %b ++} ++ ++define <2 x i64> @build_lsx9(<2 x i64> %a) { ++; CHECK-LABEL: build_lsx9: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -1 ++; CHECK-NEXT: lu52i.d $r4, $r4, 0 ++; CHECK-NEXT: vreplgr2vr.d $vr1, $r4 ++; CHECK-NEXT: vadd.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: jr $ra ++ %b = add <2 x i64> %a, ++ ret <2 x i64> %b ++} ++ ++define <2 x i64> @build_lsx10(<2 x i64> %a) { ++; CHECK-LABEL: build_lsx10: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -524288 ++; CHECK-NEXT: lu32i.d $r4, 0 ++; CHECK-NEXT: lu52i.d $r4, $r4, 2047 ++; CHECK-NEXT: vreplgr2vr.d $vr1, $r4 ++; CHECK-NEXT: vadd.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: jr $ra ++ %b = add <2 x i64> %a, ++ ret <2 x i64> %b ++} ++ ++define <2 x i64> @build_lsx11(<2 x i64> %a) { ++; CHECK-LABEL: build_lsx11: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, 524287 ++; CHECK-NEXT: ori $r4, $r4, 2048 ++; CHECK-NEXT: lu32i.d $r4, -1 ++; CHECK-NEXT: vreplgr2vr.d $vr1, $r4 ++; CHECK-NEXT: vadd.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: jr $ra ++ %b = add <2 x i64> %a, ++ ret <2 x i64> %b ++} ++ ++define <2 x i64> @build_lsx12(<2 x i64> %a) { ++; CHECK-LABEL: build_lsx12: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -524288 ++; CHECK-NEXT: ori $r4, $r4, 2047 ++; CHECK-NEXT: lu52i.d $r4, $r4, 2047 ++; CHECK-NEXT: vreplgr2vr.d $vr1, $r4 ++; CHECK-NEXT: vadd.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: jr $ra ++ %b = add <2 x i64> %a, ++ ret <2 x i64> %b ++} ++ ++define <2 x i64> @build_lsx13(<2 x i64> %a) { ++; 
CHECK-LABEL: build_lsx13: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $r4, -524288 ++; CHECK-NEXT: ori $r4, $r4, 2048 ++; CHECK-NEXT: lu32i.d $r4, -524288 ++; CHECK-NEXT: lu52i.d $r4, $r4, 0 ++; CHECK-NEXT: vreplgr2vr.d $vr1, $r4 ++; CHECK-NEXT: vadd.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: jr $ra ++ %b = add <2 x i64> %a, ++ ret <2 x i64> %b ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/inline-asm.ll b/llvm/test/CodeGen/LoongArch/lsx/inline-asm.ll +new file mode 100644 +index 000000000..37cb6dfc9 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/inline-asm.ll +@@ -0,0 +1,34 @@ ++; A basic inline assembly test ++ ++; RUN: llc -march=loongarch64 -mattr=+lsx < %s | FileCheck %s ++ ++@v2i64_r = global <2 x i64> zeroinitializer, align 16 ++ ++define void @test1() nounwind { ++entry: ++ ; CHECK-LABEL: test1: ++ %0 = call <2 x i64> asm "vldi ${0:w}, 1", "=f"() ++ ; CHECK: vldi $vr{{[1-3]?[0-9]}}, 1 ++ store <2 x i64> %0, <2 x i64>* @v2i64_r ++ ret void ++} ++ ++define void @test2() nounwind { ++entry: ++ ; CHECK-LABEL: test2: ++ %0 = load <2 x i64>, <2 x i64>* @v2i64_r ++ %1 = call <2 x i64> asm "vaddi.wu ${0:w}, ${1:w}, 1", "=f,f"(<2 x i64> %0) ++ ; CHECK: vaddi.wu $vr{{[1-3]?[0-9]}}, $vr{{[1-3]?[0-9]}}, 1 ++ store <2 x i64> %1, <2 x i64>* @v2i64_r ++ ret void ++} ++ ++define void @test3() nounwind { ++entry: ++ ; CHECK-LABEL: test3: ++ %0 = load <2 x i64>, <2 x i64>* @v2i64_r ++ %1 = call <2 x i64> asm sideeffect "vaddi.wu ${0:w}, ${1:w}, 1", "=f,f,~{$vr0}"(<2 x i64> %0) ++ ; CHECK: vaddi.wu $vr{{([1-9]|[1-3][0-9])}}, $vr{{([1-9]|[1-3][0-9])}}, 1 ++ store <2 x i64> %1, <2 x i64>* @v2i64_r ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-lsx.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-lsx.ll +new file mode 100644 +index 000000000..60ff93095 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-lsx.ll +@@ -0,0 +1,92 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc -march=loongarch64 -mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vsrlrni.b.h(<16 x i8>, <16 x i8>, i32) ++declare <8 x i16> @llvm.loongarch.lsx.vsrlrni.h.w(<8 x i16>, <8 x i16>, i32) ++declare <4 x i32> @llvm.loongarch.lsx.vsrlrni.w.d(<4 x i32>, <4 x i32>, i32) ++declare <2 x i64> @llvm.loongarch.lsx.vsrlrni.d.q(<2 x i64>, <2 x i64>, i32) ++ ++declare <16 x i8> @llvm.loongarch.lsx.vrepli.b(i32) ++declare <8 x i16> @llvm.loongarch.lsx.vrepli.h(i32) ++declare <4 x i32> @llvm.loongarch.lsx.vrepli.w(i32) ++declare <2 x i64> @llvm.loongarch.lsx.vrepli.d(i32) ++ ++define <16 x i8> @lsx_vsrlrni_b_h(<16 x i8> %a, <16 x i8> %b) { ++; CHECK-LABEL: lsx_vsrlrni_b_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrlrni.b.h $vr0, $vr1, 2 ++; CHECK-NEXT: jr $ra ++entry: ++ %0 = tail call <16 x i8> @llvm.loongarch.lsx.vsrlrni.b.h(<16 x i8> %a, <16 x i8> %b, i32 2) ++ ret <16 x i8> %0 ++} ++ ++define <8 x i16> @lsx_vsrlrni_h_w(<8 x i16> %a, <8 x i16> %b) { ++; CHECK-LABEL: lsx_vsrlrni_h_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrlrni.h.w $vr0, $vr1, 2 ++; CHECK-NEXT: jr $ra ++entry: ++ %0 = tail call <8 x i16> @llvm.loongarch.lsx.vsrlrni.h.w(<8 x i16> %a, <8 x i16> %b, i32 2) ++ ret <8 x i16> %0 ++} ++ ++define <4 x i32> @lsx_vsrlrni_w_d(<4 x i32> %a, <4 x i32> %b) { ++; CHECK-LABEL: lsx_vsrlrni_w_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrlrni.w.d $vr0, $vr1, 2 ++; CHECK-NEXT: jr $ra ++entry: ++ %0 = tail call <4 x i32> @llvm.loongarch.lsx.vsrlrni.w.d(<4 x i32> %a, <4 x i32> %b, i32 2) ++ ret <4 x i32> %0 ++} ++ ++define <2 
x i64> @lsx_vsrlrni_d_q(<2 x i64> %a, <2 x i64> %b) { ++; CHECK-LABEL: lsx_vsrlrni_d_q: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrlrni.d.q $vr0, $vr1, 2 ++; CHECK-NEXT: jr $ra ++entry: ++ %0 = tail call <2 x i64> @llvm.loongarch.lsx.vsrlrni.d.q(<2 x i64> %a, <2 x i64> %b, i32 2) ++ ret <2 x i64> %0 ++} ++ ++define <16 x i8> @lsx_vrepli_b() { ++; CHECK-LABEL: lsx_vrepli_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vldi $vr0, 2 ++; CHECK-NEXT: jr $ra ++entry: ++ %0 = tail call <16 x i8> @llvm.loongarch.lsx.vrepli.b(i32 2) ++ ret <16 x i8> %0 ++} ++ ++define <8 x i16> @lsx_vrepli_h() { ++; CHECK-LABEL: lsx_vrepli_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vldi $vr0, 2 ++; CHECK-NEXT: jr $ra ++entry: ++ %0 = tail call <8 x i16> @llvm.loongarch.lsx.vrepli.h(i32 2) ++ ret <8 x i16> %0 ++} ++ ++define <4 x i32> @lsx_vrepli_w() { ++; CHECK-LABEL: lsx_vrepli_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vldi $vr0, 2 ++; CHECK-NEXT: jr $ra ++entry: ++ %0 = tail call <4 x i32> @llvm.loongarch.lsx.vrepli.w(i32 2) ++ ret <4 x i32> %0 ++} ++ ++define <2 x i64> @lsx_vrepli_d() { ++; CHECK-LABEL: lsx_vrepli_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vldi $vr0, 2 ++; CHECK-NEXT: jr $ra ++entry: ++ %0 = tail call <2 x i64> @llvm.loongarch.lsx.vrepli.d(i32 2) ++ ret <2 x i64> %0 ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/logic-lsx.ll b/llvm/test/CodeGen/LoongArch/lsx/logic-lsx.ll +new file mode 100644 +index 000000000..0dd29b27e +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/logic-lsx.ll +@@ -0,0 +1,132 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc -march=loongarch64 -mattr=+lsx < %s | FileCheck %s ++ ++define <2 x i64> @not_v2i64(<2 x i64> %a) { ++; CHECK-LABEL: not_v2i64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vnor.v $vr0, $vr0, $vr0 ++; CHECK-NEXT: jr $ra ++entry: ++ %not = xor <2 x i64> %a, ++ ret <2 x i64> %not ++} ++ ++define <4 x i32> @not_v4i32(<4 x i32> %a) { ++; CHECK-LABEL: not_v4i32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vnor.v $vr0, $vr0, $vr0 ++; CHECK-NEXT: jr $ra ++entry: ++ %not = xor <4 x i32> %a, ++ ret <4 x i32> %not ++} ++ ++define <8 x i16> @not_v8i16(<8 x i16> %a) { ++; CHECK-LABEL: not_v8i16: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vnor.v $vr0, $vr0, $vr0 ++; CHECK-NEXT: jr $ra ++entry: ++ %not = xor <8 x i16> %a, ++ ret <8 x i16> %not ++} ++ ++define <16 x i8> @not_v16i8(<16 x i8> %a) { ++; CHECK-LABEL: not_v16i8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vxori.b $vr0, $vr0, 255 ++; CHECK-NEXT: jr $ra ++entry: ++ %not = xor <16 x i8> %a, ++ ret <16 x i8> %not ++} ++ ++ ++define <2 x i64> @andn_v2i64(<2 x i64> %a, <2 x i64> %b) { ++; CHECK-LABEL: andn_v2i64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vandn.v $vr0, $vr1, $vr0 ++; CHECK-NEXT: jr $ra ++entry: ++ %not = xor <2 x i64> %b, ++ %and = and <2 x i64> %not, %a ++ ret <2 x i64> %and ++} ++ ++define <4 x i32> @andn_v4i32(<4 x i32> %a, <4 x i32> %b) { ++; CHECK-LABEL: andn_v4i32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vandn.v $vr0, $vr1, $vr0 ++; CHECK-NEXT: jr $ra ++entry: ++ %not = xor <4 x i32> %b, ++ %and = and <4 x i32> %not, %a ++ ret <4 x i32> %and ++} ++ ++define <8 x i16> @andn_v8i16(<8 x i16> %a, <8 x i16> %b) { ++; CHECK-LABEL: andn_v8i16: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vandn.v $vr0, $vr1, $vr0 ++; CHECK-NEXT: jr $ra ++entry: ++ %not = xor <8 x i16> %b, ++ %and = and <8 x i16> %not, %a ++ ret <8 x i16> %and ++} ++ ++define <16 x i8> @andn_v16i8(<16 x i8> %a, <16 x i8> %b) { ++; CHECK-LABEL: 
andn_v16i8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vandn.v $vr0, $vr1, $vr0 ++; CHECK-NEXT: jr $ra ++entry: ++ %not = xor <16 x i8> %b, ++ %and = and <16 x i8> %not, %a ++ ret <16 x i8> %and ++} ++ ++ ++define <2 x i64> @orn_v2i64(<2 x i64> %a, <2 x i64> %b) { ++; CHECK-LABEL: orn_v2i64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vorn.v $vr0, $vr0, $vr1 ++; CHECK-NEXT: jr $ra ++entry: ++ %not = xor <2 x i64> %b, ++ %or = or <2 x i64> %not, %a ++ ret <2 x i64> %or ++} ++ ++define <4 x i32> @orn_v4i32(<4 x i32> %a, <4 x i32> %b) { ++; CHECK-LABEL: orn_v4i32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vorn.v $vr0, $vr0, $vr1 ++; CHECK-NEXT: jr $ra ++entry: ++ %not = xor <4 x i32> %b, ++ %or = or <4 x i32> %not, %a ++ ret <4 x i32> %or ++} ++ ++define <8 x i16> @orn_v8i16(<8 x i16> %a, <8 x i16> %b) { ++; CHECK-LABEL: orn_v8i16: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vorn.v $vr0, $vr0, $vr1 ++; CHECK-NEXT: jr $ra ++entry: ++ %not = xor <8 x i16> %b, ++ %or = or <8 x i16> %not, %a ++ ret <8 x i16> %or ++} ++ ++define <16 x i8> @orn_v16i8(<16 x i8> %a, <16 x i8> %b) { ++; CHECK-LABEL: orn_v16i8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vorn.v $vr0, $vr0, $vr1 ++; CHECK-NEXT: jr $ra ++entry: ++ %not = xor <16 x i8> %b, ++ %or = or <16 x i8> %not, %a ++ ret <16 x i8> %or ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/lsxvclr.ll b/llvm/test/CodeGen/LoongArch/lsx/lsxvclr.ll +new file mode 100644 +index 000000000..951254baa +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/lsxvclr.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc -march=loongarch64 -mattr=+lsx < %s | FileCheck %s ++ ++define <16 x i8> @clri8(<16 x i8> %b, <16 x i8> %c) { ++; CHECK-LABEL: clri8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vbitclr.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: jr $ra ++entry: ++ %shl = shl <16 x i8> , %c ++ %xor = xor <16 x i8> %shl, ++ %and = and <16 x i8> %xor, %b ++ ret <16 x i8> %and ++} ++ ++define <8 x i16> @clri16(<8 x i16> %b, <8 x i16> %c) { ++; CHECK-LABEL: clri16: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vbitclr.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: jr $ra ++entry: ++ %shl = shl <8 x i16> , %c ++ %xor = xor <8 x i16> %shl, ++ %and = and <8 x i16> %xor, %b ++ ret <8 x i16> %and ++} ++ ++define <4 x i32> @clri32(<4 x i32> %b, <4 x i32> %c) { ++; CHECK-LABEL: clri32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vbitclr.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: jr $ra ++entry: ++ %shl = shl <4 x i32> , %c ++ %xor = xor <4 x i32> %shl, ++ %and = and <4 x i32> %xor, %b ++ ret <4 x i32> %and ++} ++ ++define <2 x i64> @clri64(<2 x i64> %b, <2 x i64> %c) { ++; CHECK-LABEL: clri64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vbitclr.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: jr $ra ++entry: ++ %shl = shl <2 x i64> , %c ++ %xor = xor <2 x i64> %shl, ++ %and = and <2 x i64> %xor, %b ++ ret <2 x i64> %and ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/set-lsx.ll b/llvm/test/CodeGen/LoongArch/lsx/set-lsx.ll +new file mode 100644 +index 000000000..69f19297d +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/set-lsx.ll +@@ -0,0 +1,38 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc -march=loongarch64 -mattr=+lsx < %s | FileCheck %s ++ ++define <16 x i8> @seti8(<16 x i8>) { ++; CHECK-LABEL: seti8: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vbitseti.b $vr0, $vr0, 6 ++; CHECK-NEXT: jr $ra ++ %2 = or <16 x i8> %0, ++ ret <16 x i8> %2 ++} ++ ++define <8 x i16> @seti16(<8 x i16>) { ++; CHECK-LABEL: 
seti16: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vbitseti.h $vr0, $vr0, 6 ++; CHECK-NEXT: jr $ra ++ %2 = or <8 x i16> %0, ++ ret <8 x i16> %2 ++} ++ ++define <4 x i32> @seti32(<4 x i32>) { ++; CHECK-LABEL: seti32: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vbitseti.w $vr0, $vr0, 6 ++; CHECK-NEXT: jr $ra ++ %2 = or <4 x i32> %0, ++ ret <4 x i32> %2 ++} ++ ++define <2 x i64> @seti64(<2 x i64>) { ++; CHECK-LABEL: seti64: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vbitseti.d $vr0, $vr0, 6 ++; CHECK-NEXT: jr $ra ++ %2 = or <2 x i64> %0, ++ ret <2 x i64> %2 ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/vabsd.ll b/llvm/test/CodeGen/LoongArch/lsx/vabsd.ll +new file mode 100644 +index 000000000..86201ae0f +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/vabsd.ll +@@ -0,0 +1,262 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc -march=loongarch64 -mattr=+lsx < %s | FileCheck %s ++ ++define <16 x i8> @vabsd_b(<16 x i8> %a, <16 x i8> %b) { ++; CHECK-LABEL: vabsd_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vabsd.b $vr0, $vr1, $vr0 ++; CHECK-NEXT: jr $ra ++entry: ++ %icmp = icmp sgt <16 x i8> %b, %a ++ %subba = sub <16 x i8> %b, %a ++ %subab = sub <16 x i8> %a, %b ++ %select = select <16 x i1> %icmp, <16 x i8> %subba, <16 x i8> %subab ++ ret <16 x i8> %select ++} ++ ++define <8 x i16> @vabsd_h(<8 x i16> %a, <8 x i16> %b) { ++; CHECK-LABEL: vabsd_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vabsd.h $vr0, $vr1, $vr0 ++; CHECK-NEXT: jr $ra ++entry: ++ %icmp = icmp sgt <8 x i16> %b, %a ++ %subba = sub <8 x i16> %b, %a ++ %subab = sub <8 x i16> %a, %b ++ %select = select <8 x i1> %icmp, <8 x i16> %subba, <8 x i16> %subab ++ ret <8 x i16> %select ++} ++ ++define <8 x i8> @vabsd_h_v8i8(<8 x i8> %a, <8 x i8> %b) { ++; CHECK-LABEL: vabsd_h_v8i8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vabsd.h $vr0, $vr1, $vr0 ++; CHECK-NEXT: jr $ra ++entry: ++ %icmp = icmp sgt <8 x i8> %b, %a ++ %subba = sub <8 x i8> %b, %a ++ %subab = sub <8 x i8> %a, %b ++ %select = select <8 x i1> %icmp, <8 x i8> %subba, <8 x i8> %subab ++ ret <8 x i8> %select ++} ++ ++define <4 x i32> @vabsd_w(<4 x i32> %a, <4 x i32> %b) { ++; CHECK-LABEL: vabsd_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vabsd.w $vr0, $vr1, $vr0 ++; CHECK-NEXT: jr $ra ++entry: ++ %icmp = icmp sgt <4 x i32> %b, %a ++ %subba = sub <4 x i32> %b, %a ++ %subab = sub <4 x i32> %a, %b ++ %select = select <4 x i1> %icmp, <4 x i32> %subba, <4 x i32> %subab ++ ret <4 x i32> %select ++} ++ ++define <4 x i16> @vabsd_w_v4i16(<4 x i16> %a, <4 x i16> %b) { ++; CHECK-LABEL: vabsd_w_v4i16: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vabsd.w $vr0, $vr1, $vr0 ++; CHECK-NEXT: jr $ra ++entry: ++ %icmp = icmp sgt <4 x i16> %b, %a ++ %subba = sub <4 x i16> %b, %a ++ %subab = sub <4 x i16> %a, %b ++ %select = select <4 x i1> %icmp, <4 x i16> %subba, <4 x i16> %subab ++ ret <4 x i16> %select ++} ++ ++define <4 x i8> @vabsd_w_v4i8(<4 x i8> %a, <4 x i8> %b) { ++; CHECK-LABEL: vabsd_w_v4i8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vabsd.w $vr0, $vr1, $vr0 ++; CHECK-NEXT: jr $ra ++entry: ++ %icmp = icmp sgt <4 x i8> %b, %a ++ %subba = sub <4 x i8> %b, %a ++ %subab = sub <4 x i8> %a, %b ++ %select = select <4 x i1> %icmp, <4 x i8> %subba, <4 x i8> %subab ++ ret <4 x i8> %select ++} ++ ++define <2 x i64> @vabsd_d(<2 x i64> %a, <2 x i64> %b) { ++; CHECK-LABEL: vabsd_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vabsd.d $vr0, $vr1, $vr0 ++; CHECK-NEXT: jr $ra ++entry: ++ %icmp = icmp sgt <2 x i64> %b, %a ++ %subba = sub <2 x i64> %b, %a ++ %subab = 
sub <2 x i64> %a, %b ++ %select = select <2 x i1> %icmp, <2 x i64> %subba, <2 x i64> %subab ++ ret <2 x i64> %select ++} ++ ++define <2 x i32> @vabsd_d_v2i32(<2 x i32> %a, <2 x i32> %b) { ++; CHECK-LABEL: vabsd_d_v2i32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vabsd.d $vr0, $vr1, $vr0 ++; CHECK-NEXT: jr $ra ++entry: ++ %icmp = icmp sgt <2 x i32> %b, %a ++ %subba = sub <2 x i32> %b, %a ++ %subab = sub <2 x i32> %a, %b ++ %select = select <2 x i1> %icmp, <2 x i32> %subba, <2 x i32> %subab ++ ret <2 x i32> %select ++} ++ ++define <2 x i16> @vabsd_d_v2i16(<2 x i16> %a, <2 x i16> %b) { ++; CHECK-LABEL: vabsd_d_v2i16: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vabsd.d $vr0, $vr1, $vr0 ++; CHECK-NEXT: jr $ra ++entry: ++ %icmp = icmp sgt <2 x i16> %b, %a ++ %subba = sub <2 x i16> %b, %a ++ %subab = sub <2 x i16> %a, %b ++ %select = select <2 x i1> %icmp, <2 x i16> %subba, <2 x i16> %subab ++ ret <2 x i16> %select ++} ++ ++define <2 x i8> @vabsd_d_v2i8(<2 x i8> %a, <2 x i8> %b) { ++; CHECK-LABEL: vabsd_d_v2i8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vabsd.d $vr0, $vr1, $vr0 ++; CHECK-NEXT: jr $ra ++entry: ++ %icmp = icmp sgt <2 x i8> %b, %a ++ %subba = sub <2 x i8> %b, %a ++ %subab = sub <2 x i8> %a, %b ++ %select = select <2 x i1> %icmp, <2 x i8> %subba, <2 x i8> %subab ++ ret <2 x i8> %select ++} ++ ++define <16 x i8> @vabsd_bu(<16 x i8> %a, <16 x i8> %b) { ++; CHECK-LABEL: vabsd_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vabsd.bu $vr0, $vr1, $vr0 ++; CHECK-NEXT: jr $ra ++entry: ++ %icmp = icmp ugt <16 x i8> %b, %a ++ %subba = sub <16 x i8> %b, %a ++ %subab = sub <16 x i8> %a, %b ++ %select = select <16 x i1> %icmp, <16 x i8> %subba, <16 x i8> %subab ++ ret <16 x i8> %select ++} ++ ++define <8 x i16> @vabsd_hu(<8 x i16> %a, <8 x i16> %b) { ++; CHECK-LABEL: vabsd_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vabsd.hu $vr0, $vr1, $vr0 ++; CHECK-NEXT: jr $ra ++entry: ++ %icmp = icmp ugt <8 x i16> %b, %a ++ %subba = sub <8 x i16> %b, %a ++ %subab = sub <8 x i16> %a, %b ++ %select = select <8 x i1> %icmp, <8 x i16> %subba, <8 x i16> %subab ++ ret <8 x i16> %select ++} ++ ++define <8 x i8> @vabsd_hu_v8i8(<8 x i8> %a, <8 x i8> %b) { ++; CHECK-LABEL: vabsd_hu_v8i8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vabsd.hu $vr0, $vr1, $vr0 ++; CHECK-NEXT: jr $ra ++entry: ++ %icmp = icmp ugt <8 x i8> %b, %a ++ %subba = sub <8 x i8> %b, %a ++ %subab = sub <8 x i8> %a, %b ++ %select = select <8 x i1> %icmp, <8 x i8> %subba, <8 x i8> %subab ++ ret <8 x i8> %select ++} ++ ++define <4 x i32> @vabsd_wu(<4 x i32> %a, <4 x i32> %b) { ++; CHECK-LABEL: vabsd_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vabsd.wu $vr0, $vr1, $vr0 ++; CHECK-NEXT: jr $ra ++entry: ++ %icmp = icmp ugt <4 x i32> %b, %a ++ %subba = sub <4 x i32> %b, %a ++ %subab = sub <4 x i32> %a, %b ++ %select = select <4 x i1> %icmp, <4 x i32> %subba, <4 x i32> %subab ++ ret <4 x i32> %select ++} ++ ++define <4 x i16> @vabsd_wu_v4i16(<4 x i16> %a, <4 x i16> %b) { ++; CHECK-LABEL: vabsd_wu_v4i16: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vabsd.wu $vr0, $vr1, $vr0 ++; CHECK-NEXT: jr $ra ++entry: ++ %icmp = icmp ugt <4 x i16> %b, %a ++ %subba = sub <4 x i16> %b, %a ++ %subab = sub <4 x i16> %a, %b ++ %select = select <4 x i1> %icmp, <4 x i16> %subba, <4 x i16> %subab ++ ret <4 x i16> %select ++} ++ ++define <4 x i8> @vabsd_wu_v4i8(<4 x i8> %a, <4 x i8> %b) { ++; CHECK-LABEL: vabsd_wu_v4i8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vabsd.wu $vr0, $vr1, $vr0 ++; CHECK-NEXT: jr $ra ++entry: ++ %icmp = icmp ugt <4 x i8> %b, %a ++ 
%subba = sub <4 x i8> %b, %a ++ %subab = sub <4 x i8> %a, %b ++ %select = select <4 x i1> %icmp, <4 x i8> %subba, <4 x i8> %subab ++ ret <4 x i8> %select ++} ++ ++define <2 x i64> @vabsd_du(<2 x i64> %a, <2 x i64> %b) { ++; CHECK-LABEL: vabsd_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vabsd.du $vr0, $vr1, $vr0 ++; CHECK-NEXT: jr $ra ++entry: ++ %icmp = icmp ugt <2 x i64> %b, %a ++ %subba = sub <2 x i64> %b, %a ++ %subab = sub <2 x i64> %a, %b ++ %select = select <2 x i1> %icmp, <2 x i64> %subba, <2 x i64> %subab ++ ret <2 x i64> %select ++} ++ ++define <2 x i32> @vabsd_du_v2i32(<2 x i32> %a, <2 x i32> %b) { ++; CHECK-LABEL: vabsd_du_v2i32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vabsd.du $vr0, $vr1, $vr0 ++; CHECK-NEXT: jr $ra ++entry: ++ %icmp = icmp ugt <2 x i32> %b, %a ++ %subba = sub <2 x i32> %b, %a ++ %subab = sub <2 x i32> %a, %b ++ %select = select <2 x i1> %icmp, <2 x i32> %subba, <2 x i32> %subab ++ ret <2 x i32> %select ++} ++ ++define <2 x i16> @vabsd_du_v2i16(<2 x i16> %a, <2 x i16> %b) { ++; CHECK-LABEL: vabsd_du_v2i16: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vabsd.du $vr0, $vr1, $vr0 ++; CHECK-NEXT: jr $ra ++entry: ++ %icmp = icmp ugt <2 x i16> %b, %a ++ %subba = sub <2 x i16> %b, %a ++ %subab = sub <2 x i16> %a, %b ++ %select = select <2 x i1> %icmp, <2 x i16> %subba, <2 x i16> %subab ++ ret <2 x i16> %select ++} ++ ++define <2 x i8> @vabsd_du_v2i8(<2 x i8> %a, <2 x i8> %b) { ++; CHECK-LABEL: vabsd_du_v2i8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vabsd.du $vr0, $vr1, $vr0 ++; CHECK-NEXT: jr $ra ++entry: ++ %icmp = icmp ugt <2 x i8> %b, %a ++ %subba = sub <2 x i8> %b, %a ++ %subab = sub <2 x i8> %a, %b ++ %select = select <2 x i1> %icmp, <2 x i8> %subba, <2 x i8> %subab ++ ret <2 x i8> %select ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/vadda.ll b/llvm/test/CodeGen/LoongArch/lsx/vadda.ll +new file mode 100644 +index 000000000..4c987fb1b +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/vadda.ll +@@ -0,0 +1,62 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc -march=loongarch64 -mattr=+lsx < %s | FileCheck %s ++ ++define <16 x i8> @vaddab(<16 x i8>, <16 x i8>) { ++; CHECK-LABEL: vaddab: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vadda.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: jr $ra ++ %3 = icmp slt <16 x i8> %0, zeroinitializer ++ %4 = sub <16 x i8> zeroinitializer, %0 ++ %5 = select <16 x i1> %3, <16 x i8> %4, <16 x i8> %0 ++ %6 = icmp slt <16 x i8> %1, zeroinitializer ++ %7 = sub <16 x i8> zeroinitializer, %1 ++ %8 = select <16 x i1> %6, <16 x i8> %7, <16 x i8> %1 ++ %9 = add <16 x i8> %5, %8 ++ ret <16 x i8> %9 ++} ++ ++define <8 x i16> @vaddah(<8 x i16>, <8 x i16>) { ++; CHECK-LABEL: vaddah: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vadda.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: jr $ra ++ %3 = icmp slt <8 x i16> %0, zeroinitializer ++ %4 = sub <8 x i16> zeroinitializer, %0 ++ %5 = select <8 x i1> %3, <8 x i16> %4, <8 x i16> %0 ++ %6 = icmp slt <8 x i16> %1, zeroinitializer ++ %7 = sub <8 x i16> zeroinitializer, %1 ++ %8 = select <8 x i1> %6, <8 x i16> %7, <8 x i16> %1 ++ %9 = add <8 x i16> %5, %8 ++ ret <8 x i16> %9 ++} ++ ++define <4 x i32> @vaddaw(<4 x i32>, <4 x i32>) { ++; CHECK-LABEL: vaddaw: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vadda.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: jr $ra ++ %3 = icmp slt <4 x i32> %0, zeroinitializer ++ %4 = sub nsw <4 x i32> zeroinitializer, %0 ++ %5 = select <4 x i1> %3, <4 x i32> %4, <4 x i32> %0 ++ %6 = icmp slt <4 x i32> %1, zeroinitializer ++ %7 = sub nsw <4 x i32> zeroinitializer, %1 ++ 
%8 = select <4 x i1> %6, <4 x i32> %7, <4 x i32> %1 ++ %9 = add nuw nsw <4 x i32> %5, %8 ++ ret <4 x i32> %9 ++} ++ ++define <2 x i64> @vaddad(<2 x i64>, <2 x i64>) { ++; CHECK-LABEL: vaddad: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vadda.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: jr $ra ++ %3 = icmp slt <2 x i64> %0, zeroinitializer ++ %4 = sub nsw <2 x i64> zeroinitializer, %0 ++ %5 = select <2 x i1> %3, <2 x i64> %4, <2 x i64> %0 ++ %6 = icmp slt <2 x i64> %1, zeroinitializer ++ %7 = sub nsw <2 x i64> zeroinitializer, %1 ++ %8 = select <2 x i1> %6, <2 x i64> %7, <2 x i64> %1 ++ %9 = add nuw nsw <2 x i64> %5, %8 ++ ret <2 x i64> %9 ++} +diff --git a/llvm/test/CodeGen/LoongArch/lu12i.ll b/llvm/test/CodeGen/LoongArch/lu12i.ll +new file mode 100644 +index 000000000..55fd40edd +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lu12i.ll +@@ -0,0 +1,7 @@ ++; RUN: llc -march=loongarch64 -o - %s | FileCheck %s ++ ++define i32 @foo() { ++; CHECK: lu12i.w $r4, -1 ++entry: ++ ret i32 -4096 ++} +diff --git a/llvm/test/CodeGen/LoongArch/mcpu_load.ll b/llvm/test/CodeGen/LoongArch/mcpu_load.ll +new file mode 100644 +index 000000000..c0c782fea +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/mcpu_load.ll +@@ -0,0 +1,72 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc -march=loongarch64 -mcpu=la264 -o - %s | FileCheck -check-prefix=ALIGNED %s ++; RUN: llc -march=loongarch64 -mcpu=la364 -o - %s | FileCheck -check-prefix=ALIGNED %s ++; RUN: llc -march=loongarch64 -mcpu=la464 -o - %s | FileCheck -check-prefix=UNALIGNED %s ++ ++define i32 @i32_load(i32* %p) { ++; ALIGNED-LABEL: i32_load: ++; ALIGNED: # %bb.0: ++; ALIGNED-NEXT: ld.hu $r5, $r4, 0 ++; ALIGNED-NEXT: ld.hu $r4, $r4, 2 ++; ALIGNED-NEXT: slli.w $r4, $r4, 16 ++; ALIGNED-NEXT: or $r4, $r4, $r5 ++; ALIGNED-NEXT: jr $ra ++; ++; UNALIGNED-LABEL: i32_load: ++; UNALIGNED: # %bb.0: ++; UNALIGNED-NEXT: ld.w $r4, $r4, 0 ++; UNALIGNED-NEXT: jr $ra ++ %tmp = load i32, i32* %p, align 2 ++ ret i32 %tmp ++} ++ ++define signext i32 @i32_sextload(i32* %p) { ++; ALIGNED-LABEL: i32_sextload: ++; ALIGNED: # %bb.0: ++; ALIGNED-NEXT: ld.hu $r5, $r4, 0 ++; ALIGNED-NEXT: ld.h $r4, $r4, 2 ++; ALIGNED-NEXT: slli.d $r4, $r4, 16 ++; ALIGNED-NEXT: or $r4, $r4, $r5 ++; ALIGNED-NEXT: jr $ra ++; ++; UNALIGNED-LABEL: i32_sextload: ++; UNALIGNED: # %bb.0: ++; UNALIGNED-NEXT: ld.w $r4, $r4, 0 ++; UNALIGNED-NEXT: jr $ra ++ %tmp = load i32, i32* %p, align 2 ++ ret i32 %tmp ++} ++ ++define zeroext i32 @i32_zextload(i32* %p) { ++; ALIGNED-LABEL: i32_zextload: ++; ALIGNED: # %bb.0: ++; ALIGNED-NEXT: ld.hu $r5, $r4, 0 ++; ALIGNED-NEXT: ld.hu $r4, $r4, 2 ++; ALIGNED-NEXT: slli.d $r4, $r4, 16 ++; ALIGNED-NEXT: or $r4, $r4, $r5 ++; ALIGNED-NEXT: jr $ra ++; ++; UNALIGNED-LABEL: i32_zextload: ++; UNALIGNED: # %bb.0: ++; UNALIGNED-NEXT: ld.wu $r4, $r4, 0 ++; UNALIGNED-NEXT: jr $ra ++ %tmp = load i32, i32* %p, align 2 ++ ret i32 %tmp ++} ++ ++define i64 @i64_load(i64* %p) { ++; ALIGNED-LABEL: i64_load: ++; ALIGNED: # %bb.0: ++; ALIGNED-NEXT: ld.wu $r5, $r4, 0 ++; ALIGNED-NEXT: ld.wu $r4, $r4, 4 ++; ALIGNED-NEXT: slli.d $r4, $r4, 32 ++; ALIGNED-NEXT: or $r4, $r4, $r5 ++; ALIGNED-NEXT: jr $ra ++; ++; UNALIGNED-LABEL: i64_load: ++; UNALIGNED: # %bb.0: ++; UNALIGNED-NEXT: ld.d $r4, $r4, 0 ++; UNALIGNED-NEXT: jr $ra ++ %tmp = load i64, i64* %p, align 4 ++ ret i64 %tmp ++} +diff --git a/llvm/test/CodeGen/LoongArch/misc.mir b/llvm/test/CodeGen/LoongArch/misc.mir +deleted file mode 100644 +index 56793c583..000000000 +--- a/llvm/test/CodeGen/LoongArch/misc.mir ++++ 
/dev/null +@@ -1,200 +0,0 @@ +-# RUN: llc %s -mtriple=loongarch64 -start-after=prologepilog -O0 -filetype=obj -o - \ +-# RUN: | extract-section .text \ +-# RUN: | FileCheck %s -check-prefix=CHECK-ENC +-# RUN: llc %s -mtriple=loongarch64 -start-after=prologepilog -O0 -filetype=asm -o - \ +-# RUN: | FileCheck %s -check-prefix=CHECK-ASM +- +-# ------------------------------------------------------------------------------------------------- +-# Encoding format: I15 +-# ------------------------------------------------------------------------------------------------- +-# ---------------------------------------------------+--------------------------------------------- +-# 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00 +-# ---------------------------------------------------+--------------------------------------------- +-# opcode | imm15 +-# ---------------------------------------------------+--------------------------------------------- +- +---- +-# CHECK-LABEL: test_DBAR: +-# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +-# CHECK-ASM: dbar 0 +-name: test_DBAR +-body: | +- bb.0: +- DBAR 0 +-... +---- +-# CHECK-LABEL: test_IBAR: +-# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 1 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +-# CHECK-ASM: ibar 0 +-name: test_IBAR +-body: | +- bb.0: +- IBAR 0 +-... +---- +-# CHECK-LABEL: test_SYSCALL: +-# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 1 0 1 0 1 1 0 0 0 0 0 0 0 0 0 1 1 0 0 1 0 0 +-# CHECK-ASM: syscall 100 +-name: test_SYSCALL +-body: | +- bb.0: +- SYSCALL 100 +-... +---- +-# CHECK-LABEL: test_BREAK: +-# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 1 0 1 0 1 0 0 0 0 0 0 0 0 0 1 1 0 0 0 1 1 1 +-# CHECK-ASM: break 199 +-name: test_BREAK +-body: | +- bb.0: +- BREAK 199 +-... +- +-# ------------------------------------------------------------------------------------------------- +-# Encoding format: I26 +-# ------------------------------------------------------------------------------------------------- +-# ------------------+-----------------------------------------------+------------------------------ +-# 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00 +-# ------------------+-----------------------------------------------+------------------------------ +-# opcode | imm26{15-0} | imm26{25-16} +-# ------------------+-----------------------------------------------+------------------------------ +- +---- +-# CHECK-LABEL: test_B: +-# CHECK-ENC: 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 +-# CHECK-ASM: b 80 +-name: test_B +-body: | +- bb.0: +- B 80 +-... +---- +-# CHECK-LABEL: test_BL: +-# CHECK-ENC: 0 1 0 1 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 +-# CHECK-ASM: bl 136 +-name: test_BL +-body: | +- bb.0: +- BL 136 +-... 
+- +-# -------------------------------------------------------------------------------------------------------- +-# Encoding format: BSTR_W +-# -------------------------------------------------------------------------------------------------------- +-# ---------------------------------+--------------+---------+--------------+--------------+--------------- +-# 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00 +-# ---------------------------------+--------------+---------+--------------+--------------+--------------- +-# opcode{11-1} | msb |opcode{0}| lsb | rj | rd +-# ---------------------------------+--------------+---------+--------------+--------------+--------------- +- +---- +-# CHECK-LABEL: test_BSTRINS_W: +-# CHECK-ENC: 0 0 0 0 0 0 0 0 0 1 1 0 0 1 1 1 0 0 0 0 1 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: bstrins.w $a0, $a1, 7, 2 +-name: test_BSTRINS_W +-body: | +- bb.0: +- $r4 = BSTRINS_W $r4, $r5, 7, 2 +-... +---- +-# CHECK-LABEL: test_BSTRPICK_W: +-# CHECK-ENC: 0 0 0 0 0 0 0 0 0 1 1 0 1 0 1 0 1 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: bstrpick.w $a0, $a1, 10, 4 +-name: test_BSTRPICK_W +-body: | +- bb.0: +- $r4 = BSTRPICK_W $r5, 10, 4 +-... +- +-# ------------------------------------------------------------------------------------------------- +-# Encoding format: BSTR_D +-# ------------------------------------------------------------------------------------------------- +-# ------------------------------+-----------------+-----------------+--------------+--------------- +-# 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00 +-# ------------------------------+-----------------+-----------------+--------------+--------------- +-# opcode | msb | lsb | rj | rd +-# ------------------------------+-----------------+-----------------+--------------+--------------- +- +---- +-# CHECK-LABEL: test_BSTRINS_D: +-# CHECK-ENC: 0 0 0 0 0 0 0 0 1 0 0 0 0 1 1 1 0 0 0 0 1 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: bstrins.d $a0, $a1, 7, 2 +-name: test_BSTRINS_D +-body: | +- bb.0: +- $r4 = BSTRINS_D $r4, $r5, 7, 2 +-... +---- +-# CHECK-LABEL: test_BSTRPICK_D: +-# CHECK-ENC: 0 0 0 0 0 0 0 0 1 1 1 0 0 1 1 1 0 1 0 1 1 0 0 0 1 0 1 0 0 1 0 0 +-# CHECK-ASM: bstrpick.d $a0, $a1, 39, 22 +-name: test_BSTRPICK_D +-body: | +- bb.0: +- $r4 = BSTRPICK_D $r5, 39, 22 +-... +- +-# ------------------------------------------------------------------------------------------------- +-# Encoding format: ASRT +-# ------------------------------------------------------------------------------------------------- +-# ---------------------------------------------------+--------------+--------------+--------------- +-# 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00 +-# ---------------------------------------------------+--------------+--------------+--------------- +-# opcode | rk | rj | 0x0 +-# ---------------------------------------------------+--------------+--------------+--------------- +- +---- +-# CHECK-LABEL: test_ASRTLE_D: +-# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 1 0 0 1 0 0 0 0 0 0 0 +-# CHECK-ASM: asrtle.d $a0, $a1 +-name: test_ASRTLE_D +-body: | +- bb.0: +- ASRTLE_D $r4, $r5 +-... +---- +-# CHECK-LABEL: test_ASRTGT_D: +-# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 1 0 1 0 0 1 0 0 0 0 0 0 0 +-# CHECK-ASM: asrtgt.d $a0, $a1 +-name: test_ASRTGT_D +-body: | +- bb.0: +- ASRTGT_D $r4, $r5 +-... 
+- +-# ------------------------------------------------------------------------------------------------- +-# Encoding format: PRELD +-# ------------------------------------------------------------------------------------------------- +-# ------------------------------+-----------------------------------+--------------+--------------- +-# 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00 +-# ------------------------------+-----------------------------------+--------------+--------------- +-# opcode | imm12 | rj | imm5 +-# ------------------------------+-----------------------------------+--------------+--------------- +- +---- +-# CHECK-LABEL: test_PRELD: +-# CHECK-ENC: 0 0 1 0 1 0 1 0 1 1 0 0 0 0 0 0 0 1 0 1 0 1 0 0 1 0 0 0 1 1 1 1 +-# CHECK-ASM: preld 15, $a0, 21 +-name: test_PRELD +-body: | +- bb.0: +- PRELD 15, $r4, 21 +-... +- +-# ------------------------------------------------------------------------------------------------- +-# Encoding format: PRELDX +-# ------------------------------------------------------------------------------------------------- +-# ---------------------------------------------------+--------------+--------------+--------------- +-# 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00 +-# ---------------------------------------------------+--------------+--------------+--------------- +-# opcode | rk | rj | imm5 +-# ---------------------------------------------------+--------------+--------------+--------------- +- +---- +-# CHECK-LABEL: test_PRELDX: +-# CHECK-ENC: 0 0 1 1 1 0 0 0 0 0 1 0 1 1 0 0 0 0 0 1 0 1 0 0 1 0 0 0 1 0 1 1 +-# CHECK-ASM: preldx 11, $a0, $a1 +-name: test_PRELDX +-body: | +- bb.0: +- PRELDX 11, $r4, $r5 +diff --git a/llvm/test/CodeGen/LoongArch/named-register.ll b/llvm/test/CodeGen/LoongArch/named-register.ll +new file mode 100644 +index 000000000..0b0660fca +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/named-register.ll +@@ -0,0 +1,29 @@ ++; RUN: llc -march=loongarch64 < %s | FileCheck %s ++ ++define i64 @get_r2() { ++; CHECK-LABEL: get_r2: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: move $r4, $tp ++; CHECK-NEXT: jr $ra ++entry: ++ %0 = call i64 @llvm.read_register.i64(metadata !0) ++ ret i64 %0 ++} ++ ++define i64 @get_r21() { ++; CHECK-LABEL: get_r21: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: move $r4, $r21 ++; CHECK-NEXT: jr $ra ++entry: ++ %0 = call i64 @llvm.read_register.i64(metadata !1) ++ ret i64 %0 ++} ++ ++declare i64 @llvm.read_register.i64(metadata) ++ ++!llvm.named.register.$r2 = !{!0} ++!llvm.named.register.$r21 = !{!1} ++ ++!0 = !{!"$r2"} ++!1 = !{!"$r21"} +diff --git a/llvm/test/CodeGen/LoongArch/nomerge.ll b/llvm/test/CodeGen/LoongArch/nomerge.ll +new file mode 100644 +index 000000000..a8ce63225 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/nomerge.ll +@@ -0,0 +1,35 @@ ++; RUN: llc < %s -mtriple=loongarch64 -relocation-model=pic -o - | FileCheck %s ++ ++define void @foo(i32 %i) { ++entry: ++ switch i32 %i, label %if.end3 [ ++ i32 5, label %if.then ++ i32 7, label %if.then2 ++ ] ++ ++if.then: ++ tail call void @bar() #0 ++ br label %if.end3 ++ ++if.then2: ++ tail call void @bar() #0 ++ br label %if.end3 ++ ++if.end3: ++ tail call void @bar() #0 ++ ret void ++} ++ ++declare void @bar() ++ ++attributes #0 = { nomerge } ++ ++; CHECK-LABEL: foo: ++; CHECK: # %bb.0: # %entry ++; CHECK: .LBB0_1: # %entry ++; CHECK: .LBB0_2: # %if.then ++; CHECK-NEXT: bl bar ++; CHECK: .LBB0_3: # %if.then2 ++; CHECK-NEXT: bl bar ++; CHECK: .LBB0_4: 
# %if.end3 ++; CHECK: b bar +diff --git a/llvm/test/CodeGen/LoongArch/noti32.ll b/llvm/test/CodeGen/LoongArch/noti32.ll +new file mode 100644 +index 000000000..9aa8c4391 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/noti32.ll +@@ -0,0 +1,143 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc -march=loongarch64 < %s | FileCheck %s ++ ++define i8 @nor_i8(i8 %a, i8 %b) nounwind { ++; CHECK-LABEL: nor_i8: ++; CHECK: # %bb.0: ++; CHECK-NEXT: or $r4, $r4, $r5 ++; CHECK-NEXT: slli.w $r4, $r4, 0 ++; CHECK-NEXT: nor $r4, $zero, $r4 ++; CHECK-NEXT: jr $ra ++ %or = or i8 %a, %b ++ %neg = xor i8 %or, -1 ++ ret i8 %neg ++} ++ ++define i16 @nor_i16(i16 %a, i16 %b) nounwind { ++; CHECK-LABEL: nor_i16: ++; CHECK: # %bb.0: ++; CHECK-NEXT: or $r4, $r4, $r5 ++; CHECK-NEXT: slli.w $r4, $r4, 0 ++; CHECK-NEXT: nor $r4, $zero, $r4 ++; CHECK-NEXT: jr $ra ++ %or = or i16 %a, %b ++ %neg = xor i16 %or, -1 ++ ret i16 %neg ++} ++ ++define i32 @nor_i32(i32 %a, i32 %b) nounwind { ++; CHECK-LABEL: nor_i32: ++; CHECK: # %bb.0: ++; CHECK-NEXT: or $r4, $r4, $r5 ++; CHECK-NEXT: slli.w $r4, $r4, 0 ++; CHECK-NEXT: nor $r4, $zero, $r4 ++; CHECK-NEXT: jr $ra ++ %or = or i32 %a, %b ++ %neg = xor i32 %or, -1 ++ ret i32 %neg ++} ++ ++define i8 @nor_zero_i8(i8 %a) nounwind { ++; CHECK-LABEL: nor_zero_i8: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r4, $r4, 0 ++; CHECK-NEXT: nor $r4, $zero, $r4 ++; CHECK-NEXT: jr $ra ++ %neg = xor i8 %a, -1 ++ ret i8 %neg ++} ++ ++define i16 @nor_zero_i16(i16 %a) nounwind { ++; CHECK-LABEL: nor_zero_i16: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r4, $r4, 0 ++; CHECK-NEXT: nor $r4, $zero, $r4 ++; CHECK-NEXT: jr $ra ++ %neg = xor i16 %a, -1 ++ ret i16 %neg ++} ++ ++define i32 @nor_zero_i32(i32 %a) nounwind { ++; CHECK-LABEL: nor_zero_i32: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r4, $r4, 0 ++; CHECK-NEXT: nor $r4, $zero, $r4 ++; CHECK-NEXT: jr $ra ++ %neg = xor i32 %a, -1 ++ ret i32 %neg ++} ++ ++define i8 @orn_i8(i8 %a, i8 %b) nounwind { ++; CHECK-LABEL: orn_i8: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r4, $r4, 0 ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: nor $r5, $zero, $r5 ++; CHECK-NEXT: or $r4, $r5, $r4 ++; CHECK-NEXT: jr $ra ++ %neg = xor i8 %b, -1 ++ %or = or i8 %neg, %a ++ ret i8 %or ++} ++ ++define i16 @orn_i16(i16 %a, i16 %b) nounwind { ++; CHECK-LABEL: orn_i16: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r4, $r4, 0 ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: nor $r5, $zero, $r5 ++; CHECK-NEXT: or $r4, $r5, $r4 ++; CHECK-NEXT: jr $ra ++ %neg = xor i16 %b, -1 ++ %or = or i16 %neg, %a ++ ret i16 %or ++} ++ ++define i32 @orn_i32(i32 %a, i32 %b) nounwind { ++; CHECK-LABEL: orn_i32: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r4, $r4, 0 ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: nor $r5, $zero, $r5 ++; CHECK-NEXT: or $r4, $r5, $r4 ++; CHECK-NEXT: jr $ra ++ %neg = xor i32 %b, -1 ++ %or = or i32 %neg, %a ++ ret i32 %or ++} ++ ++define i8 @andn_i8(i8 %a, i8 %b) nounwind { ++; CHECK-LABEL: andn_i8: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: slli.w $r4, $r4, 0 ++; CHECK-NEXT: andn $r4, $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %neg = xor i8 %b, -1 ++ %and = and i8 %neg, %a ++ ret i8 %and ++} ++ ++define i16 @andn_i16(i16 %a, i16 %b) nounwind { ++; CHECK-LABEL: andn_i16: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: slli.w $r4, $r4, 0 ++; CHECK-NEXT: andn $r4, $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %neg = xor i16 %b, -1 ++ %and = and i16 %neg, %a ++ ret i16 %and ++} ++ ++define i32 
@andn_i32(i32 %a, i32 %b) nounwind { ++; CHECK-LABEL: andn_i32: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: slli.w $r4, $r4, 0 ++; CHECK-NEXT: andn $r4, $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %neg = xor i32 %b, -1 ++ %and = and i32 %neg, %a ++ ret i32 %and ++} +diff --git a/llvm/test/CodeGen/LoongArch/peephole-load-store-addi.ll b/llvm/test/CodeGen/LoongArch/peephole-load-store-addi.ll +new file mode 100644 +index 000000000..541ea4256 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/peephole-load-store-addi.ll +@@ -0,0 +1,100 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s ++ ++define i8 @load_i8() nounwind { ++; CHECK-LABEL: load_i8: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ld.bu $r4, $zero, 40 ++; CHECK-NEXT: jr $ra ++ %a = load i8, i8* inttoptr (i64 40 to i8*), align 8 ++ ret i8 %a ++} ++define signext i8 @load_i8_sext() nounwind { ++; CHECK-LABEL: load_i8_sext: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ld.b $r4, $zero, 40 ++; CHECK-NEXT: jr $ra ++ %a = load i8, i8* inttoptr (i64 40 to i8*), align 8 ++ ret i8 %a ++} ++ ++define i16 @load_i16() nounwind { ++; CHECK-LABEL: load_i16: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ld.hu $r4, $zero, 40 ++; CHECK-NEXT: jr $ra ++ %a = load i16, i16* inttoptr (i64 40 to i16*), align 8 ++ ret i16 %a ++} ++ ++define signext i16 @load_i16_sext() nounwind { ++; CHECK-LABEL: load_i16_sext: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ld.h $r4, $zero, 40 ++; CHECK-NEXT: jr $ra ++ %a = load i16, i16* inttoptr (i64 40 to i16*), align 8 ++ ret i16 %a ++} ++ ++define i32 @load_i32() nounwind { ++; CHECK-LABEL: load_i32: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ld.w $r4, $zero, 40 ++; CHECK-NEXT: jr $ra ++ %a = load i32, i32* inttoptr (i64 40 to i32*), align 8 ++ ret i32 %a ++} ++ ++define signext i32 @load_i32_sext() nounwind { ++; CHECK-LABEL: load_i32_sext: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ld.w $r4, $zero, 40 ++; CHECK-NEXT: jr $ra ++ %a = load i32, i32* inttoptr (i64 40 to i32*), align 8 ++ ret i32 %a ++} ++ ++define i64 @load_i64() nounwind { ++; CHECK-LABEL: load_i64: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ld.d $r4, $zero, 40 ++; CHECK-NEXT: jr $ra ++ %a = load i64, i64* inttoptr (i64 40 to i64*), align 8 ++ ret i64 %a ++} ++ ++define void @store_i8(i8 %v) nounwind { ++; CHECK-LABEL: store_i8: ++; CHECK: # %bb.0: ++; CHECK-NEXT: st.b $r4, $zero, 40 ++; CHECK-NEXT: jr $ra ++ store i8 %v, i8* inttoptr (i64 40 to i8*), align 8 ++ ret void ++} ++ ++define void @store_i16(i16 %v) nounwind { ++; CHECK-LABEL: store_i16: ++; CHECK: # %bb.0: ++; CHECK-NEXT: st.h $r4, $zero, 40 ++; CHECK-NEXT: jr $ra ++ store i16 %v, i16* inttoptr (i64 40 to i16*), align 8 ++ ret void ++} ++ ++define void @store_i32(i32 %v) nounwind { ++; CHECK-LABEL: store_i32: ++; CHECK: # %bb.0: ++; CHECK-NEXT: st.w $r4, $zero, 40 ++; CHECK-NEXT: jr $ra ++ store i32 %v, i32* inttoptr (i64 40 to i32*), align 8 ++ ret void ++} ++ ++define void @store_i64(i64 %v) nounwind { ++; CHECK-LABEL: store_i64: ++; CHECK: # %bb.0: ++; CHECK-NEXT: st.d $r4, $zero, 40 ++; CHECK-NEXT: jr $ra ++ store i64 %v, i64* inttoptr (i64 40 to i64*), align 8 ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/shift-masked-shamt.ll b/llvm/test/CodeGen/LoongArch/shift-masked-shamt.ll +deleted file mode 100644 +index 1878e0ed2..000000000 +--- a/llvm/test/CodeGen/LoongArch/shift-masked-shamt.ll ++++ /dev/null +@@ -1,255 +0,0 @@ +-; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32 +-; RUN: llc --mtriple=loongarch64 < 
%s | FileCheck %s --check-prefix=LA64 +- +-;; This test checks that unnecessary masking of shift amount operands is +-;; eliminated during instruction selection. The test needs to ensure that the +-;; masking is not removed if it may affect the shift amount. +- +-define i32 @sll_redundant_mask(i32 %a, i32 %b) { +-; LA32-LABEL: sll_redundant_mask: +-; LA32: # %bb.0: +-; LA32-NEXT: sll.w $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: sll_redundant_mask: +-; LA64: # %bb.0: +-; LA64-NEXT: sll.w $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %1 = and i32 %b, 31 +- %2 = shl i32 %a, %1 +- ret i32 %2 +-} +- +-define i32 @sll_non_redundant_mask(i32 %a, i32 %b) { +-; LA32-LABEL: sll_non_redundant_mask: +-; LA32: # %bb.0: +-; LA32-NEXT: andi $a1, $a1, 15 +-; LA32-NEXT: sll.w $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: sll_non_redundant_mask: +-; LA64: # %bb.0: +-; LA64-NEXT: andi $a1, $a1, 15 +-; LA64-NEXT: sll.w $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %1 = and i32 %b, 15 +- %2 = shl i32 %a, %1 +- ret i32 %2 +-} +- +-define i32 @srl_redundant_mask(i32 %a, i32 %b) { +-; LA32-LABEL: srl_redundant_mask: +-; LA32: # %bb.0: +-; LA32-NEXT: srl.w $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: srl_redundant_mask: +-; LA64: # %bb.0: +-; LA64-NEXT: srl.w $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %1 = and i32 %b, 4095 +- %2 = lshr i32 %a, %1 +- ret i32 %2 +-} +- +-define i32 @srl_non_redundant_mask(i32 %a, i32 %b) { +-; LA32-LABEL: srl_non_redundant_mask: +-; LA32: # %bb.0: +-; LA32-NEXT: andi $a1, $a1, 7 +-; LA32-NEXT: srl.w $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: srl_non_redundant_mask: +-; LA64: # %bb.0: +-; LA64-NEXT: andi $a1, $a1, 7 +-; LA64-NEXT: srl.w $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %1 = and i32 %b, 7 +- %2 = lshr i32 %a, %1 +- ret i32 %2 +-} +- +-define i32 @sra_redundant_mask(i32 %a, i32 %b) { +-; LA32-LABEL: sra_redundant_mask: +-; LA32: # %bb.0: +-; LA32-NEXT: sra.w $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: sra_redundant_mask: +-; LA64: # %bb.0: +-; LA64-NEXT: sra.w $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %1 = and i32 %b, 65535 +- %2 = ashr i32 %a, %1 +- ret i32 %2 +-} +- +-define i32 @sra_non_redundant_mask(i32 %a, i32 %b) { +-; LA32-LABEL: sra_non_redundant_mask: +-; LA32: # %bb.0: +-; LA32-NEXT: andi $a1, $a1, 32 +-; LA32-NEXT: sra.w $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: sra_non_redundant_mask: +-; LA64: # %bb.0: +-; LA64-NEXT: andi $a1, $a1, 32 +-; LA64-NEXT: sra.w $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %1 = and i32 %b, 32 +- %2 = ashr i32 %a, %1 +- ret i32 %2 +-} +- +-define i32 @sll_redundant_mask_zeros(i32 %a, i32 %b) { +-; LA32-LABEL: sll_redundant_mask_zeros: +-; LA32: # %bb.0: +-; LA32-NEXT: slli.w $a1, $a1, 1 +-; LA32-NEXT: sll.w $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: sll_redundant_mask_zeros: +-; LA64: # %bb.0: +-; LA64-NEXT: slli.d $a1, $a1, 1 +-; LA64-NEXT: sll.w $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %1 = shl i32 %b, 1 +- %2 = and i32 %1, 30 +- %3 = shl i32 %a, %2 +- ret i32 %3 +-} +- +-define i32 @srl_redundant_mask_zeros(i32 %a, i32 %b) { +-; LA32-LABEL: srl_redundant_mask_zeros: +-; LA32: # %bb.0: +-; LA32-NEXT: slli.w $a1, $a1, 2 +-; LA32-NEXT: srl.w $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: srl_redundant_mask_zeros: +-; LA64: # %bb.0: +-; LA64-NEXT: slli.d $a1, $a1, 2 +-; LA64-NEXT: srl.w $a0, $a0, $a1 
+-; LA64-NEXT: jirl $zero, $ra, 0 +- %1 = shl i32 %b, 2 +- %2 = and i32 %1, 28 +- %3 = lshr i32 %a, %2 +- ret i32 %3 +-} +- +-define i32 @sra_redundant_mask_zeros(i32 %a, i32 %b) { +-; LA32-LABEL: sra_redundant_mask_zeros: +-; LA32: # %bb.0: +-; LA32-NEXT: slli.w $a1, $a1, 3 +-; LA32-NEXT: sra.w $a0, $a0, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: sra_redundant_mask_zeros: +-; LA64: # %bb.0: +-; LA64-NEXT: slli.d $a1, $a1, 3 +-; LA64-NEXT: sra.w $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %1 = shl i32 %b, 3 +- %2 = and i32 %1, 24 +- %3 = ashr i32 %a, %2 +- ret i32 %3 +-} +- +-define i64 @sll_redundant_mask_zeros_i64(i64 %a, i64 %b) { +-; LA32-LABEL: sll_redundant_mask_zeros_i64: +-; LA32: # %bb.0: +-; LA32-NEXT: slli.w $a2, $a2, 2 +-; LA32-NEXT: srli.w $a3, $a0, 1 +-; LA32-NEXT: andi $a4, $a2, 60 +-; LA32-NEXT: xori $a5, $a4, 31 +-; LA32-NEXT: srl.w $a3, $a3, $a5 +-; LA32-NEXT: sll.w $a1, $a1, $a2 +-; LA32-NEXT: or $a1, $a1, $a3 +-; LA32-NEXT: addi.w $a3, $a4, -32 +-; LA32-NEXT: slti $a4, $a3, 0 +-; LA32-NEXT: maskeqz $a1, $a1, $a4 +-; LA32-NEXT: sll.w $a5, $a0, $a3 +-; LA32-NEXT: masknez $a4, $a5, $a4 +-; LA32-NEXT: or $a1, $a1, $a4 +-; LA32-NEXT: sll.w $a0, $a0, $a2 +-; LA32-NEXT: srai.w $a2, $a3, 31 +-; LA32-NEXT: and $a0, $a2, $a0 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: sll_redundant_mask_zeros_i64: +-; LA64: # %bb.0: +-; LA64-NEXT: slli.d $a1, $a1, 2 +-; LA64-NEXT: sll.d $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %1 = shl i64 %b, 2 +- %2 = and i64 %1, 60 +- %3 = shl i64 %a, %2 +- ret i64 %3 +-} +- +-define i64 @srl_redundant_mask_zeros_i64(i64 %a, i64 %b) { +-; LA32-LABEL: srl_redundant_mask_zeros_i64: +-; LA32: # %bb.0: +-; LA32-NEXT: slli.w $a2, $a2, 3 +-; LA32-NEXT: slli.w $a3, $a1, 1 +-; LA32-NEXT: andi $a4, $a2, 56 +-; LA32-NEXT: xori $a5, $a4, 31 +-; LA32-NEXT: sll.w $a3, $a3, $a5 +-; LA32-NEXT: srl.w $a0, $a0, $a2 +-; LA32-NEXT: or $a0, $a0, $a3 +-; LA32-NEXT: addi.w $a3, $a4, -32 +-; LA32-NEXT: slti $a4, $a3, 0 +-; LA32-NEXT: maskeqz $a0, $a0, $a4 +-; LA32-NEXT: srl.w $a5, $a1, $a3 +-; LA32-NEXT: masknez $a4, $a5, $a4 +-; LA32-NEXT: or $a0, $a0, $a4 +-; LA32-NEXT: srl.w $a1, $a1, $a2 +-; LA32-NEXT: srai.w $a2, $a3, 31 +-; LA32-NEXT: and $a1, $a2, $a1 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: srl_redundant_mask_zeros_i64: +-; LA64: # %bb.0: +-; LA64-NEXT: slli.d $a1, $a1, 3 +-; LA64-NEXT: srl.d $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %1 = shl i64 %b, 3 +- %2 = and i64 %1, 56 +- %3 = lshr i64 %a, %2 +- ret i64 %3 +-} +- +-define i64 @sra_redundant_mask_zeros_i64(i64 %a, i64 %b) { +-; LA32-LABEL: sra_redundant_mask_zeros_i64: +-; LA32: # %bb.0: +-; LA32-NEXT: slli.w $a3, $a2, 4 +-; LA32-NEXT: srai.w $a2, $a1, 31 +-; LA32-NEXT: andi $a4, $a3, 48 +-; LA32-NEXT: addi.w $a5, $a4, -32 +-; LA32-NEXT: slti $a6, $a5, 0 +-; LA32-NEXT: masknez $a2, $a2, $a6 +-; LA32-NEXT: sra.w $a7, $a1, $a3 +-; LA32-NEXT: maskeqz $a7, $a7, $a6 +-; LA32-NEXT: or $a2, $a7, $a2 +-; LA32-NEXT: srl.w $a0, $a0, $a3 +-; LA32-NEXT: xori $a3, $a4, 31 +-; LA32-NEXT: slli.w $a4, $a1, 1 +-; LA32-NEXT: sll.w $a3, $a4, $a3 +-; LA32-NEXT: or $a0, $a0, $a3 +-; LA32-NEXT: sra.w $a1, $a1, $a5 +-; LA32-NEXT: maskeqz $a0, $a0, $a6 +-; LA32-NEXT: masknez $a1, $a1, $a6 +-; LA32-NEXT: or $a0, $a0, $a1 +-; LA32-NEXT: move $a1, $a2 +-; LA32-NEXT: jirl $zero, $ra, 0 +-; +-; LA64-LABEL: sra_redundant_mask_zeros_i64: +-; LA64: # %bb.0: +-; LA64-NEXT: slli.d $a1, $a1, 4 +-; LA64-NEXT: sra.d $a0, $a0, $a1 +-; LA64-NEXT: jirl $zero, $ra, 0 +- %1 = shl i64 %b, 4 +- %2 = 
and i64 %1, 48 +- %3 = ashr i64 %a, %2 +- ret i64 %3 +-} +diff --git a/llvm/test/CodeGen/LoongArch/signext.ll b/llvm/test/CodeGen/LoongArch/signext.ll +new file mode 100644 +index 000000000..13c710d14 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/signext.ll +@@ -0,0 +1,37 @@ ++; RUN: llc -march=loongarch64 < %s | FileCheck %s ++ ++define i32 @foo(i32 signext %a) { ++; CHECK-LABEL: foo: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r4, $r4, 0 ++; CHECK-NEXT: jr $ra ++ ret i32 %a ++} ++ ++define signext i32 @foo1() { ++; CHECK-LABEL: foo1: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: ori $r4, $zero, 0 ++; CHECK-NEXT: ori $r5, $zero, 896 ++; CHECK-NEXT: move $r6, $r4 ++; CHECK-NEXT: .LBB1_1: # %for.body ++; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: add.w $r4, $r4, $r6 ++; CHECK-NEXT: addi.w $r6, $r6, 1 ++; CHECK-NEXT: bne $r6, $r5, .LBB1_1 ++; CHECK-NEXT: # %bb.2: # %for.end ++; CHECK-NEXT: jr $ra ++entry: ++ br label %for.body ++ ++for.body: ++ %sum.013 = phi i32 [ 0, %entry ], [ %add, %for.body ] ++ %i.010 = phi i32 [ 0, %entry ], [ %inc, %for.body ] ++ %add = add i32 %sum.013, %i.010 ++ %inc = add nuw nsw i32 %i.010, 1 ++ %exitcond = icmp eq i32 %inc, 896 ++ br i1 %exitcond, label %for.end, label %for.body ++ ++for.end: ++ ret i32 %add ++} +diff --git a/llvm/test/CodeGen/LoongArch/stptr.ll b/llvm/test/CodeGen/LoongArch/stptr.ll +new file mode 100644 +index 000000000..0a54e0f8f +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/stptr.ll +@@ -0,0 +1,52 @@ ++; Check whether st.w/st.d/stptr.w/stptr.d/stx.w/stx.d instructions are properly generated ++; RUN: llc -march=loongarch64 -o - %s | FileCheck %s ++ ++define void @st_w(i32* %p, i32 signext %val) { ++; CHECK: st.w $r5, $r4, 2044 ++; CHECK: jr $ra ++ %addr = getelementptr inbounds i32, i32* %p, i64 511 ++ store i32 %val, i32* %addr, align 4 ++ ret void ++} ++ ++define void @stptr_w(i32* %p, i32 signext %val) { ++; CHECK: stptr.w $r5, $r4, 2048 ++; CHECK: jr $ra ++ %addr = getelementptr inbounds i32, i32* %p, i64 512 ++ store i32 %val, i32* %addr, align 4 ++ ret void ++} ++ ++define void @stx_w(i32* %p, i32 signext %val) { ++; CHECK: lu12i.w $r[[REG:[0-9]+]], 8 ++; CHECK: stx.w $r5, $r4, $r[[REG:[0-9]+]] ++; CHECK: jr $ra ++ %addr = getelementptr inbounds i32, i32* %p, i64 8192 ++ store i32 %val, i32* %addr, align 4 ++ ret void ++} ++ ++define void @st_d(i64* %p, i64 %val) { ++; CHECK: st.d $r5, $r4, 2040 ++; CHECK: jr $ra ++ %addr = getelementptr inbounds i64, i64* %p, i64 255 ++ store i64 %val, i64* %addr, align 8 ++ ret void ++} ++ ++define void @stptr_d(i64* %p, i64 %val) { ++; CHECK: stptr.d $r5, $r4, 2048 ++; CHECK: jr $ra ++ %addr = getelementptr inbounds i64, i64* %p, i64 256 ++ store i64 %val, i64* %addr, align 8 ++ ret void ++} ++ ++define void @stx_d(i64* %p, i64 %val) { ++; CHECK: lu12i.w $r[[REG:[0-9]+]], 8 ++; CHECK: stx.d $r5, $r4, $r[[REG:[0-9]+]] ++; CHECK: jr $ra ++ %addr = getelementptr inbounds i64, i64* %p, i64 4096 ++ store i64 %val, i64* %addr, align 8 ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/tailcall-R.ll b/llvm/test/CodeGen/LoongArch/tailcall-R.ll +new file mode 100644 +index 000000000..2445e32ea +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/tailcall-R.ll +@@ -0,0 +1,62 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc -march=loongarch64 -relocation-model=pic < %s | FileCheck %s ++ ++@errors = external local_unnamed_addr global i32, align 4 ++ ++define signext i32 @compare(i8* %x, i8* %y) { ++; CHECK-LABEL: compare: ++; CHECK: # 
%bb.0: # %entry ++; CHECK-NEXT: addi.d $sp, $sp, -32 ++; CHECK-NEXT: .cfi_def_cfa_offset 32 ++; CHECK-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r23, $sp, 16 # 8-byte Folded Spill ++; CHECK-NEXT: .cfi_offset 1, -8 ++; CHECK-NEXT: .cfi_offset 23, -16 ++; CHECK-NEXT: ld.w $r23, $r5, 0 ++; CHECK-NEXT: ld.d $r6, $r4, 8 ++; CHECK-NEXT: beqz $r23, .LBB0_3 ++; CHECK-NEXT: # %bb.1: # %land.lhs.true ++; CHECK-NEXT: ld.w $r4, $r4, 0 ++; CHECK-NEXT: st.d $r6, $sp, 8 # 8-byte Folded Spill ++; CHECK-NEXT: ld.d $r5, $sp, 8 # 8-byte Folded Reload ++; CHECK-NEXT: jirl $ra, $r5, 0 ++; CHECK-NEXT: ld.d $r6, $sp, 8 # 8-byte Folded Reload ++; CHECK-NEXT: beqz $r4, .LBB0_3 ++; CHECK-NEXT: # %bb.2: # %if.then ++; CHECK-NEXT: la.got $r4, errors ++; CHECK-NEXT: # la expanded slot ++; CHECK-NEXT: ld.w $r5, $r4, 0 ++; CHECK-NEXT: addi.w $r5, $r5, 1 ++; CHECK-NEXT: st.w $r5, $r4, 0 ++; CHECK-NEXT: .LBB0_3: # %if.end ++; CHECK-NEXT: move $r4, $r23 ++; CHECK-NEXT: ld.d $r23, $sp, 16 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 32 ++; CHECK-NEXT: jr $r6 ++entry: ++ %compare = getelementptr inbounds i8, i8* %x, i64 8 ++ %0 = bitcast i8* %compare to i32 (i32)** ++ %1 = load i32 (i32)*, i32 (i32)** %0, align 8 ++ %elt = bitcast i8* %y to i32* ++ %2 = load i32, i32* %elt, align 8 ++ %cmp = icmp eq i32 %2, 0 ++ br i1 %cmp, label %if.end, label %land.lhs.true ++ ++land.lhs.true: ; preds = %entry ++ %elt3 = bitcast i8* %x to i32* ++ %3 = load i32, i32* %elt3, align 8 ++ %call4 = tail call signext i32 %1(i32 signext %3) ++ %cmp5 = icmp eq i32 %call4, 0 ++ br i1 %cmp5, label %if.end, label %if.then ++ ++if.then: ; preds = %land.lhs.true ++ %4 = load i32, i32* @errors, align 4 ++ %inc = add nsw i32 %4, 1 ++ store i32 %inc, i32* @errors, align 4 ++ br label %if.end ++ ++if.end: ; preds = %if.then, %land.lhs.true, %entry ++ %call6 = tail call signext i32 %1(i32 signext %2) ++ ret i32 %call6 ++} +diff --git a/llvm/test/CodeGen/LoongArch/tailcall-check.ll b/llvm/test/CodeGen/LoongArch/tailcall-check.ll +new file mode 100644 +index 000000000..2b5902d69 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/tailcall-check.ll +@@ -0,0 +1,155 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc -march=loongarch64 -relocation-model=pic < %s | FileCheck %s ++ ++; Perform tail call optimization for global address. ++declare i32 @callee_tail(i32 %i) ++define i32 @caller_tail(i32 %i) { ++; CHECK-LABEL: caller_tail: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: b callee_tail ++entry: ++ %r = tail call i32 @callee_tail(i32 %i) ++ ret i32 %r ++} ++ ++ ++; Do not tail call optimize functions with varargs. ++declare i32 @callee_varargs(i32, ...) ++define void @caller_varargs(i32 %a, i32 %b) { ++; CHECK-LABEL: caller_varargs: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: addi.d $sp, $sp, -16 ++; CHECK-NEXT: .cfi_def_cfa_offset 16 ++; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill ++; CHECK-NEXT: .cfi_offset 1, -8 ++; CHECK-NEXT: move $r6, $r5 ++; CHECK-NEXT: move $r7, $r4 ++; CHECK-NEXT: bl callee_varargs ++; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 16 ++; CHECK-NEXT: jr $ra ++entry: ++ %call = tail call i32 (i32, ...) @callee_varargs(i32 %a, i32 %b, i32 %b, i32 %a) ++ ret void ++} ++ ++ ++; Do not tail call optimize if stack is used to pass parameters. 
++declare i32 @callee_args(i32 %a, i32 %b, i32 %c, i32 %dd, i32 %e, i32 %ff, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n) ++define i32 @caller_args(i32 %a, i32 %b, i32 %c, i32 %dd, i32 %e, i32 %ff, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n) { ++; CHECK-LABEL: caller_args: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: addi.d $sp, $sp, -64 ++; CHECK-NEXT: .cfi_def_cfa_offset 64 ++; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill ++; CHECK-NEXT: .cfi_offset 1, -8 ++; CHECK-NEXT: ld.d $r12, $sp, 64 ++; CHECK-NEXT: ld.d $r13, $sp, 72 ++; CHECK-NEXT: ld.d $r14, $sp, 80 ++; CHECK-NEXT: ld.d $r15, $sp, 88 ++; CHECK-NEXT: ld.d $r16, $sp, 96 ++; CHECK-NEXT: ld.d $r17, $sp, 104 ++; CHECK-NEXT: st.d $r17, $sp, 40 ++; CHECK-NEXT: st.d $r16, $sp, 32 ++; CHECK-NEXT: st.d $r15, $sp, 24 ++; CHECK-NEXT: st.d $r14, $sp, 16 ++; CHECK-NEXT: st.d $r13, $sp, 8 ++; CHECK-NEXT: st.d $r12, $sp, 0 ++; CHECK-NEXT: bl callee_args ++; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 64 ++; CHECK-NEXT: jr $ra ++entry: ++ %r = tail call i32 @callee_args(i32 %a, i32 %b, i32 %c, i32 %dd, i32 %e, i32 %ff, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n) ++ ret i32 %r ++} ++ ++ ++; Do not tail call optimize for exception-handling functions. ++declare void @callee_interrupt() ++define void @caller_interrupt() #0 { ++; CHECK-LABEL: caller_interrupt: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: addi.d $sp, $sp, -16 ++; CHECK-NEXT: .cfi_def_cfa_offset 16 ++; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill ++; CHECK-NEXT: .cfi_offset 1, -8 ++; CHECK-NEXT: bl callee_interrupt ++; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 16 ++; CHECK-NEXT: jr $ra ++entry: ++ tail call void @callee_interrupt() ++ ret void ++} ++attributes #0 = { "interrupt"="machine" } ++ ++ ++; Do not tail call optimize functions with byval parameters. ++declare i32 @callee_byval(i32** byval(i32*) %a) ++define i32 @caller_byval() { ++; CHECK-LABEL: caller_byval: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: addi.d $sp, $sp, -32 ++; CHECK-NEXT: .cfi_def_cfa_offset 32 ++; CHECK-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill ++; CHECK-NEXT: .cfi_offset 1, -8 ++; CHECK-NEXT: ld.d $r4, $sp, 16 ++; CHECK-NEXT: st.d $r4, $sp, 0 ++; CHECK-NEXT: bl callee_byval ++; CHECK-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 32 ++; CHECK-NEXT: jr $ra ++entry: ++ %a = alloca i32* ++ %r = tail call i32 @callee_byval(i32** byval(i32*) %a) ++ ret i32 %r ++} ++ ++ ++; Do not tail call optimize if callee uses structret semantics. ++%struct.A = type { i32 } ++@a = global %struct.A zeroinitializer ++ ++declare void @callee_struct(%struct.A* sret(%struct.A) %a) ++define void @caller_nostruct() { ++; CHECK-LABEL: caller_nostruct: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: addi.d $sp, $sp, -16 ++; CHECK-NEXT: .cfi_def_cfa_offset 16 ++; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill ++; CHECK-NEXT: .cfi_offset 1, -8 ++; CHECK-NEXT: la.got $r4, a ++; CHECK-NEXT: # la expanded slot ++; CHECK-NEXT: bl callee_struct ++; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 16 ++; CHECK-NEXT: jr $ra ++entry: ++ tail call void @callee_struct(%struct.A* sret(%struct.A) @a) ++ ret void ++} ++ ++ ++; Do not tail call optimize if caller uses structret semantics. 
++declare void @callee_nostruct() ++define void @caller_struct(%struct.A* sret(%struct.A) %a) { ++; CHECK-LABEL: caller_struct: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: addi.d $sp, $sp, -16 ++; CHECK-NEXT: .cfi_def_cfa_offset 16 ++; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r23, $sp, 0 # 8-byte Folded Spill ++; CHECK-NEXT: .cfi_offset 1, -8 ++; CHECK-NEXT: .cfi_offset 23, -16 ++; CHECK-NEXT: move $r23, $r4 ++; CHECK-NEXT: bl callee_nostruct ++; CHECK-NEXT: move $r4, $r23 ++; CHECK-NEXT: ld.d $r23, $sp, 0 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 16 ++; CHECK-NEXT: jr $ra ++entry: ++ tail call void @callee_nostruct() ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/tailcall-mem.ll b/llvm/test/CodeGen/LoongArch/tailcall-mem.ll +new file mode 100644 +index 000000000..68ddaa899 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/tailcall-mem.ll +@@ -0,0 +1,35 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc -march=loongarch64 -relocation-model=pic < %s | FileCheck %s ++ ++ ++define void @tail_memcpy(i8* %p, i8* %q, i32 %n) { ++; CHECK-LABEL: tail_memcpy: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: b memcpy ++entry: ++ tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %p, i8* %q, i32 %n, i1 false) ++ ret void ++} ++ ++define void @tail_memmove(i8* %p, i8* %q, i32 %n) { ++; CHECK-LABEL: tail_memmove: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: b memmove ++entry: ++ tail call void @llvm.memmove.p0i8.p0i8.i32(i8* %p, i8* %q, i32 %n, i1 false) ++ ret void ++} ++ ++define void @tail_memset(i8* %p, i8 %c, i32 %n) { ++; CHECK-LABEL: tail_memset: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: b memset ++entry: ++ tail call void @llvm.memset.p0i8.i32(i8* %p, i8 %c, i32 %n, i1 false) ++ ret void ++} ++ ++declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i1) ++declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i1) ++declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) ++ +diff --git a/llvm/test/CodeGen/LoongArch/tailcall.ll b/llvm/test/CodeGen/LoongArch/tailcall.ll +new file mode 100644 +index 000000000..984df2cb6 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/tailcall.ll +@@ -0,0 +1,13 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc -march=loongarch64 -relocation-model=pic < %s | FileCheck %s ++ ++define void @f() { ++; CHECK-LABEL: f: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: b foo ++entry: ++ tail call void bitcast (void (...)* @foo to void ()*)() ++ ret void ++} ++ ++declare void @foo(...) 
+diff --git a/llvm/test/CodeGen/LoongArch/target_support.ll b/llvm/test/CodeGen/LoongArch/target_support.ll +deleted file mode 100644 +index b7796e633..000000000 +--- a/llvm/test/CodeGen/LoongArch/target_support.ll ++++ /dev/null +@@ -1,3 +0,0 @@ +-; RUN: llc --version | FileCheck %s +-; CHECK: loongarch32 - 32-bit LoongArch +-; CHECK: loongarch64 - 64-bit LoongArch +diff --git a/llvm/test/CodeGen/LoongArch/thread-pointer.ll b/llvm/test/CodeGen/LoongArch/thread-pointer.ll +new file mode 100644 +index 000000000..06a5886c4 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/thread-pointer.ll +@@ -0,0 +1,9 @@ ++; RUN: llc -march=loongarch64 < %s | FileCheck %s ++ ++declare i8* @llvm.thread.pointer() nounwind readnone ++ ++define i8* @thread_pointer() { ++; CHECK: move $r4, $tp ++ %1 = tail call i8* @llvm.thread.pointer() ++ ret i8* %1 ++} +diff --git a/llvm/test/CodeGen/LoongArch/trap.ll b/llvm/test/CodeGen/LoongArch/trap.ll +new file mode 100644 +index 000000000..4a4b54438 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/trap.ll +@@ -0,0 +1,13 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc -march=loongarch64 < %s | FileCheck %s ++ ++define void @test_trap() nounwind { ++; CHECK-LABEL: test_trap: ++; CHECK: # %bb.0: ++; CHECK-NEXT: break 0 ++; CHECK-NEXT: jr $ra ++ call void @llvm.trap() ++ ret void ++} ++ ++declare void @llvm.trap() +diff --git a/llvm/test/CodeGen/LoongArch/trunc.ll b/llvm/test/CodeGen/LoongArch/trunc.ll +new file mode 100644 +index 000000000..d1b5a3a14 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/trunc.ll +@@ -0,0 +1,108 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc -march=loongarch64 < %s | FileCheck %s ++ ++define signext i32 @foo1(i64 %a, i64 %b) { ++; CHECK-LABEL: foo1: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: add.w $r4, $r5, $r4 ++; CHECK-NEXT: jr $ra ++entry: ++ %conv = trunc i64 %a to i32 ++ %conv1 = trunc i64 %b to i32 ++ %add = add nsw i32 %conv1, %conv ++ ret i32 %add ++} ++ ++define signext i32 @foo2(i64 %a, i64 %b) { ++; CHECK-LABEL: foo2: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: sub.w $r4, $r4, $r5 ++; CHECK-NEXT: jr $ra ++entry: ++ %conv = trunc i64 %a to i32 ++ %conv1 = trunc i64 %b to i32 ++ %sub = sub nsw i32 %conv, %conv1 ++ ret i32 %sub ++} ++ ++define signext i32 @foo3(i64 %a, i64 %b) { ++; CHECK-LABEL: foo3: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: sll.w $r4, $r4, $r5 ++; CHECK-NEXT: jr $ra ++entry: ++ %conv = trunc i64 %a to i32 ++ %conv1 = trunc i64 %b to i32 ++ %shl = shl i32 %conv, %conv1 ++ ret i32 %shl ++} ++ ++define signext i32 @foo4(i64 %a, i64 %b) { ++; CHECK-LABEL: foo4: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: srl.w $r4, $r4, $r5 ++; CHECK-NEXT: jr $ra ++entry: ++ %conv = trunc i64 %a to i32 ++ %conv1 = trunc i64 %b to i32 ++ %shr = lshr i32 %conv, %conv1 ++ ret i32 %shr ++} ++ ++define signext i32 @foo5(i64 %a, i64 %b) { ++; CHECK-LABEL: foo5: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: mul.w $r4, $r5, $r4 ++; CHECK-NEXT: jr $ra ++entry: ++ %conv = trunc i64 %a to i32 ++ %conv1 = trunc i64 %b to i32 ++ %mul = mul nsw i32 %conv1, %conv ++ ret i32 %mul ++} ++ ++define signext i32 @foo6(i64 %a, i64 %b) { ++; CHECK-LABEL: foo6: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: sra.w $r4, $r4, $r5 ++; CHECK-NEXT: jr $ra ++entry: ++ %conv = trunc i64 %a to i32 ++ %conv1 = trunc i64 %b to i32 ++ %shr = ashr i32 %conv, %conv1 ++ ret i32 %shr ++} ++ ++define signext i32 @sdiv(i64 %a, i64 %b) { ++; CHECK-LABEL: sdiv: ++; CHECK: # %bb.0: 
# %entry ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: slli.w $r4, $r4, 0 ++; CHECK-NEXT: div.w $r4, $r4, $r5 ++; CHECK-NEXT: bne $r5, $zero, 8 ++; CHECK-NEXT: break 7 ++; CHECK-NEXT: slli.w $r4, $r4, 0 ++; CHECK-NEXT: jr $ra ++entry: ++ %conv = trunc i64 %a to i32 ++ %conv1 = trunc i64 %b to i32 ++ %div = sdiv i32 %conv, %conv1 ++ ret i32 %div ++} ++ ++define signext i32 @udiv(i64 %a, i64 %b) { ++; CHECK-LABEL: udiv: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: slli.w $r4, $r4, 0 ++; CHECK-NEXT: div.wu $r4, $r4, $r5 ++; CHECK-NEXT: bne $r5, $zero, 8 ++; CHECK-NEXT: break 7 ++; CHECK-NEXT: slli.w $r4, $r4, 0 ++; CHECK-NEXT: jr $ra ++entry: ++ %conv = trunc i64 %a to i32 ++ %conv1 = trunc i64 %b to i32 ++ %div = udiv i32 %conv, %conv1 ++ ret i32 %div ++} +diff --git a/llvm/test/CodeGen/LoongArch/unalignment.ll b/llvm/test/CodeGen/LoongArch/unalignment.ll +new file mode 100644 +index 000000000..d468a361f +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/unalignment.ll +@@ -0,0 +1,72 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc -march=loongarch64 -o - %s | FileCheck -check-prefix=UNALIGNED %s ++; RUN: llc -march=loongarch64 -mattr=+unaligned-access -o - %s | FileCheck -check-prefix=UNALIGNED %s ++; RUN: llc -march=loongarch64 -mattr=-unaligned-access -o - %s | FileCheck -check-prefix=ALIGNED %s ++ ++define i32 @i32_load(i32* %p) { ++; UNALIGNED-LABEL: i32_load: ++; UNALIGNED: # %bb.0: ++; UNALIGNED-NEXT: ld.w $r4, $r4, 0 ++; UNALIGNED-NEXT: jr $ra ++; ++; ALIGNED-LABEL: i32_load: ++; ALIGNED: # %bb.0: ++; ALIGNED-NEXT: ld.hu $r5, $r4, 0 ++; ALIGNED-NEXT: ld.hu $r4, $r4, 2 ++; ALIGNED-NEXT: slli.w $r4, $r4, 16 ++; ALIGNED-NEXT: or $r4, $r4, $r5 ++; ALIGNED-NEXT: jr $ra ++ %tmp = load i32, i32* %p, align 2 ++ ret i32 %tmp ++} ++ ++define signext i32 @i32_sextload(i32* %p) { ++; UNALIGNED-LABEL: i32_sextload: ++; UNALIGNED: # %bb.0: ++; UNALIGNED-NEXT: ld.w $r4, $r4, 0 ++; UNALIGNED-NEXT: jr $ra ++; ++; ALIGNED-LABEL: i32_sextload: ++; ALIGNED: # %bb.0: ++; ALIGNED-NEXT: ld.hu $r5, $r4, 0 ++; ALIGNED-NEXT: ld.h $r4, $r4, 2 ++; ALIGNED-NEXT: slli.d $r4, $r4, 16 ++; ALIGNED-NEXT: or $r4, $r4, $r5 ++; ALIGNED-NEXT: jr $ra ++ %tmp = load i32, i32* %p, align 2 ++ ret i32 %tmp ++} ++ ++define zeroext i32 @i32_zextload(i32* %p) { ++; UNALIGNED-LABEL: i32_zextload: ++; UNALIGNED: # %bb.0: ++; UNALIGNED-NEXT: ld.wu $r4, $r4, 0 ++; UNALIGNED-NEXT: jr $ra ++; ++; ALIGNED-LABEL: i32_zextload: ++; ALIGNED: # %bb.0: ++; ALIGNED-NEXT: ld.hu $r5, $r4, 0 ++; ALIGNED-NEXT: ld.hu $r4, $r4, 2 ++; ALIGNED-NEXT: slli.d $r4, $r4, 16 ++; ALIGNED-NEXT: or $r4, $r4, $r5 ++; ALIGNED-NEXT: jr $ra ++ %tmp = load i32, i32* %p, align 2 ++ ret i32 %tmp ++} ++ ++define i64 @i64_load(i64* %p) { ++; UNALIGNED-LABEL: i64_load: ++; UNALIGNED: # %bb.0: ++; UNALIGNED-NEXT: ld.d $r4, $r4, 0 ++; UNALIGNED-NEXT: jr $ra ++; ++; ALIGNED-LABEL: i64_load: ++; ALIGNED: # %bb.0: ++; ALIGNED-NEXT: ld.wu $r5, $r4, 0 ++; ALIGNED-NEXT: ld.wu $r4, $r4, 4 ++; ALIGNED-NEXT: slli.d $r4, $r4, 32 ++; ALIGNED-NEXT: or $r4, $r4, $r5 ++; ALIGNED-NEXT: jr $ra ++ %tmp = load i64, i64* %p, align 4 ++ ret i64 %tmp ++} +diff --git a/llvm/test/DebugInfo/Symbolize/ELF/loongarch-empty-name-symbol.s b/llvm/test/DebugInfo/Symbolize/ELF/loongarch-empty-name-symbol.s +new file mode 100644 +index 000000000..ea58521cb +--- /dev/null ++++ b/llvm/test/DebugInfo/Symbolize/ELF/loongarch-empty-name-symbol.s +@@ -0,0 +1,26 @@ ++# REQUIRES: loongarch-registered-target ++## Ignore empty 
name symbols. ++ ++# RUN: llvm-mc -filetype=obj -triple=loongarch64 %s -o %t ++# RUN: llvm-readelf -s %t | FileCheck %s --check-prefix=SYM ++ ++# SYM: 0000000000000004 0 NOTYPE LOCAL DEFAULT [[#]] {{$}} ++# SYM: 0000000000000000 0 NOTYPE GLOBAL DEFAULT [[#]] foo ++ ++## Make sure we test at an address larger than or equal to an empty name symbol. ++# RUN: llvm-symbolizer --obj=%t 0 4 | FileCheck %s ++ ++# CHECK: foo ++# CHECK-NEXT: ??:0:0 ++# CHECK-EMPTY: ++# CHECK-NEXT: foo ++# CHECK-NEXT: ??:0:0 ++ ++.globl foo ++foo: ++ nop ++ .file 1 "/tmp" "a.s" ++ .loc 1 1 0 ++ nop ++ ++.section .debug_line,"",@progbits +diff --git a/llvm/test/ExecutionEngine/JITLink/X86/MachO_GOTAndStubsOptimization.s b/llvm/test/ExecutionEngine/JITLink/X86/MachO_GOTAndStubsOptimization.s +index caeae4fa4..56f391b7d 100644 +--- a/llvm/test/ExecutionEngine/JITLink/X86/MachO_GOTAndStubsOptimization.s ++++ b/llvm/test/ExecutionEngine/JITLink/X86/MachO_GOTAndStubsOptimization.s +@@ -1,3 +1,4 @@ ++# UNSUPPORTED: loongarch64 + # RUN: rm -rf %t && mkdir -p %t + # RUN: llvm-mc -triple=x86_64-apple-macos10.9 -filetype=obj \ + # RUN: -o %t/helper.o %S/Inputs/MachO_GOTAndStubsOptimizationHelper.s +diff --git a/llvm/test/ExecutionEngine/MCJIT/lit.local.cfg b/llvm/test/ExecutionEngine/MCJIT/lit.local.cfg +index e2535ef1d..09f1a2ab6 100644 +--- a/llvm/test/ExecutionEngine/MCJIT/lit.local.cfg ++++ b/llvm/test/ExecutionEngine/MCJIT/lit.local.cfg +@@ -1,7 +1,8 @@ + root = config.root + targets = root.targets + if ('X86' in targets) | ('AArch64' in targets) | ('ARM' in targets) | \ +- ('Mips' in targets) | ('PowerPC' in targets) | ('SystemZ' in targets): ++ ('Mips' in targets) | ('PowerPC' in targets) | ('SystemZ' in targets) | \ ++ ('LoongArch' in targets) : + config.unsupported = False + else: + config.unsupported = True +@@ -9,7 +10,7 @@ else: + # FIXME: autoconf and cmake produce different arch names. We should normalize + # them before getting here. 
+ if root.host_arch not in ['i386', 'x86', 'x86_64', 'AMD64', +- 'AArch64', 'ARM', 'Mips', ++ 'AArch64', 'ARM', 'Mips', 'loongarch64', + 'PowerPC', 'ppc64', 'ppc64le', 'SystemZ']: + config.unsupported = True + +diff --git a/llvm/test/ExecutionEngine/MCJIT/remote/cross-module-a.ll b/llvm/test/ExecutionEngine/MCJIT/remote/cross-module-a.ll +index 7a1731e74..3023f7481 100644 +--- a/llvm/test/ExecutionEngine/MCJIT/remote/cross-module-a.ll ++++ b/llvm/test/ExecutionEngine/MCJIT/remote/cross-module-a.ll +@@ -1,3 +1,4 @@ ++;UNSUPPORTED: loongarch64 + ; RUN: %lli -jit-kind=mcjit -extra-module=%p/Inputs/cross-module-b.ll -disable-lazy-compilation=true -remote-mcjit -mcjit-remote-process=lli-child-target%exeext %s > /dev/null + ; XFAIL: windows-gnu,windows-msvc + ; UNSUPPORTED: powerpc64-unknown-linux-gnu +diff --git a/llvm/test/ExecutionEngine/MCJIT/remote/eh.ll b/llvm/test/ExecutionEngine/MCJIT/remote/eh.ll +index e25fd710b..ccc138922 100644 +--- a/llvm/test/ExecutionEngine/MCJIT/remote/eh.ll ++++ b/llvm/test/ExecutionEngine/MCJIT/remote/eh.ll +@@ -1,3 +1,4 @@ ++;UNSUPPORTED: loongarch64 + ; REQUIRES: cxx-shared-library + ; RUN: %lli -jit-kind=mcjit -remote-mcjit -mcjit-remote-process=lli-child-target%exeext %s + ; XFAIL: arm, cygwin, windows-msvc, windows-gnu +diff --git a/llvm/test/ExecutionEngine/MCJIT/remote/simpletest-remote.ll b/llvm/test/ExecutionEngine/MCJIT/remote/simpletest-remote.ll +index f458ab79f..117df54e2 100644 +--- a/llvm/test/ExecutionEngine/MCJIT/remote/simpletest-remote.ll ++++ b/llvm/test/ExecutionEngine/MCJIT/remote/simpletest-remote.ll +@@ -1,3 +1,4 @@ ++;UNSUPPORTED: loongarch64 + ; RUN: %lli -jit-kind=mcjit -remote-mcjit -mcjit-remote-process=lli-child-target%exeext %s > /dev/null + ; XFAIL: windows-gnu,windows-msvc + ; UNSUPPORTED: powerpc64-unknown-linux-gnu +diff --git a/llvm/test/ExecutionEngine/MCJIT/remote/stubs-remote.ll b/llvm/test/ExecutionEngine/MCJIT/remote/stubs-remote.ll +index b8684a17a..a0bc1c2f2 100644 +--- a/llvm/test/ExecutionEngine/MCJIT/remote/stubs-remote.ll ++++ b/llvm/test/ExecutionEngine/MCJIT/remote/stubs-remote.ll +@@ -1,3 +1,4 @@ ++;UNSUPPORTED: loongarch64 + ; RUN: %lli -jit-kind=mcjit -remote-mcjit -disable-lazy-compilation=false -mcjit-remote-process=lli-child-target%exeext %s + ; XFAIL: windows-gnu,windows-msvc + ; UNSUPPORTED: powerpc64-unknown-linux-gnu +diff --git a/llvm/test/ExecutionEngine/MCJIT/remote/test-common-symbols-remote.ll b/llvm/test/ExecutionEngine/MCJIT/remote/test-common-symbols-remote.ll +index 060b5e132..05c3e96f2 100644 +--- a/llvm/test/ExecutionEngine/MCJIT/remote/test-common-symbols-remote.ll ++++ b/llvm/test/ExecutionEngine/MCJIT/remote/test-common-symbols-remote.ll +@@ -1,3 +1,4 @@ ++;UNSUPPORTED: loongarch64 + ; RUN: %lli -jit-kind=mcjit -remote-mcjit -O0 -disable-lazy-compilation=false -mcjit-remote-process=lli-child-target%exeext %s + ; XFAIL: windows-gnu,windows-msvc + ; UNSUPPORTED: powerpc64-unknown-linux-gnu +diff --git a/llvm/test/ExecutionEngine/MCJIT/remote/test-fp-no-external-funcs-remote.ll b/llvm/test/ExecutionEngine/MCJIT/remote/test-fp-no-external-funcs-remote.ll +index 6e60396e2..c6073ea02 100644 +--- a/llvm/test/ExecutionEngine/MCJIT/remote/test-fp-no-external-funcs-remote.ll ++++ b/llvm/test/ExecutionEngine/MCJIT/remote/test-fp-no-external-funcs-remote.ll +@@ -1,3 +1,4 @@ ++;UNSUPPORTED: loongarch64 + ; RUN: %lli -jit-kind=mcjit -remote-mcjit -mcjit-remote-process=lli-child-target%exeext %s > /dev/null + ; XFAIL: windows-gnu,windows-msvc + ; UNSUPPORTED: powerpc64-unknown-linux-gnu +diff --git 
a/llvm/test/ExecutionEngine/MCJIT/remote/test-global-init-nonzero-remote.ll b/llvm/test/ExecutionEngine/MCJIT/remote/test-global-init-nonzero-remote.ll +index b6fae4600..78b9be1cb 100644 +--- a/llvm/test/ExecutionEngine/MCJIT/remote/test-global-init-nonzero-remote.ll ++++ b/llvm/test/ExecutionEngine/MCJIT/remote/test-global-init-nonzero-remote.ll +@@ -1,3 +1,4 @@ ++;UNSUPPORTED: loongarch64 + ; RUN: %lli -jit-kind=mcjit -remote-mcjit -mcjit-remote-process=lli-child-target%exeext %s > /dev/null + ; XFAIL: windows-gnu,windows-msvc + ; UNSUPPORTED: powerpc64-unknown-linux-gnu +diff --git a/llvm/test/ExecutionEngine/MCJIT/remote/test-global-init-nonzero-sm-pic.ll b/llvm/test/ExecutionEngine/MCJIT/remote/test-global-init-nonzero-sm-pic.ll +index 34f72bc93..699d4a5de 100644 +--- a/llvm/test/ExecutionEngine/MCJIT/remote/test-global-init-nonzero-sm-pic.ll ++++ b/llvm/test/ExecutionEngine/MCJIT/remote/test-global-init-nonzero-sm-pic.ll +@@ -1,3 +1,4 @@ ++;UNSUPPORTED: loongarch64 + ; RUN: %lli -jit-kind=mcjit -remote-mcjit -mcjit-remote-process=lli-child-target%exeext \ + ; RUN: -relocation-model=pic -code-model=small %s > /dev/null + ; XFAIL: mips-, mipsel-, aarch64, arm, i686, i386, windows-gnu, windows-msvc +diff --git a/llvm/test/ExecutionEngine/MCJIT/remote/test-ptr-reloc-remote.ll b/llvm/test/ExecutionEngine/MCJIT/remote/test-ptr-reloc-remote.ll +index 9e76601c9..94e3a94e5 100644 +--- a/llvm/test/ExecutionEngine/MCJIT/remote/test-ptr-reloc-remote.ll ++++ b/llvm/test/ExecutionEngine/MCJIT/remote/test-ptr-reloc-remote.ll +@@ -1,3 +1,4 @@ ++;UNSUPPORTED: loongarch64 + ; RUN: %lli -jit-kind=mcjit -remote-mcjit -O0 -mcjit-remote-process=lli-child-target%exeext %s + ; XFAIL: windows-gnu,windows-msvc + ; UNSUPPORTED: powerpc64-unknown-linux-gnu +diff --git a/llvm/test/ExecutionEngine/MCJIT/remote/test-ptr-reloc-sm-pic.ll b/llvm/test/ExecutionEngine/MCJIT/remote/test-ptr-reloc-sm-pic.ll +index 20f232add..5a92c2353 100644 +--- a/llvm/test/ExecutionEngine/MCJIT/remote/test-ptr-reloc-sm-pic.ll ++++ b/llvm/test/ExecutionEngine/MCJIT/remote/test-ptr-reloc-sm-pic.ll +@@ -1,3 +1,4 @@ ++;UNSUPPORTED: loongarch64 + ; RUN: %lli -jit-kind=mcjit -remote-mcjit -mcjit-remote-process=lli-child-target%exeext \ + ; RUN: -O0 -relocation-model=pic -code-model=small %s + ; XFAIL: mips-, mipsel-, aarch64, arm, i686, i386, windows-gnu, windows-msvc +diff --git a/llvm/test/ExecutionEngine/RuntimeDyld/LoongArch/hello-g.ll b/llvm/test/ExecutionEngine/RuntimeDyld/LoongArch/hello-g.ll +new file mode 100644 +index 000000000..0a920808d +--- /dev/null ++++ b/llvm/test/ExecutionEngine/RuntimeDyld/LoongArch/hello-g.ll +@@ -0,0 +1,33 @@ ++; REQUIRES: asserts ++; RUN: %lli --jit-kind=mcjit %s > /dev/null ++@.str = private unnamed_addr constant [7 x i8] c"hello\0A\00", align 1 ++ ++; Function Attrs: noinline nounwind optnone ++define signext i32 @main() !dbg !8 { ++entry: ++ %retval = alloca i32, align 4 ++ store i32 0, i32* %retval, align 4 ++ %call = call signext i32 (i8*, ...) @printf(i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str, i64 0, i64 0)), !dbg !12 ++ ret i32 0, !dbg !13 ++} ++ ++declare signext i32 @printf(i8*, ...) 
++ ++!llvm.dbg.cu = !{!0} ++!llvm.module.flags = !{!3, !4, !5, !6} ++!llvm.ident = !{!7} ++ ++!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 10.0.1", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None) ++!1 = !DIFile(filename: "hello.c", directory: "/") ++!2 = !{} ++!3 = !{i32 7, !"Dwarf Version", i32 4} ++!4 = !{i32 2, !"Debug Info Version", i32 3} ++!5 = !{i32 1, !"wchar_size", i32 4} ++!6 = !{i32 7, !"PIC Level", i32 1} ++!7 = !{!"clang version 10.0.1"} ++!8 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 3, type: !9, scopeLine: 4, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2) ++!9 = !DISubroutineType(types: !10) ++!10 = !{!11} ++!11 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) ++!12 = !DILocation(line: 5, column: 3, scope: !8) ++!13 = !DILocation(line: 6, column: 3, scope: !8) +diff --git a/llvm/test/ExecutionEngine/RuntimeDyld/LoongArch/lit.local.cfg b/llvm/test/ExecutionEngine/RuntimeDyld/LoongArch/lit.local.cfg +new file mode 100644 +index 000000000..2b5a4893e +--- /dev/null ++++ b/llvm/test/ExecutionEngine/RuntimeDyld/LoongArch/lit.local.cfg +@@ -0,0 +1,2 @@ ++if not 'LoongArch' in config.root.targets: ++ config.unsupported = True +diff --git a/llvm/test/MC/LoongArch/Basic/Float/d-arith.s b/llvm/test/MC/LoongArch/Basic/Float/d-arith.s +deleted file mode 100644 +index a10845d74..000000000 +--- a/llvm/test/MC/LoongArch/Basic/Float/d-arith.s ++++ /dev/null +@@ -1,99 +0,0 @@ +-# RUN: llvm-mc %s --triple=loongarch32 --mattr=+d --show-encoding \ +-# RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM %s +-# RUN: llvm-mc %s --triple=loongarch64 --mattr=+d --show-encoding \ +-# RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM %s +-# RUN: llvm-mc %s --triple=loongarch32 --mattr=+d --filetype=obj \ +-# RUN: | llvm-objdump -d --mattr=+d - \ +-# RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s +-# RUN: llvm-mc %s --triple=loongarch64 --mattr=+d --filetype=obj \ +-# RUN: | llvm-objdump -d --mattr=+d - \ +-# RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s +- +-## Support for the 'D' extension implies support for 'F' +-# ASM-AND-OBJ: fadd.s $fs5, $ft7, $fs1 +-# ASM: encoding: [0xfd,0xe5,0x00,0x01] +-fadd.s $fs5, $ft7, $fs1 +- +-# ASM-AND-OBJ: fadd.d $fs1, $fa7, $ft5 +-# ASM: encoding: [0xf9,0x34,0x01,0x01] +-fadd.d $fs1, $fa7, $ft5 +- +-# ASM-AND-OBJ: fsub.d $fs5, $fa1, $ft10 +-# ASM: encoding: [0x3d,0x48,0x03,0x01] +-fsub.d $fs5, $fa1, $ft10 +- +-# ASM-AND-OBJ: fmul.d $fa4, $fs6, $fa7 +-# ASM: encoding: [0xc4,0x1f,0x05,0x01] +-fmul.d $fa4, $fs6, $fa7 +- +-# ASM-AND-OBJ: fdiv.d $fa3, $fs1, $fs4 +-# ASM: encoding: [0x23,0x73,0x07,0x01] +-fdiv.d $fa3, $fs1, $fs4 +- +-# ASM-AND-OBJ: fmadd.d $ft13, $fs0, $fs4, $fs0 +-# ASM: encoding: [0x15,0x73,0x2c,0x08] +-fmadd.d $ft13, $fs0, $fs4, $fs0 +- +-# ASM-AND-OBJ: fmsub.d $fa6, $ft10, $ft12, $fs3 +-# ASM: encoding: [0x46,0xd2,0x6d,0x08] +-fmsub.d $fa6, $ft10, $ft12, $fs3 +- +-# ASM-AND-OBJ: fnmadd.d $fs1, $ft5, $ft11, $fs6 +-# ASM: encoding: [0xb9,0x4d,0xaf,0x08] +-fnmadd.d $fs1, $ft5, $ft11, $fs6 +- +-# ASM-AND-OBJ: fnmsub.d $fs6, $fs2, $fa7, $fs0 +-# ASM: encoding: [0x5e,0x1f,0xec,0x08] +-fnmsub.d $fs6, $fs2, $fa7, $fs0 +- +-# ASM-AND-OBJ: fmax.d $ft3, $fs2, $ft5 +-# ASM: encoding: [0x4b,0x37,0x09,0x01] +-fmax.d $ft3, $fs2, $ft5 +- +-# ASM-AND-OBJ: fmin.d $fa1, $ft5, $fs3 +-# ASM: encoding: [0xa1,0x6d,0x0b,0x01] +-fmin.d $fa1, $ft5, $fs3 +- +-# ASM-AND-OBJ: fmaxa.d $fs0, $ft5, $fa4 +-# ASM: encoding: 
[0xb8,0x11,0x0d,0x01] +-fmaxa.d $fs0, $ft5, $fa4 +- +-# ASM-AND-OBJ: fmina.d $ft10, $ft2, $fa0 +-# ASM: encoding: [0x52,0x01,0x0f,0x01] +-fmina.d $ft10, $ft2, $fa0 +- +-# ASM-AND-OBJ: fabs.d $ft15, $fa3 +-# ASM: encoding: [0x77,0x08,0x14,0x01] +-fabs.d $ft15, $fa3 +- +-# ASM-AND-OBJ: fneg.d $ft3, $fs2 +-# ASM: encoding: [0x4b,0x1b,0x14,0x01] +-fneg.d $ft3, $fs2 +- +-# ASM-AND-OBJ: fsqrt.d $fa2, $ft3 +-# ASM: encoding: [0x62,0x49,0x14,0x01] +-fsqrt.d $fa2, $ft3 +- +-# ASM-AND-OBJ: frecip.d $fs3, $fs3 +-# ASM: encoding: [0x7b,0x5b,0x14,0x01] +-frecip.d $fs3, $fs3 +- +-# ASM-AND-OBJ: frsqrt.d $ft14, $fa3 +-# ASM: encoding: [0x76,0x68,0x14,0x01] +-frsqrt.d $ft14, $fa3 +- +-# ASM-AND-OBJ: fscaleb.d $ft4, $ft6, $fs2 +-# ASM: encoding: [0xcc,0x69,0x11,0x01] +-fscaleb.d $ft4, $ft6, $fs2 +- +-# ASM-AND-OBJ: flogb.d $ft13, $fs5 +-# ASM: encoding: [0xb5,0x2b,0x14,0x01] +-flogb.d $ft13, $fs5 +- +-# ASM-AND-OBJ: fcopysign.d $ft8, $fs2, $fa6 +-# ASM: encoding: [0x50,0x1b,0x13,0x01] +-fcopysign.d $ft8, $fs2, $fa6 +- +-# ASM-AND-OBJ: fclass.d $ft11, $fa2 +-# ASM: encoding: [0x53,0x38,0x14,0x01] +-fclass.d $ft11, $fa2 +diff --git a/llvm/test/MC/LoongArch/Basic/Float/d-bound-check.s b/llvm/test/MC/LoongArch/Basic/Float/d-bound-check.s +deleted file mode 100644 +index 1d6b489f3..000000000 +--- a/llvm/test/MC/LoongArch/Basic/Float/d-bound-check.s ++++ /dev/null +@@ -1,31 +0,0 @@ +-# RUN: llvm-mc %s --triple=loongarch32 --mattr=+d --show-encoding \ +-# RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM %s +-# RUN: llvm-mc %s --triple=loongarch64 --mattr=+d --show-encoding \ +-# RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM %s +-# RUN: llvm-mc %s --triple=loongarch32 --mattr=+d --filetype=obj \ +-# RUN: | llvm-objdump -d --mattr=+d - \ +-# RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s +-# RUN: llvm-mc %s --triple=loongarch64 --mattr=+d --filetype=obj \ +-# RUN: | llvm-objdump -d --mattr=+d - \ +-# RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s +- +-## Support for the 'D' extension implies support for 'F' +-# ASM-AND-OBJ: fldgt.s $fa3, $s4, $t1 +-# ASM: encoding: [0x63,0x37,0x74,0x38] +-fldgt.s $fa3, $s4, $t1 +- +-# ASM-AND-OBJ: fldgt.d $fs2, $a1, $s8 +-# ASM: encoding: [0xba,0xfc,0x74,0x38] +-fldgt.d $fs2, $a1, $s8 +- +-# ASM-AND-OBJ: fldle.d $fa3, $t3, $fp +-# ASM: encoding: [0xe3,0xd9,0x75,0x38] +-fldle.d $fa3, $t3, $fp +- +-# ASM-AND-OBJ: fstgt.d $ft5, $a7, $s3 +-# ASM: encoding: [0x6d,0xe9,0x76,0x38] +-fstgt.d $ft5, $a7, $s3 +- +-# ASM-AND-OBJ: fstle.d $ft10, $a5, $t1 +-# ASM: encoding: [0x32,0xb5,0x77,0x38] +-fstle.d $ft10, $a5, $t1 +diff --git a/llvm/test/MC/LoongArch/Basic/Float/d-branch.s b/llvm/test/MC/LoongArch/Basic/Float/d-branch.s +deleted file mode 100644 +index 838b7e933..000000000 +--- a/llvm/test/MC/LoongArch/Basic/Float/d-branch.s ++++ /dev/null +@@ -1,15 +0,0 @@ +-# RUN: llvm-mc %s --triple=loongarch32 --mattr=+d --show-encoding \ +-# RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM %s +-# RUN: llvm-mc %s --triple=loongarch64 --mattr=+d --show-encoding \ +-# RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM %s +-# RUN: llvm-mc %s --triple=loongarch32 --mattr=+d --filetype=obj \ +-# RUN: | llvm-objdump -d --mattr=+d - \ +-# RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s +-# RUN: llvm-mc %s --triple=loongarch64 --mattr=+d --filetype=obj \ +-# RUN: | llvm-objdump -d --mattr=+d - \ +-# RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s +- +-## Support for the 'D' extension implies support for 'F' +-# ASM-AND-OBJ: bceqz $fcc6, 12 +-# ASM: encoding: [0xc0,0x0c,0x00,0x48] +-bceqz $fcc6, 12 +diff 
--git a/llvm/test/MC/LoongArch/Basic/Float/d-comp.s b/llvm/test/MC/LoongArch/Basic/Float/d-comp.s +deleted file mode 100644 +index 3ddae6d05..000000000 +--- a/llvm/test/MC/LoongArch/Basic/Float/d-comp.s ++++ /dev/null +@@ -1,103 +0,0 @@ +-# RUN: llvm-mc %s --triple=loongarch32 --mattr=+d --show-encoding \ +-# RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM %s +-# RUN: llvm-mc %s --triple=loongarch64 --mattr=+d --show-encoding \ +-# RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM %s +-# RUN: llvm-mc %s --triple=loongarch32 --mattr=+d --filetype=obj \ +-# RUN: | llvm-objdump -d --mattr=+d - \ +-# RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s +-# RUN: llvm-mc %s --triple=loongarch64 --mattr=+d --filetype=obj \ +-# RUN: | llvm-objdump -d --mattr=+d - \ +-# RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s +- +-## Support for the 'D' extension implies support for 'F' +-# ASM-AND-OBJ: fcmp.caf.s $fcc0, $fa0, $fa1 +-# ASM: encoding: [0x00,0x04,0x10,0x0c] +-fcmp.caf.s $fcc0, $fa0, $fa1 +- +-# ASM-AND-OBJ: fcmp.caf.d $fcc0, $fa0, $fa1 +-# ASM: encoding: [0x00,0x04,0x20,0x0c] +-fcmp.caf.d $fcc0, $fa0, $fa1 +- +-# ASM-AND-OBJ: fcmp.cun.d $fcc0, $fa0, $fa1 +-# ASM: encoding: [0x00,0x04,0x24,0x0c] +-fcmp.cun.d $fcc0, $fa0, $fa1 +- +-# ASM-AND-OBJ: fcmp.ceq.d $fcc0, $fa0, $fa1 +-# ASM: encoding: [0x00,0x04,0x22,0x0c] +-fcmp.ceq.d $fcc0, $fa0, $fa1 +- +-# ASM-AND-OBJ: fcmp.cueq.d $fcc0, $fa0, $fa1 +-# ASM: encoding: [0x00,0x04,0x26,0x0c] +-fcmp.cueq.d $fcc0, $fa0, $fa1 +- +-# ASM-AND-OBJ: fcmp.clt.d $fcc0, $fa0, $fa1 +-# ASM: encoding: [0x00,0x04,0x21,0x0c] +-fcmp.clt.d $fcc0, $fa0, $fa1 +- +-# ASM-AND-OBJ: fcmp.cult.d $fcc0, $fa0, $fa1 +-# ASM: encoding: [0x00,0x04,0x25,0x0c] +-fcmp.cult.d $fcc0, $fa0, $fa1 +- +-# ASM-AND-OBJ: fcmp.cle.d $fcc0, $fa0, $fa1 +-# ASM: encoding: [0x00,0x04,0x23,0x0c] +-fcmp.cle.d $fcc0, $fa0, $fa1 +- +-# ASM-AND-OBJ: fcmp.cule.d $fcc0, $fa0, $fa1 +-# ASM: encoding: [0x00,0x04,0x27,0x0c] +-fcmp.cule.d $fcc0, $fa0, $fa1 +- +-# ASM-AND-OBJ: fcmp.cne.d $fcc0, $fa0, $fa1 +-# ASM: encoding: [0x00,0x04,0x28,0x0c] +-fcmp.cne.d $fcc0, $fa0, $fa1 +- +-# ASM-AND-OBJ: fcmp.cor.d $fcc0, $fa0, $fa1 +-# ASM: encoding: [0x00,0x04,0x2a,0x0c] +-fcmp.cor.d $fcc0, $fa0, $fa1 +- +-# ASM-AND-OBJ: fcmp.cune.d $fcc0, $fa0, $fa1 +-# ASM: encoding: [0x00,0x04,0x2c,0x0c] +-fcmp.cune.d $fcc0, $fa0, $fa1 +- +-# ASM-AND-OBJ: fcmp.saf.d $fcc0, $fa0, $fa1 +-# ASM: encoding: [0x00,0x84,0x20,0x0c] +-fcmp.saf.d $fcc0, $fa0, $fa1 +- +-# ASM-AND-OBJ: fcmp.sun.d $fcc0, $fa0, $fa1 +-# ASM: encoding: [0x00,0x84,0x24,0x0c] +-fcmp.sun.d $fcc0, $fa0, $fa1 +- +-# ASM-AND-OBJ: fcmp.seq.d $fcc0, $fa0, $fa1 +-# ASM: encoding: [0x00,0x84,0x22,0x0c] +-fcmp.seq.d $fcc0, $fa0, $fa1 +- +-# ASM-AND-OBJ: fcmp.sueq.d $fcc0, $fa0, $fa1 +-# ASM: encoding: [0x00,0x84,0x26,0x0c] +-fcmp.sueq.d $fcc0, $fa0, $fa1 +- +-# ASM-AND-OBJ: fcmp.slt.d $fcc0, $fa0, $fa1 +-# ASM: encoding: [0x00,0x84,0x21,0x0c] +-fcmp.slt.d $fcc0, $fa0, $fa1 +- +-# ASM-AND-OBJ: fcmp.sult.d $fcc0, $fa0, $fa1 +-# ASM: encoding: [0x00,0x84,0x25,0x0c] +-fcmp.sult.d $fcc0, $fa0, $fa1 +- +-# ASM-AND-OBJ: fcmp.sle.d $fcc0, $fa0, $fa1 +-# ASM: encoding: [0x00,0x84,0x23,0x0c] +-fcmp.sle.d $fcc0, $fa0, $fa1 +- +-# ASM-AND-OBJ: fcmp.sule.d $fcc0, $fa0, $fa1 +-# ASM: encoding: [0x00,0x84,0x27,0x0c] +-fcmp.sule.d $fcc0, $fa0, $fa1 +- +-# ASM-AND-OBJ: fcmp.sne.d $fcc0, $fa0, $fa1 +-# ASM: encoding: [0x00,0x84,0x28,0x0c] +-fcmp.sne.d $fcc0, $fa0, $fa1 +- +-# ASM-AND-OBJ: fcmp.sor.d $fcc0, $fa0, $fa1 +-# ASM: encoding: [0x00,0x84,0x2a,0x0c] +-fcmp.sor.d $fcc0, $fa0, $fa1 +- 
+-# ASM-AND-OBJ: fcmp.sune.d $fcc0, $fa0, $fa1 +-# ASM: encoding: [0x00,0x84,0x2c,0x0c] +-fcmp.sune.d $fcc0, $fa0, $fa1 +diff --git a/llvm/test/MC/LoongArch/Basic/Float/d-conv.s b/llvm/test/MC/LoongArch/Basic/Float/d-conv.s +deleted file mode 100644 +index fa5a5088e..000000000 +--- a/llvm/test/MC/LoongArch/Basic/Float/d-conv.s ++++ /dev/null +@@ -1,99 +0,0 @@ +-# RUN: llvm-mc %s --triple=loongarch32 --mattr=+d --show-encoding \ +-# RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM %s +-# RUN: llvm-mc %s --triple=loongarch64 --mattr=+d --show-encoding \ +-# RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM %s +-# RUN: llvm-mc %s --triple=loongarch32 --mattr=+d --filetype=obj \ +-# RUN: | llvm-objdump -d --mattr=+d - \ +-# RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s +-# RUN: llvm-mc %s --triple=loongarch64 --mattr=+d --filetype=obj \ +-# RUN: | llvm-objdump -d --mattr=+d - \ +-# RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s +- +-## Support for the 'D' extension implies support for 'F' +-# ASM-AND-OBJ: frint.s $fa5, $ft9 +-# ASM: encoding: [0x25,0x46,0x1e,0x01] +-frint.s $fa5, $ft9 +- +-# ASM-AND-OBJ: fcvt.s.d $ft4, $ft11 +-# ASM: encoding: [0x6c,0x1a,0x19,0x01] +-fcvt.s.d $ft4, $ft11 +- +-# ASM-AND-OBJ: fcvt.d.s $ft2, $fa6 +-# ASM: encoding: [0xca,0x24,0x19,0x01] +-fcvt.d.s $ft2, $fa6 +- +-# ASM-AND-OBJ: ffint.s.l $fa6, $fa5 +-# ASM: encoding: [0xa6,0x18,0x1d,0x01] +-ffint.s.l $fa6, $fa5 +- +-# ASM-AND-OBJ: ffint.d.w $fs0, $ft10 +-# ASM: encoding: [0x58,0x22,0x1d,0x01] +-ffint.d.w $fs0, $ft10 +- +-# ASM-AND-OBJ: ffint.d.l $ft15, $fs2 +-# ASM: encoding: [0x57,0x2b,0x1d,0x01] +-ffint.d.l $ft15, $fs2 +- +-# ASM-AND-OBJ: ftint.w.d $fa3, $ft6 +-# ASM: encoding: [0xc3,0x09,0x1b,0x01] +-ftint.w.d $fa3, $ft6 +- +-# ASM-AND-OBJ: ftint.l.s $fs7, $fs0 +-# ASM: encoding: [0x1f,0x27,0x1b,0x01] +-ftint.l.s $fs7, $fs0 +- +-# ASM-AND-OBJ: ftint.l.d $ft8, $fs0 +-# ASM: encoding: [0x10,0x2b,0x1b,0x01] +-ftint.l.d $ft8, $fs0 +- +-# ASM-AND-OBJ: ftintrm.w.d $fa7, $ft0 +-# ASM: encoding: [0x07,0x09,0x1a,0x01] +-ftintrm.w.d $fa7, $ft0 +- +-# ASM-AND-OBJ: ftintrm.l.s $fs0, $ft2 +-# ASM: encoding: [0x58,0x25,0x1a,0x01] +-ftintrm.l.s $fs0, $ft2 +- +-# ASM-AND-OBJ: ftintrm.l.d $ft1, $ft1 +-# ASM: encoding: [0x29,0x29,0x1a,0x01] +-ftintrm.l.d $ft1, $ft1 +- +-# ASM-AND-OBJ: ftintrp.w.d $ft4, $fa3 +-# ASM: encoding: [0x6c,0x48,0x1a,0x01] +-ftintrp.w.d $ft4, $fa3 +- +-# ASM-AND-OBJ: ftintrp.l.s $fa0, $ft8 +-# ASM: encoding: [0x00,0x66,0x1a,0x01] +-ftintrp.l.s $fa0, $ft8 +- +-# ASM-AND-OBJ: ftintrp.l.d $fa4, $fs5 +-# ASM: encoding: [0xa4,0x6b,0x1a,0x01] +-ftintrp.l.d $fa4, $fs5 +- +-# ASM-AND-OBJ: ftintrz.w.d $fs1, $fs0 +-# ASM: encoding: [0x19,0x8b,0x1a,0x01] +-ftintrz.w.d $fs1, $fs0 +- +-# ASM-AND-OBJ: ftintrz.l.s $ft15, $fa5 +-# ASM: encoding: [0xb7,0xa4,0x1a,0x01] +-ftintrz.l.s $ft15, $fa5 +- +-# ASM-AND-OBJ: ftintrz.l.d $fa3, $ft2 +-# ASM: encoding: [0x43,0xa9,0x1a,0x01] +-ftintrz.l.d $fa3, $ft2 +- +-# ASM-AND-OBJ: ftintrne.w.d $fs7, $ft4 +-# ASM: encoding: [0x9f,0xc9,0x1a,0x01] +-ftintrne.w.d $fs7, $ft4 +- +-# ASM-AND-OBJ: ftintrne.l.s $ft14, $fs3 +-# ASM: encoding: [0x76,0xe7,0x1a,0x01] +-ftintrne.l.s $ft14, $fs3 +- +-# ASM-AND-OBJ: ftintrne.l.d $fs4, $fa6 +-# ASM: encoding: [0xdc,0xe8,0x1a,0x01] +-ftintrne.l.d $fs4, $fa6 +- +-# ASM-AND-OBJ: frint.d $fs5, $fa2 +-# ASM: encoding: [0x5d,0x48,0x1e,0x01] +-frint.d $fs5, $fa2 +diff --git a/llvm/test/MC/LoongArch/Basic/Float/d-invalid.s b/llvm/test/MC/LoongArch/Basic/Float/d-invalid.s +deleted file mode 100644 +index b38a4461a..000000000 +--- 
a/llvm/test/MC/LoongArch/Basic/Float/d-invalid.s ++++ /dev/null +@@ -1,7 +0,0 @@ +-# RUN: not llvm-mc --triple=loongarch32 -mattr=+d %s 2>&1 | FileCheck %s +- +-# CHECK: :[[#@LINE+1]]:1: error: instruction requires the following: LA64 Basic Integer and Privilege Instruction Set +-movgr2fr.d $fa0, $a0 +- +-# CHECK: :[[#@LINE+1]]:1: error: instruction requires the following: LA64 Basic Integer and Privilege Instruction Set +-movfr2gr.d $a0, $fa0 +diff --git a/llvm/test/MC/LoongArch/Basic/Float/d-memory.s b/llvm/test/MC/LoongArch/Basic/Float/d-memory.s +deleted file mode 100644 +index a8f04cefe..000000000 +--- a/llvm/test/MC/LoongArch/Basic/Float/d-memory.s ++++ /dev/null +@@ -1,31 +0,0 @@ +-# RUN: llvm-mc %s --triple=loongarch32 --mattr=+d --show-encoding \ +-# RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM %s +-# RUN: llvm-mc %s --triple=loongarch64 --mattr=+d --show-encoding \ +-# RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM %s +-# RUN: llvm-mc %s --triple=loongarch32 --mattr=+d --filetype=obj \ +-# RUN: | llvm-objdump -d --mattr=+d - \ +-# RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s +-# RUN: llvm-mc %s --triple=loongarch64 --mattr=+d --filetype=obj \ +-# RUN: | llvm-objdump -d --mattr=+d - \ +-# RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s +- +-## Support for the 'D' extension implies support for 'F' +-# ASM-AND-OBJ: fld.s $ft15, $t3, 250 +-# ASM: encoding: [0xf7,0xe9,0x03,0x2b] +-fld.s $ft15, $t3, 250 +- +-# ASM-AND-OBJ: fld.d $ft14, $t5, 114 +-# ASM: encoding: [0x36,0xca,0x81,0x2b] +-fld.d $ft14, $t5, 114 +- +-# ASM-AND-OBJ: fst.d $fs4, $a3, 198 +-# ASM: encoding: [0xfc,0x18,0xc3,0x2b] +-fst.d $fs4, $a3, 198 +- +-# ASM-AND-OBJ: fldx.d $fs3, $t1, $s8 +-# ASM: encoding: [0xbb,0x7d,0x34,0x38] +-fldx.d $fs3, $t1, $s8 +- +-# ASM-AND-OBJ: fstx.d $fa6, $t3, $t5 +-# ASM: encoding: [0xe6,0x45,0x3c,0x38] +-fstx.d $fa6, $t3, $t5 +diff --git a/llvm/test/MC/LoongArch/Basic/Float/d-move.s b/llvm/test/MC/LoongArch/Basic/Float/d-move.s +deleted file mode 100644 +index c3008add6..000000000 +--- a/llvm/test/MC/LoongArch/Basic/Float/d-move.s ++++ /dev/null +@@ -1,39 +0,0 @@ +-# RUN: llvm-mc %s --triple=loongarch32 --mattr=+d --show-encoding \ +-# RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM %s +-# RUN: llvm-mc %s --triple=loongarch64 --mattr=+d --show-encoding --defsym=LA64=1 \ +-# RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM,ASM-AND-OBJ64,ASM64 %s +-# RUN: llvm-mc %s --triple=loongarch32 --mattr=+d --filetype=obj \ +-# RUN: | llvm-objdump -d --mattr=+d - \ +-# RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s +-# RUN: llvm-mc %s --triple=loongarch64 --mattr=+d --filetype=obj --defsym=LA64=1 \ +-# RUN: | llvm-objdump -d --mattr=+d - \ +-# RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM-AND-OBJ64 %s +- +-## Support for the 'D' extension implies support for 'F' +-# ASM-AND-OBJ: fmov.s $ft5, $ft15 +-# ASM: encoding: [0xed,0x96,0x14,0x01] +-fmov.s $ft5, $ft15 +- +-# ASM-AND-OBJ: fmov.d $fs6, $ft1 +-# ASM: encoding: [0x3e,0x99,0x14,0x01] +-fmov.d $fs6, $ft1 +- +-# ASM-AND-OBJ: fsel $ft10, $ft12, $ft13, $fcc4 +-# ASM: encoding: [0x92,0x56,0x02,0x0d] +-fsel $ft10, $ft12, $ft13, $fcc4 +- +-# ASM-AND-OBJ64: movgr2frh.w $ft15, $s3 +-# ASM64: encoding: [0x57,0xaf,0x14,0x01] +-movgr2frh.w $ft15, $s3 +- +-.ifdef LA64 +- +-# ASM-AND-OBJ64: movgr2fr.d $fs6, $a7 +-# ASM64: encoding: [0x7e,0xa9,0x14,0x01] +-movgr2fr.d $fs6, $a7 +- +-# ASM-AND-OBJ64: movfr2gr.d $s3, $ft9 +-# ASM64: encoding: [0x3a,0xba,0x14,0x01] +-movfr2gr.d $s3, $ft9 +- +-.endif +diff --git 
a/llvm/test/MC/LoongArch/Basic/Float/f-arith.s b/llvm/test/MC/LoongArch/Basic/Float/f-arith.s +deleted file mode 100644 +index a5873a545..000000000 +--- a/llvm/test/MC/LoongArch/Basic/Float/f-arith.s ++++ /dev/null +@@ -1,94 +0,0 @@ +-# RUN: llvm-mc %s --triple=loongarch32 --mattr=+f --show-encoding \ +-# RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM %s +-# RUN: llvm-mc %s --triple=loongarch64 --mattr=+f --show-encoding \ +-# RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM %s +-# RUN: llvm-mc %s --triple=loongarch32 --mattr=+f --filetype=obj \ +-# RUN: | llvm-objdump -d --mattr=+f - \ +-# RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s +-# RUN: llvm-mc %s --triple=loongarch64 --mattr=+f --filetype=obj \ +-# RUN: | llvm-objdump -d --mattr=+f - \ +-# RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s +- +-# ASM-AND-OBJ: fadd.s $fs5, $ft7, $fs1 +-# ASM: encoding: [0xfd,0xe5,0x00,0x01] +-fadd.s $fs5, $ft7, $fs1 +- +-# ASM-AND-OBJ: fsub.s $ft6, $fa6, $fs7 +-# ASM: encoding: [0xce,0xfc,0x02,0x01] +-fsub.s $ft6, $fa6, $fs7 +- +-# ASM-AND-OBJ: fmul.s $fa0, $fa7, $ft9 +-# ASM: encoding: [0xe0,0xc4,0x04,0x01] +-fmul.s $fa0, $fa7, $ft9 +- +-# ASM-AND-OBJ: fdiv.s $ft12, $fs0, $ft11 +-# ASM: encoding: [0x14,0xcf,0x06,0x01] +-fdiv.s $ft12, $fs0, $ft11 +- +-# ASM-AND-OBJ: fmadd.s $fa3, $ft8, $fa3, $ft7 +-# ASM: encoding: [0x03,0x8e,0x17,0x08] +-fmadd.s $fa3, $ft8, $fa3, $ft7 +- +-# ASM-AND-OBJ: fmsub.s $ft15, $ft3, $ft13, $fa4 +-# ASM: encoding: [0x77,0x55,0x52,0x08] +-fmsub.s $ft15, $ft3, $ft13, $fa4 +- +-# ASM-AND-OBJ: fnmadd.s $fs5, $fa1, $fs0, $ft12 +-# ASM: encoding: [0x3d,0x60,0x9a,0x08] +-fnmadd.s $fs5, $fa1, $fs0, $ft12 +- +-# ASM-AND-OBJ: fnmsub.s $ft0, $fa4, $fs0, $fs1 +-# ASM: encoding: [0x88,0xe0,0xdc,0x08] +-fnmsub.s $ft0, $fa4, $fs0, $fs1 +- +-# ASM-AND-OBJ: fmax.s $ft14, $fa6, $fs3 +-# ASM: encoding: [0xd6,0xec,0x08,0x01] +-fmax.s $ft14, $fa6, $fs3 +- +-# ASM-AND-OBJ: fmin.s $ft6, $ft2, $ft11 +-# ASM: encoding: [0x4e,0xcd,0x0a,0x01] +-fmin.s $ft6, $ft2, $ft11 +- +-# ASM-AND-OBJ: fmaxa.s $ft1, $fs3, $fs7 +-# ASM: encoding: [0x69,0xff,0x0c,0x01] +-fmaxa.s $ft1, $fs3, $fs7 +- +-# ASM-AND-OBJ: fmina.s $ft7, $ft10, $fa1 +-# ASM: encoding: [0x4f,0x86,0x0e,0x01] +-fmina.s $ft7, $ft10, $fa1 +- +-# ASM-AND-OBJ: fabs.s $fs4, $ft4 +-# ASM: encoding: [0x9c,0x05,0x14,0x01] +-fabs.s $fs4, $ft4 +- +-# ASM-AND-OBJ: fneg.s $ft13, $fs0 +-# ASM: encoding: [0x15,0x17,0x14,0x01] +-fneg.s $ft13, $fs0 +- +-# ASM-AND-OBJ: fsqrt.s $fs3, $ft10 +-# ASM: encoding: [0x5b,0x46,0x14,0x01] +-fsqrt.s $fs3, $ft10 +- +-# ASM-AND-OBJ: frecip.s $ft9, $fs3 +-# ASM: encoding: [0x71,0x57,0x14,0x01] +-frecip.s $ft9, $fs3 +- +-# ASM-AND-OBJ: frsqrt.s $fs1, $ft4 +-# ASM: encoding: [0x99,0x65,0x14,0x01] +-frsqrt.s $fs1, $ft4 +- +-# ASM-AND-OBJ: fscaleb.s $ft13, $ft15, $fa6 +-# ASM: encoding: [0xf5,0x9a,0x10,0x01] +-fscaleb.s $ft13, $ft15, $fa6 +- +-# ASM-AND-OBJ: flogb.s $fs7, $ft15 +-# ASM: encoding: [0xff,0x26,0x14,0x01] +-flogb.s $fs7, $ft15 +- +-# ASM-AND-OBJ: fcopysign.s $ft5, $fs0, $ft15 +-# ASM: encoding: [0x0d,0xdf,0x12,0x01] +-fcopysign.s $ft5, $fs0, $ft15 +- +-# ASM-AND-OBJ: fclass.s $ft12, $ft1 +-# ASM: encoding: [0x34,0x35,0x14,0x01] +-fclass.s $ft12, $ft1 +diff --git a/llvm/test/MC/LoongArch/Basic/Float/f-bound-check.s b/llvm/test/MC/LoongArch/Basic/Float/f-bound-check.s +deleted file mode 100644 +index bfff92ff8..000000000 +--- a/llvm/test/MC/LoongArch/Basic/Float/f-bound-check.s ++++ /dev/null +@@ -1,26 +0,0 @@ +-# RUN: llvm-mc %s --triple=loongarch32 --mattr=+f --show-encoding \ +-# RUN: | FileCheck 
--check-prefixes=ASM-AND-OBJ,ASM %s +-# RUN: llvm-mc %s --triple=loongarch64 --mattr=+f --show-encoding \ +-# RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM %s +-# RUN: llvm-mc %s --triple=loongarch32 --mattr=+f --filetype=obj \ +-# RUN: | llvm-objdump -d --mattr=+f - \ +-# RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s +-# RUN: llvm-mc %s --triple=loongarch64 --mattr=+f --filetype=obj \ +-# RUN: | llvm-objdump -d --mattr=+f - \ +-# RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s +- +-# ASM-AND-OBJ: fldgt.s $fa3, $s4, $t1 +-# ASM: encoding: [0x63,0x37,0x74,0x38] +-fldgt.s $fa3, $s4, $t1 +- +-# ASM-AND-OBJ: fldle.s $fs0, $s6, $t5 +-# ASM: encoding: [0xb8,0x47,0x75,0x38] +-fldle.s $fs0, $s6, $t5 +- +-# ASM-AND-OBJ: fstgt.s $fs7, $t1, $s7 +-# ASM: encoding: [0xbf,0x79,0x76,0x38] +-fstgt.s $fs7, $t1, $s7 +- +-# ASM-AND-OBJ: fstle.s $ft5, $t1, $a3 +-# ASM: encoding: [0xad,0x1d,0x77,0x38] +-fstle.s $ft5, $t1, $a3 +diff --git a/llvm/test/MC/LoongArch/Basic/Float/f-branch.s b/llvm/test/MC/LoongArch/Basic/Float/f-branch.s +deleted file mode 100644 +index 583008b5a..000000000 +--- a/llvm/test/MC/LoongArch/Basic/Float/f-branch.s ++++ /dev/null +@@ -1,18 +0,0 @@ +-# RUN: llvm-mc %s --triple=loongarch32 --mattr=+f --show-encoding \ +-# RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM %s +-# RUN: llvm-mc %s --triple=loongarch64 --mattr=+f --show-encoding \ +-# RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM %s +-# RUN: llvm-mc %s --triple=loongarch32 --mattr=+f --filetype=obj \ +-# RUN: | llvm-objdump -d --mattr=+f - \ +-# RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s +-# RUN: llvm-mc %s --triple=loongarch64 --mattr=+f --filetype=obj \ +-# RUN: | llvm-objdump -d --mattr=+f - \ +-# RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s +- +-# ASM-AND-OBJ: bceqz $fcc6, 12 +-# ASM: encoding: [0xc0,0x0c,0x00,0x48] +-bceqz $fcc6, 12 +- +-# ASM-AND-OBJ: bcnez $fcc6, 72 +-# ASM: encoding: [0xc0,0x49,0x00,0x48] +-bcnez $fcc6, 72 +diff --git a/llvm/test/MC/LoongArch/Basic/Float/f-comp.s b/llvm/test/MC/LoongArch/Basic/Float/f-comp.s +deleted file mode 100644 +index cc4e1470d..000000000 +--- a/llvm/test/MC/LoongArch/Basic/Float/f-comp.s ++++ /dev/null +@@ -1,98 +0,0 @@ +-# RUN: llvm-mc %s --triple=loongarch32 --mattr=+f --show-encoding \ +-# RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM %s +-# RUN: llvm-mc %s --triple=loongarch64 --mattr=+f --show-encoding \ +-# RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM %s +-# RUN: llvm-mc %s --triple=loongarch32 --mattr=+f --filetype=obj \ +-# RUN: | llvm-objdump -d --mattr=+f - \ +-# RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s +-# RUN: llvm-mc %s --triple=loongarch64 --mattr=+f --filetype=obj \ +-# RUN: | llvm-objdump -d --mattr=+f - \ +-# RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s +- +-# ASM-AND-OBJ: fcmp.caf.s $fcc0, $fa0, $fa1 +-# ASM: encoding: [0x00,0x04,0x10,0x0c] +-fcmp.caf.s $fcc0, $fa0, $fa1 +- +-# ASM-AND-OBJ: fcmp.cun.s $fcc0, $fa0, $fa1 +-# ASM: encoding: [0x00,0x04,0x14,0x0c] +-fcmp.cun.s $fcc0, $fa0, $fa1 +- +-# ASM-AND-OBJ: fcmp.ceq.s $fcc0, $fa0, $fa1 +-# ASM: encoding: [0x00,0x04,0x12,0x0c] +-fcmp.ceq.s $fcc0, $fa0, $fa1 +- +-# ASM-AND-OBJ: fcmp.cueq.s $fcc0, $fa0, $fa1 +-# ASM: encoding: [0x00,0x04,0x16,0x0c] +-fcmp.cueq.s $fcc0, $fa0, $fa1 +- +-# ASM-AND-OBJ: fcmp.clt.s $fcc0, $fa0, $fa1 +-# ASM: encoding: [0x00,0x04,0x11,0x0c] +-fcmp.clt.s $fcc0, $fa0, $fa1 +- +-# ASM-AND-OBJ: fcmp.cult.s $fcc0, $fa0, $fa1 +-# ASM: encoding: [0x00,0x04,0x15,0x0c] +-fcmp.cult.s $fcc0, $fa0, $fa1 +- +-# ASM-AND-OBJ: fcmp.cle.s $fcc0, $fa0, $fa1 +-# ASM: 
encoding: [0x00,0x04,0x13,0x0c] +-fcmp.cle.s $fcc0, $fa0, $fa1 +- +-# ASM-AND-OBJ: fcmp.cule.s $fcc0, $fa0, $fa1 +-# ASM: encoding: [0x00,0x04,0x17,0x0c] +-fcmp.cule.s $fcc0, $fa0, $fa1 +- +-# ASM-AND-OBJ: fcmp.cne.s $fcc0, $fa0, $fa1 +-# ASM: encoding: [0x00,0x04,0x18,0x0c] +-fcmp.cne.s $fcc0, $fa0, $fa1 +- +-# ASM-AND-OBJ: fcmp.cor.s $fcc0, $fa0, $fa1 +-# ASM: encoding: [0x00,0x04,0x1a,0x0c] +-fcmp.cor.s $fcc0, $fa0, $fa1 +- +-# ASM-AND-OBJ: fcmp.cune.s $fcc0, $fa0, $fa1 +-# ASM: encoding: [0x00,0x04,0x1c,0x0c] +-fcmp.cune.s $fcc0, $fa0, $fa1 +- +-# ASM-AND-OBJ: fcmp.saf.s $fcc0, $fa0, $fa1 +-# ASM: encoding: [0x00,0x84,0x10,0x0c] +-fcmp.saf.s $fcc0, $fa0, $fa1 +- +-# ASM-AND-OBJ: fcmp.sun.s $fcc0, $fa0, $fa1 +-# ASM: encoding: [0x00,0x84,0x14,0x0c] +-fcmp.sun.s $fcc0, $fa0, $fa1 +- +-# ASM-AND-OBJ: fcmp.seq.s $fcc0, $fa0, $fa1 +-# ASM: encoding: [0x00,0x84,0x12,0x0c] +-fcmp.seq.s $fcc0, $fa0, $fa1 +- +-# ASM-AND-OBJ: fcmp.sueq.s $fcc0, $fa0, $fa1 +-# ASM: encoding: [0x00,0x84,0x16,0x0c] +-fcmp.sueq.s $fcc0, $fa0, $fa1 +- +-# ASM-AND-OBJ: fcmp.slt.s $fcc0, $fa0, $fa1 +-# ASM: encoding: [0x00,0x84,0x11,0x0c] +-fcmp.slt.s $fcc0, $fa0, $fa1 +- +-# ASM-AND-OBJ: fcmp.sult.s $fcc0, $fa0, $fa1 +-# ASM: encoding: [0x00,0x84,0x15,0x0c] +-fcmp.sult.s $fcc0, $fa0, $fa1 +- +-# ASM-AND-OBJ: fcmp.sle.s $fcc0, $fa0, $fa1 +-# ASM: encoding: [0x00,0x84,0x13,0x0c] +-fcmp.sle.s $fcc0, $fa0, $fa1 +- +-# ASM-AND-OBJ: fcmp.sule.s $fcc0, $fa0, $fa1 +-# ASM: encoding: [0x00,0x84,0x17,0x0c] +-fcmp.sule.s $fcc0, $fa0, $fa1 +- +-# ASM-AND-OBJ: fcmp.sne.s $fcc0, $fa0, $fa1 +-# ASM: encoding: [0x00,0x84,0x18,0x0c] +-fcmp.sne.s $fcc0, $fa0, $fa1 +- +-# ASM-AND-OBJ: fcmp.sor.s $fcc0, $fa0, $fa1 +-# ASM: encoding: [0x00,0x84,0x1a,0x0c] +-fcmp.sor.s $fcc0, $fa0, $fa1 +- +-# ASM-AND-OBJ: fcmp.sune.s $fcc0, $fa0, $fa1 +-# ASM: encoding: [0x00,0x84,0x1c,0x0c] +-fcmp.sune.s $fcc0, $fa0, $fa1 +diff --git a/llvm/test/MC/LoongArch/Basic/Float/f-conv.s b/llvm/test/MC/LoongArch/Basic/Float/f-conv.s +deleted file mode 100644 +index db44077df..000000000 +--- a/llvm/test/MC/LoongArch/Basic/Float/f-conv.s ++++ /dev/null +@@ -1,38 +0,0 @@ +-# RUN: llvm-mc %s --triple=loongarch32 --mattr=+f --show-encoding \ +-# RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM %s +-# RUN: llvm-mc %s --triple=loongarch64 --mattr=+f --show-encoding \ +-# RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM %s +-# RUN: llvm-mc %s --triple=loongarch32 --mattr=+f --filetype=obj \ +-# RUN: | llvm-objdump -d --mattr=+f - \ +-# RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s +-# RUN: llvm-mc %s --triple=loongarch64 --mattr=+f --filetype=obj \ +-# RUN: | llvm-objdump -d --mattr=+f - \ +-# RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s +- +-# ASM-AND-OBJ: ffint.s.w $fs6, $fa5 +-# ASM: encoding: [0xbe,0x10,0x1d,0x01] +-ffint.s.w $fs6, $fa5 +- +-# ASM-AND-OBJ: ftint.w.s $ft13, $ft5 +-# ASM: encoding: [0xb5,0x05,0x1b,0x01] +-ftint.w.s $ft13, $ft5 +- +-# ASM-AND-OBJ: ftintrm.w.s $ft8, $ft8 +-# ASM: encoding: [0x10,0x06,0x1a,0x01] +-ftintrm.w.s $ft8, $ft8 +- +-# ASM-AND-OBJ: ftintrp.w.s $ft6, $fs7 +-# ASM: encoding: [0xee,0x47,0x1a,0x01] +-ftintrp.w.s $ft6, $fs7 +- +-# ASM-AND-OBJ: ftintrz.w.s $fa4, $fs5 +-# ASM: encoding: [0xa4,0x87,0x1a,0x01] +-ftintrz.w.s $fa4, $fs5 +- +-# ASM-AND-OBJ: ftintrne.w.s $fa4, $ft9 +-# ASM: encoding: [0x24,0xc6,0x1a,0x01] +-ftintrne.w.s $fa4, $ft9 +- +-# ASM-AND-OBJ: frint.s $fa5, $ft9 +-# ASM: encoding: [0x25,0x46,0x1e,0x01] +-frint.s $fa5, $ft9 +diff --git a/llvm/test/MC/LoongArch/Basic/Float/f-invalid.s 
b/llvm/test/MC/LoongArch/Basic/Float/f-invalid.s +deleted file mode 100644 +index 2ab91b3f1..000000000 +--- a/llvm/test/MC/LoongArch/Basic/Float/f-invalid.s ++++ /dev/null +@@ -1,4 +0,0 @@ +-# RUN: not llvm-mc --triple=loongarch32 -mattr=+f %s 2>&1 | FileCheck %s +- +-# CHECK: :[[#@LINE+1]]:1: error: instruction requires the following: 'D' (Double-Precision Floating-Point) +-fadd.d $fa0, $fa0, $fa0 +diff --git a/llvm/test/MC/LoongArch/Basic/Float/f-memory.s b/llvm/test/MC/LoongArch/Basic/Float/f-memory.s +deleted file mode 100644 +index b5fbd9abd..000000000 +--- a/llvm/test/MC/LoongArch/Basic/Float/f-memory.s ++++ /dev/null +@@ -1,26 +0,0 @@ +-# RUN: llvm-mc %s --triple=loongarch32 --mattr=+f --show-encoding \ +-# RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM %s +-# RUN: llvm-mc %s --triple=loongarch64 --mattr=+f --show-encoding \ +-# RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM %s +-# RUN: llvm-mc %s --triple=loongarch32 --mattr=+f --filetype=obj \ +-# RUN: | llvm-objdump -d --mattr=+f - \ +-# RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s +-# RUN: llvm-mc %s --triple=loongarch64 --mattr=+f --filetype=obj \ +-# RUN: | llvm-objdump -d --mattr=+f - \ +-# RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s +- +-# ASM-AND-OBJ: fld.s $ft15, $t3, 250 +-# ASM: encoding: [0xf7,0xe9,0x03,0x2b] +-fld.s $ft15, $t3, 250 +- +-# ASM-AND-OBJ: fst.s $fs6, $t7, 230 +-# ASM: encoding: [0x7e,0x9a,0x43,0x2b] +-fst.s $fs6, $t7, 230 +- +-# ASM-AND-OBJ: fldx.s $fa1, $t3, $t7 +-# ASM: encoding: [0xe1,0x4d,0x30,0x38] +-fldx.s $fa1, $t3, $t7 +- +-# ASM-AND-OBJ: fstx.s $fs2, $sp, $fp +-# ASM: encoding: [0x7a,0x58,0x38,0x38] +-fstx.s $fs2, $sp, $fp +diff --git a/llvm/test/MC/LoongArch/Basic/Float/f-move.s b/llvm/test/MC/LoongArch/Basic/Float/f-move.s +deleted file mode 100644 +index da9107686..000000000 +--- a/llvm/test/MC/LoongArch/Basic/Float/f-move.s ++++ /dev/null +@@ -1,74 +0,0 @@ +-# RUN: llvm-mc %s --triple=loongarch32 --mattr=+f --show-encoding \ +-# RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM %s +-# RUN: llvm-mc %s --triple=loongarch64 --mattr=+f --show-encoding \ +-# RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM %s +-# RUN: llvm-mc %s --triple=loongarch32 --mattr=+f --filetype=obj \ +-# RUN: | llvm-objdump -d --mattr=+f - \ +-# RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s +-# RUN: llvm-mc %s --triple=loongarch64 --mattr=+f --filetype=obj \ +-# RUN: | llvm-objdump -d --mattr=+f - \ +-# RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s +- +-# ASM-AND-OBJ: fmov.s $ft5, $ft15 +-# ASM: encoding: [0xed,0x96,0x14,0x01] +-fmov.s $ft5, $ft15 +- +-# ASM-AND-OBJ: fsel $ft10, $ft12, $ft13, $fcc4 +-# ASM: encoding: [0x92,0x56,0x02,0x0d] +-fsel $ft10, $ft12, $ft13, $fcc4 +- +-# ASM-AND-OBJ: movgr2fr.w $fa6, $tp +-# ASM: encoding: [0x46,0xa4,0x14,0x01] +-movgr2fr.w $fa6, $tp +- +-# ASM-AND-OBJ: movfr2gr.s $a6, $ft14 +-# ASM: encoding: [0xca,0xb6,0x14,0x01] +-movfr2gr.s $a6, $ft14 +- +-# ASM-AND-OBJ: movgr2fcsr $fcsr0, $a0 +-# ASM: encoding: [0x80,0xc0,0x14,0x01] +-movgr2fcsr $fcsr0, $a0 +- +-# ASM-AND-OBJ: movfcsr2gr $a0, $fcsr0 +-# ASM: encoding: [0x04,0xc8,0x14,0x01] +-movfcsr2gr $a0, $fcsr0 +- +-# ASM-AND-OBJ: movgr2fcsr $fcsr1, $a0 +-# ASM: encoding: [0x81,0xc0,0x14,0x01] +-movgr2fcsr $fcsr1, $a0 +- +-# ASM-AND-OBJ: movfcsr2gr $a0, $fcsr1 +-# ASM: encoding: [0x24,0xc8,0x14,0x01] +-movfcsr2gr $a0, $fcsr1 +- +-# ASM-AND-OBJ: movgr2fcsr $fcsr2, $a0 +-# ASM: encoding: [0x82,0xc0,0x14,0x01] +-movgr2fcsr $fcsr2, $a0 +- +-# ASM-AND-OBJ: movfcsr2gr $a0, $fcsr2 +-# ASM: encoding: [0x44,0xc8,0x14,0x01] 
+-movfcsr2gr $a0, $fcsr2 +- +-# ASM-AND-OBJ: movgr2fcsr $fcsr3, $a0 +-# ASM: encoding: [0x83,0xc0,0x14,0x01] +-movgr2fcsr $fcsr3, $a0 +- +-# ASM-AND-OBJ: movfcsr2gr $a0, $fcsr3 +-# ASM: encoding: [0x64,0xc8,0x14,0x01] +-movfcsr2gr $a0, $fcsr3 +- +-# ASM-AND-OBJ: movfr2cf $fcc4, $ft3 +-# ASM: encoding: [0x64,0xd1,0x14,0x01] +-movfr2cf $fcc4, $ft3 +- +-# ASM-AND-OBJ: movcf2fr $ft8, $fcc0 +-# ASM: encoding: [0x10,0xd4,0x14,0x01] +-movcf2fr $ft8, $fcc0 +- +-# ASM-AND-OBJ: movgr2cf $fcc5, $ra +-# ASM: encoding: [0x25,0xd8,0x14,0x01] +-movgr2cf $fcc5, $ra +- +-# ASM-AND-OBJ: movcf2gr $r21, $fcc7 +-# ASM: encoding: [0xf5,0xdc,0x14,0x01] +-movcf2gr $r21, $fcc7 +diff --git a/llvm/test/MC/LoongArch/Basic/Integer/arith.s b/llvm/test/MC/LoongArch/Basic/Integer/arith.s +deleted file mode 100644 +index bfb3a4c11..000000000 +--- a/llvm/test/MC/LoongArch/Basic/Integer/arith.s ++++ /dev/null +@@ -1,212 +0,0 @@ +-## Test valid arithmetic operation instructions +- +-# RUN: llvm-mc %s --triple=loongarch32 --show-encoding \ +-# RUN: | FileCheck --check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s +-# RUN: llvm-mc %s --triple=loongarch64 --show-encoding --defsym=LA64=1 \ +-# RUN: | FileCheck --check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ,CHECK64-ASM,CHECK64-ASM-AND-OBJ %s +-# RUN: llvm-mc %s --triple=loongarch32 --filetype=obj | llvm-objdump -d - \ +-# RUN: | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s +-# RUN: llvm-mc %s --triple=loongarch64 --filetype=obj --defsym=LA64=1 | llvm-objdump -d - \ +-# RUN: | FileCheck --check-prefixes=CHECK-ASM-AND-OBJ,CHECK64-ASM-AND-OBJ %s +- +-############################################################# +-## Instructions for both loongarch32 and loongarch64 +-############################################################# +- +-# CHECK-ASM-AND-OBJ: add.w $a5, $ra, $s8 +-# CHECK-ASM: encoding: [0x29,0x7c,0x10,0x00] +-add.w $a5, $ra, $s8 +- +-# CHECK-ASM-AND-OBJ: sub.w $r21, $s2, $t7 +-# CHECK-ASM: encoding: [0x35,0x4f,0x11,0x00] +-sub.w $r21, $s2, $t7 +- +-# CHECK-ASM-AND-OBJ: addi.w $a1, $a3, 246 +-# CHECK-ASM: encoding: [0xe5,0xd8,0x83,0x02] +-addi.w $a1, $a3, 246 +- +-# CHECK-ASM-AND-OBJ: alsl.w $tp, $t5, $tp, 4 +-# CHECK-ASM: encoding: [0x22,0x8a,0x05,0x00] +-alsl.w $tp, $t5, $tp, 4 +- +-# CHECK-ASM-AND-OBJ: lu12i.w $t4, 49 +-# CHECK-ASM: encoding: [0x30,0x06,0x00,0x14] +-lu12i.w $t4, 49 +- +-# CHECK-ASM-AND-OBJ: lu12i.w $a0, -1 +-# CHECK-ASM: encoding: [0xe4,0xff,0xff,0x15] +-lu12i.w $a0, -1 +- +-# CHECK-ASM-AND-OBJ: slt $s6, $s3, $tp +-# CHECK-ASM: encoding: [0x5d,0x0b,0x12,0x00] +-slt $s6, $s3, $tp +- +-# CHECK-ASM-AND-OBJ: sltu $a7, $r21, $s6 +-# CHECK-ASM: encoding: [0xab,0xf6,0x12,0x00] +-sltu $a7, $r21, $s6 +- +-# CHECK-ASM-AND-OBJ: slti $s4, $ra, 235 +-# CHECK-ASM: encoding: [0x3b,0xac,0x03,0x02] +-slti $s4, $ra, 235 +- +-# CHECK-ASM-AND-OBJ: sltui $zero, $a4, 162 +-# CHECK-ASM: encoding: [0x00,0x89,0x42,0x02] +-sltui $zero, $a4, 162 +- +-# CHECK-ASM-AND-OBJ: pcaddi $a5, 187 +-# CHECK-ASM: encoding: [0x69,0x17,0x00,0x18] +-pcaddi $a5, 187 +- +-# CHECK-ASM-AND-OBJ: pcaddu12i $zero, 37 +-# CHECK-ASM: encoding: [0xa0,0x04,0x00,0x1c] +-pcaddu12i $zero, 37 +- +-# CHECK-ASM-AND-OBJ: pcalau12i $a6, 89 +-# CHECK-ASM: encoding: [0x2a,0x0b,0x00,0x1a] +-pcalau12i $a6, 89 +- +-# CHECK-ASM-AND-OBJ: and $t7, $s8, $ra +-# CHECK-ASM: encoding: [0xf3,0x87,0x14,0x00] +-and $t7, $s8, $ra +- +-# CHECK-ASM-AND-OBJ: or $t5, $t4, $s7 +-# CHECK-ASM: encoding: [0x11,0x7a,0x15,0x00] +-or $t5, $t4, $s7 +- +-# CHECK-ASM-AND-OBJ: nor $a1, $t6, $a1 +-# CHECK-ASM: encoding: [0x45,0x16,0x14,0x00] +-nor 
$a1, $t6, $a1 +- +-# CHECK-ASM-AND-OBJ: xor $t3, $t7, $a4 +-# CHECK-ASM: encoding: [0x6f,0xa2,0x15,0x00] +-xor $t3, $t7, $a4 +- +-# CHECK-ASM-AND-OBJ: andn $s5, $s2, $a1 +-# CHECK-ASM: encoding: [0x3c,0x97,0x16,0x00] +-andn $s5, $s2, $a1 +- +-# CHECK-ASM-AND-OBJ: orn $tp, $sp, $s2 +-# CHECK-ASM: encoding: [0x62,0x64,0x16,0x00] +-orn $tp, $sp, $s2 +- +-# CHECK-ASM-AND-OBJ: andi $s2, $zero, 106 +-# CHECK-ASM: encoding: [0x19,0xa8,0x41,0x03] +-andi $s2, $zero, 106 +- +-# CHECK-ASM-AND-OBJ: ori $t5, $a1, 47 +-# CHECK-ASM: encoding: [0xb1,0xbc,0x80,0x03] +-ori $t5, $a1, 47 +- +-# CHECK-ASM-AND-OBJ: xori $t6, $s0, 99 +-# CHECK-ASM: encoding: [0xf2,0x8e,0xc1,0x03] +-xori $t6, $s0, 99 +- +-# CHECK-ASM-AND-OBJ: mul.w $a0, $t6, $sp +-# CHECK-ASM: encoding: [0x44,0x0e,0x1c,0x00] +-mul.w $a0, $t6, $sp +- +-# CHECK-ASM-AND-OBJ: mulh.w $s4, $s0, $zero +-# CHECK-ASM: encoding: [0xfb,0x82,0x1c,0x00] +-mulh.w $s4, $s0, $zero +- +-# CHECK-ASM-AND-OBJ: mulh.wu $a6, $t5, $s1 +-# CHECK-ASM: encoding: [0x2a,0x62,0x1d,0x00] +-mulh.wu $a6, $t5, $s1 +- +-# CHECK-ASM-AND-OBJ: div.w $s7, $t1, $s2 +-# CHECK-ASM: encoding: [0xbe,0x65,0x20,0x00] +-div.w $s7, $t1, $s2 +- +-# CHECK-ASM-AND-OBJ: mod.w $ra, $s3, $a6 +-# CHECK-ASM: encoding: [0x41,0xab,0x20,0x00] +-mod.w $ra, $s3, $a6 +- +-# CHECK-ASM-AND-OBJ: div.wu $t7, $s0, $zero +-# CHECK-ASM: encoding: [0xf3,0x02,0x21,0x00] +-div.wu $t7, $s0, $zero +- +-# CHECK-ASM-AND-OBJ: mod.wu $s4, $a5, $t5 +-# CHECK-ASM: encoding: [0x3b,0xc5,0x21,0x00] +-mod.wu $s4, $a5, $t5 +- +- +-############################################################# +-## Instructions only for loongarch64 +-############################################################# +- +-.ifdef LA64 +- +-# CHECK64-ASM-AND-OBJ: add.d $tp, $t6, $s4 +-# CHECK64-ASM: encoding: [0x42,0xee,0x10,0x00] +-add.d $tp, $t6, $s4 +- +-# CHECK64-ASM-AND-OBJ: sub.d $a3, $t0, $a3 +-# CHECK64-ASM: encoding: [0x87,0x9d,0x11,0x00] +-sub.d $a3, $t0, $a3 +- +-# CHECK64-ASM-AND-OBJ: addi.d $s5, $a2, 75 +-# CHECK64-ASM: encoding: [0xdc,0x2c,0xc1,0x02] +-addi.d $s5, $a2, 75 +- +-# CHECK64-ASM-AND-OBJ: addu16i.d $a5, $s0, 23 +-# CHECK64-ASM: encoding: [0xe9,0x5e,0x00,0x10] +-addu16i.d $a5, $s0, 23 +- +-# CHECK64-ASM-AND-OBJ: alsl.wu $t7, $a4, $s2, 1 +-# CHECK64-ASM: encoding: [0x13,0x65,0x06,0x00] +-alsl.wu $t7, $a4, $s2, 1 +- +-# CHECK64-ASM-AND-OBJ: alsl.d $t5, $a7, $a1, 3 +-# CHECK64-ASM: encoding: [0x71,0x15,0x2d,0x00] +-alsl.d $t5, $a7, $a1, 3 +- +-# CHECK64-ASM-AND-OBJ: lu32i.d $sp, 196 +-# CHECK64-ASM: encoding: [0x83,0x18,0x00,0x16] +-lu32i.d $sp, 196 +- +-# CHECK64-ASM-AND-OBJ: lu52i.d $t1, $a0, 195 +-# CHECK64-ASM: encoding: [0x8d,0x0c,0x03,0x03] +-lu52i.d $t1, $a0, 195 +- +-# CHECK64-ASM-AND-OBJ: pcaddu18i $t0, 26 +-# CHECK64-ASM: encoding: [0x4c,0x03,0x00,0x1e] +-pcaddu18i $t0, 26 +- +-# CHECK64-ASM-AND-OBJ: mul.d $ra, $t2, $s1 +-# CHECK64-ASM: encoding: [0xc1,0xe1,0x1d,0x00] +-mul.d $ra, $t2, $s1 +- +-# CHECK64-ASM-AND-OBJ: mulh.d $s5, $ra, $s4 +-# CHECK64-ASM: encoding: [0x3c,0x6c,0x1e,0x00] +-mulh.d $s5, $ra, $s4 +- +-# CHECK64-ASM-AND-OBJ: mulh.du $t1, $s4, $s6 +-# CHECK64-ASM: encoding: [0x6d,0xf7,0x1e,0x00] +-mulh.du $t1, $s4, $s6 +- +-# CHECK64-ASM-AND-OBJ: mulw.d.w $s4, $a2, $t5 +-# CHECK64-ASM: encoding: [0xdb,0x44,0x1f,0x00] +-mulw.d.w $s4, $a2, $t5 +- +-# CHECK64-ASM-AND-OBJ: mulw.d.wu $t5, $fp, $s7 +-# CHECK64-ASM: encoding: [0xd1,0xfa,0x1f,0x00] +-mulw.d.wu $t5, $fp, $s7 +- +-# CHECK64-ASM-AND-OBJ: div.d $s0, $a2, $r21 +-# CHECK64-ASM: encoding: [0xd7,0x54,0x22,0x00] +-div.d $s0, $a2, $r21 +- +-# CHECK64-ASM-AND-OBJ: 
mod.d $t4, $sp, $t3 +-# CHECK64-ASM: encoding: [0x70,0xbc,0x22,0x00] +-mod.d $t4, $sp, $t3 +- +-# CHECK64-ASM-AND-OBJ: div.du $s8, $s1, $t2 +-# CHECK64-ASM: encoding: [0x1f,0x3b,0x23,0x00] +-div.du $s8, $s1, $t2 +- +-# CHECK64-ASM-AND-OBJ: mod.du $s2, $s0, $s1 +-# CHECK64-ASM: encoding: [0xf9,0xe2,0x23,0x00] +-mod.du $s2, $s0, $s1 +- +-.endif +- +diff --git a/llvm/test/MC/LoongArch/Basic/Integer/atomic.s b/llvm/test/MC/LoongArch/Basic/Integer/atomic.s +deleted file mode 100644 +index 642740180..000000000 +--- a/llvm/test/MC/LoongArch/Basic/Integer/atomic.s ++++ /dev/null +@@ -1,185 +0,0 @@ +-## Test valid atomic memory access instructions. +- +-# RUN: llvm-mc %s --triple=loongarch32 --show-encoding \ +-# RUN: | FileCheck --check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s +-# RUN: llvm-mc %s --triple=loongarch64 --show-encoding --defsym=LA64=1 \ +-# RUN: | FileCheck --check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ,CHECK64-ASM,CHECK64-ASM-AND-OBJ %s +-# RUN: llvm-mc %s --triple=loongarch32 --filetype=obj | llvm-objdump -d - \ +-# RUN: | FileCheck --check-prefixes=CHECK-ASM-AND-OBJ %s +-# RUN: llvm-mc %s --triple=loongarch64 --filetype=obj --defsym=LA64=1 | llvm-objdump -d - \ +-# RUN: | FileCheck --check-prefixes=CHECK-ASM-AND-OBJ,CHECK64-ASM-AND-OBJ %s +- +-############################################################# +-## Instructions for both loongarch32 and loongarch64 +-############################################################# +- +-# CHECK-ASM-AND-OBJ: ll.w $tp, $s4, 220 +-# CHECK-ASM: encoding: [0x62,0xdf,0x00,0x20] +-ll.w $tp, $s4, 220 +- +-# CHECK-ASM-AND-OBJ: sc.w $t7, $t2, 56 +-# CHECK-ASM: encoding: [0xd3,0x39,0x00,0x21] +-sc.w $t7, $t2, 56 +- +- +- +-############################################################# +-## Instructions only for loongarch64 +-############################################################# +- +-.ifdef LA64 +- +-# CHECK64-ASM-AND-OBJ: amswap.w $a2, $t0, $s1 +-# CHECK64-ASM: encoding: [0x06,0x33,0x60,0x38] +-amswap.w $a2, $t0, $s1 +- +-# CHECK64-ASM-AND-OBJ: amswap.d $tp, $t2, $fp +-# CHECK64-ASM: encoding: [0xc2,0xba,0x60,0x38] +-amswap.d $tp, $t2, $fp +- +-# CHECK64-ASM-AND-OBJ: amadd.w $a4, $t0, $r21 +-# CHECK64-ASM: encoding: [0xa8,0x32,0x61,0x38] +-amadd.w $a4, $t0, $r21 +- +-# CHECK64-ASM-AND-OBJ: amadd.d $a1, $t5, $s6 +-# CHECK64-ASM: encoding: [0xa5,0xc7,0x61,0x38] +-amadd.d $a1, $t5, $s6 +- +-# CHECK64-ASM-AND-OBJ: amand.w $a0, $t7, $fp +-# CHECK64-ASM: encoding: [0xc4,0x4e,0x62,0x38] +-amand.w $a0, $t7, $fp +- +-# CHECK64-ASM-AND-OBJ: amand.d $a6, $t6, $s6 +-# CHECK64-ASM: encoding: [0xaa,0xcb,0x62,0x38] +-amand.d $a6, $t6, $s6 +- +-# CHECK64-ASM-AND-OBJ: amor.w $a2, $t4, $s0 +-# CHECK64-ASM: encoding: [0xe6,0x42,0x63,0x38] +-amor.w $a2, $t4, $s0 +- +-# CHECK64-ASM-AND-OBJ: amor.d $sp, $t4, $s1 +-# CHECK64-ASM: encoding: [0x03,0xc3,0x63,0x38] +-amor.d $sp, $t4, $s1 +- +-# CHECK64-ASM-AND-OBJ: amxor.w $tp, $t3, $s0 +-# CHECK64-ASM: encoding: [0xe2,0x3e,0x64,0x38] +-amxor.w $tp, $t3, $s0 +- +-# CHECK64-ASM-AND-OBJ: amxor.d $a4, $t8, $s5 +-# CHECK64-ASM: encoding: [0x88,0xd3,0x64,0x38] +-amxor.d $a4, $t8, $s5 +- +-# CHECK64-ASM-AND-OBJ: ammax.w $ra, $a7, $s0 +-# CHECK64-ASM: encoding: [0xe1,0x2e,0x65,0x38] +-ammax.w $ra, $a7, $s0 +- +-# CHECK64-ASM-AND-OBJ: ammax.d $a5, $t8, $s4 +-# CHECK64-ASM: encoding: [0x69,0xd3,0x65,0x38] +-ammax.d $a5, $t8, $s4 +- +-# CHECK64-ASM-AND-OBJ: ammin.w $a5, $t2, $s0 +-# CHECK64-ASM: encoding: [0xe9,0x3a,0x66,0x38] +-ammin.w $a5, $t2, $s0 +- +-# CHECK64-ASM-AND-OBJ: ammin.d $a5, $t1, $fp +-# CHECK64-ASM: encoding: 
[0xc9,0xb6,0x66,0x38] +-ammin.d $a5, $t1, $fp +- +-# CHECK64-ASM-AND-OBJ: ammax.wu $a5, $a7, $fp +-# CHECK64-ASM: encoding: [0xc9,0x2e,0x67,0x38] +-ammax.wu $a5, $a7, $fp +- +-# CHECK64-ASM-AND-OBJ: ammax.du $a2, $t4, $s2 +-# CHECK64-ASM: encoding: [0x26,0xc3,0x67,0x38] +-ammax.du $a2, $t4, $s2 +- +-# CHECK64-ASM-AND-OBJ: ammin.wu $a4, $t6, $s7 +-# CHECK64-ASM: encoding: [0xc8,0x4b,0x68,0x38] +-ammin.wu $a4, $t6, $s7 +- +-# CHECK64-ASM-AND-OBJ: ammin.du $a3, $t4, $s2 +-# CHECK64-ASM: encoding: [0x27,0xc3,0x68,0x38] +-ammin.du $a3, $t4, $s2 +- +-# CHECK64-ASM-AND-OBJ: amswap_db.w $a2, $t0, $s1 +-# CHECK64-ASM: encoding: [0x06,0x33,0x69,0x38] +-amswap_db.w $a2, $t0, $s1 +- +-# CHECK64-ASM-AND-OBJ: amswap_db.d $tp, $t2, $fp +-# CHECK64-ASM: encoding: [0xc2,0xba,0x69,0x38] +-amswap_db.d $tp, $t2, $fp +- +-# CHECK64-ASM-AND-OBJ: amadd_db.w $a4, $t0, $r21 +-# CHECK64-ASM: encoding: [0xa8,0x32,0x6a,0x38] +-amadd_db.w $a4, $t0, $r21 +- +-# CHECK64-ASM-AND-OBJ: amadd_db.d $a1, $t5, $s6 +-# CHECK64-ASM: encoding: [0xa5,0xc7,0x6a,0x38] +-amadd_db.d $a1, $t5, $s6 +- +-# CHECK64-ASM-AND-OBJ: amand_db.w $a0, $t7, $fp +-# CHECK64-ASM: encoding: [0xc4,0x4e,0x6b,0x38] +-amand_db.w $a0, $t7, $fp +- +-# CHECK64-ASM-AND-OBJ: amand_db.d $a6, $t6, $s6 +-# CHECK64-ASM: encoding: [0xaa,0xcb,0x6b,0x38] +-amand_db.d $a6, $t6, $s6 +- +-# CHECK64-ASM-AND-OBJ: amor_db.w $a2, $t4, $s0 +-# CHECK64-ASM: encoding: [0xe6,0x42,0x6c,0x38] +-amor_db.w $a2, $t4, $s0 +- +-# CHECK64-ASM-AND-OBJ: amor_db.d $sp, $t4, $s1 +-# CHECK64-ASM: encoding: [0x03,0xc3,0x6c,0x38] +-amor_db.d $sp, $t4, $s1 +- +-# CHECK64-ASM-AND-OBJ: amxor_db.w $tp, $t3, $s0 +-# CHECK64-ASM: encoding: [0xe2,0x3e,0x6d,0x38] +-amxor_db.w $tp, $t3, $s0 +- +-# CHECK64-ASM-AND-OBJ: amxor_db.d $a4, $t8, $s5 +-# CHECK64-ASM: encoding: [0x88,0xd3,0x6d,0x38] +-amxor_db.d $a4, $t8, $s5 +- +-# CHECK64-ASM-AND-OBJ: ammax_db.w $ra, $a7, $s0 +-# CHECK64-ASM: encoding: [0xe1,0x2e,0x6e,0x38] +-ammax_db.w $ra, $a7, $s0 +- +-# CHECK64-ASM-AND-OBJ: ammax_db.d $a5, $t8, $s4 +-# CHECK64-ASM: encoding: [0x69,0xd3,0x6e,0x38] +-ammax_db.d $a5, $t8, $s4 +- +-# CHECK64-ASM-AND-OBJ: ammin_db.w $a5, $t2, $s0 +-# CHECK64-ASM: encoding: [0xe9,0x3a,0x6f,0x38] +-ammin_db.w $a5, $t2, $s0 +- +-# CHECK64-ASM-AND-OBJ: ammin_db.d $a5, $t1, $fp +-# CHECK64-ASM: encoding: [0xc9,0xb6,0x6f,0x38] +-ammin_db.d $a5, $t1, $fp +- +-# CHECK64-ASM-AND-OBJ: ammax_db.wu $a5, $a7, $fp +-# CHECK64-ASM: encoding: [0xc9,0x2e,0x70,0x38] +-ammax_db.wu $a5, $a7, $fp +- +-# CHECK64-ASM-AND-OBJ: ammax_db.du $a2, $t4, $s2 +-# CHECK64-ASM: encoding: [0x26,0xc3,0x70,0x38] +-ammax_db.du $a2, $t4, $s2 +- +-# CHECK64-ASM-AND-OBJ: ammin_db.wu $a4, $t6, $s7 +-# CHECK64-ASM: encoding: [0xc8,0x4b,0x71,0x38] +-ammin_db.wu $a4, $t6, $s7 +- +-# CHECK64-ASM-AND-OBJ: ammin_db.du $a3, $t4, $s2 +-# CHECK64-ASM: encoding: [0x27,0xc3,0x71,0x38] +-ammin_db.du $a3, $t4, $s2 +- +-# CHECK64-ASM-AND-OBJ: ll.d $s2, $s4, 16 +-# CHECK64-ASM: encoding: [0x79,0x13,0x00,0x22] +-ll.d $s2, $s4, 16 +- +-# CHECK64-ASM-AND-OBJ: sc.d $t5, $t5, 244 +-# CHECK64-ASM: encoding: [0x31,0xf6,0x00,0x23] +-sc.d $t5, $t5, 244 +- +-.endif +- +diff --git a/llvm/test/MC/LoongArch/Basic/Integer/barrier.s b/llvm/test/MC/LoongArch/Basic/Integer/barrier.s +deleted file mode 100644 +index a9462fc38..000000000 +--- a/llvm/test/MC/LoongArch/Basic/Integer/barrier.s ++++ /dev/null +@@ -1,19 +0,0 @@ +-## Test valid barrier instructions. 
+- +-# RUN: llvm-mc %s --triple=loongarch32 --show-encoding \ +-# RUN: | FileCheck --check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s +-# RUN: llvm-mc %s --triple=loongarch64 --show-encoding \ +-# RUN: | FileCheck --check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s +-# RUN: llvm-mc %s --triple=loongarch32 --filetype=obj | llvm-objdump -d - \ +-# RUN: | FileCheck --check-prefixes=CHECK-ASM-AND-OBJ %s +-# RUN: llvm-mc %s --triple=loongarch64 --filetype=obj | llvm-objdump -d - \ +-# RUN: | FileCheck --check-prefixes=CHECK-ASM-AND-OBJ %s +- +-# CHECK-ASM-AND-OBJ: dbar 0 +-# CHECK-ASM: encoding: [0x00,0x00,0x72,0x38] +-dbar 0 +- +-# CHECK-ASM-AND-OBJ: ibar 0 +-# CHECK-ASM: encoding: [0x00,0x80,0x72,0x38] +-ibar 0 +- +diff --git a/llvm/test/MC/LoongArch/Basic/Integer/bit-manipu.s b/llvm/test/MC/LoongArch/Basic/Integer/bit-manipu.s +deleted file mode 100644 +index 3cbe90611..000000000 +--- a/llvm/test/MC/LoongArch/Basic/Integer/bit-manipu.s ++++ /dev/null +@@ -1,136 +0,0 @@ +-## Test valid bit manipulation instructions. +- +-# RUN: llvm-mc %s --triple=loongarch32 --show-encoding \ +-# RUN: | FileCheck --check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s +-# RUN: llvm-mc %s --triple=loongarch64 --show-encoding --defsym=LA64=1 \ +-# RUN: | FileCheck --check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ,CHECK64-ASM,CHECK64-ASM-AND-OBJ %s +-# RUN: llvm-mc %s --triple=loongarch32 --filetype=obj | llvm-objdump -d - \ +-# RUN: | FileCheck --check-prefixes=CHECK-ASM-AND-OBJ %s +-# RUN: llvm-mc %s --triple=loongarch64 --filetype=obj --defsym=LA64=1 | llvm-objdump -d - \ +-# RUN: | FileCheck --check-prefixes=CHECK-ASM-AND-OBJ,CHECK64-ASM-AND-OBJ %s +- +-############################################################# +-## Instructions for both loongarch32 and loongarch64 +-############################################################# +- +-# CHECK-ASM: ext.w.b $t8, $t6 +-# CHECK-ASM: encoding: [0x54,0x5e,0x00,0x00] +-ext.w.b $t8, $t6 +- +-# CHECK-ASM: ext.w.h $s0, $s0 +-# CHECK-ASM: encoding: [0xf7,0x5a,0x00,0x00] +-ext.w.h $s0, $s0 +- +-# CHECK-ASM-AND-OBJ: clo.w $ra, $sp +-# CHECK-ASM: encoding: [0x61,0x10,0x00,0x00] +-clo.w $ra, $sp +- +-# CHECK-ASM-AND-OBJ: clz.w $a3, $a6 +-# CHECK-ASM: encoding: [0x47,0x15,0x00,0x00] +-clz.w $a3, $a6 +- +-# CHECK-ASM-AND-OBJ: cto.w $tp, $a2 +-# CHECK-ASM: encoding: [0xc2,0x18,0x00,0x00] +-cto.w $tp, $a2 +- +-# CHECK-ASM-AND-OBJ: ctz.w $a1, $fp +-# CHECK-ASM: encoding: [0xc5,0x1e,0x00,0x00] +-ctz.w $a1, $fp +- +-# CHECK-ASM-AND-OBJ: bytepick.w $s6, $zero, $t4, 0 +-# CHECK-ASM: encoding: [0x1d,0x40,0x08,0x00] +-bytepick.w $s6, $zero, $t4, 0 +- +-# CHECK-ASM-AND-OBJ: revb.2h $t8, $a7 +-# CHECK-ASM: encoding: [0x74,0x31,0x00,0x00] +-revb.2h $t8, $a7 +- +-# CHECK-ASM-AND-OBJ: bitrev.4b $r21, $s4 +-# CHECK-ASM: encoding: [0x75,0x4b,0x00,0x00] +-bitrev.4b $r21, $s4 +- +-# CHECK-ASM-AND-OBJ: bitrev.w $s2, $a1 +-# CHECK-ASM: encoding: [0xb9,0x50,0x00,0x00] +-bitrev.w $s2, $a1 +- +-# CHECK-ASM-AND-OBJ: bstrins.w $a4, $a7, 7, 2 +-# CHECK-ASM: encoding: [0x68,0x09,0x67,0x00] +-bstrins.w $a4, $a7, 7, 2 +- +-# CHECK-ASM-AND-OBJ: bstrpick.w $ra, $a5, 10, 4 +-# CHECK-ASM: encoding: [0x21,0x91,0x6a,0x00] +-bstrpick.w $ra, $a5, 10, 4 +- +-# CHECK-ASM-AND-OBJ: maskeqz $t8, $a7, $t6 +-# CHECK-ASM: encoding: [0x74,0x49,0x13,0x00] +-maskeqz $t8, $a7, $t6 +- +-# CHECK-ASM-AND-OBJ: masknez $t8, $t1, $s3 +-# CHECK-ASM: encoding: [0xb4,0xe9,0x13,0x00] +-masknez $t8, $t1, $s3 +- +- +-############################################################# +-## Instructions only for loongarch64 
+-############################################################# +- +-.ifdef LA64 +- +-# CHECK64-ASM-AND-OBJ: clo.d $s6, $ra +-# CHECK64-ASM: encoding: [0x3d,0x20,0x00,0x00] +-clo.d $s6, $ra +- +-# CHECK64-ASM-AND-OBJ: clz.d $s3, $s3 +-# CHECK64-ASM: encoding: [0x5a,0x27,0x00,0x00] +-clz.d $s3, $s3 +- +-# CHECK64-ASM-AND-OBJ: cto.d $t6, $t8 +-# CHECK64-ASM: encoding: [0x92,0x2a,0x00,0x00] +-cto.d $t6, $t8 +- +-# CHECK64-ASM-AND-OBJ: ctz.d $t5, $a6 +-# CHECK64-ASM: encoding: [0x51,0x2d,0x00,0x00] +-ctz.d $t5, $a6 +- +-# CHECK64-ASM-AND-OBJ: bytepick.d $t3, $t5, $t8, 4 +-# CHECK64-ASM: encoding: [0x2f,0x52,0x0e,0x00] +-bytepick.d $t3, $t5, $t8, 4 +- +-# CHECK64-ASM-AND-OBJ: revb.4h $t1, $t7 +-# CHECK64-ASM: encoding: [0x6d,0x36,0x00,0x00] +-revb.4h $t1, $t7 +- +-# CHECK64-ASM-AND-OBJ: revb.2w $s5, $s4 +-# CHECK64-ASM: encoding: [0x7c,0x3b,0x00,0x00] +-revb.2w $s5, $s4 +- +-# CHECK64-ASM-AND-OBJ: revb.d $zero, $s0 +-# CHECK64-ASM: encoding: [0xe0,0x3e,0x00,0x00] +-revb.d $zero, $s0 +- +-# CHECK64-ASM-AND-OBJ: revh.2w $s5, $a6 +-# CHECK64-ASM: encoding: [0x5c,0x41,0x00,0x00] +-revh.2w $s5, $a6 +- +-# CHECK64-ASM-AND-OBJ: revh.d $a5, $a3 +-# CHECK64-ASM: encoding: [0xe9,0x44,0x00,0x00] +-revh.d $a5, $a3 +- +-# CHECK64-ASM-AND-OBJ: bitrev.8b $t1, $s2 +-# CHECK64-ASM: encoding: [0x2d,0x4f,0x00,0x00] +-bitrev.8b $t1, $s2 +- +-# CHECK64-ASM-AND-OBJ: bitrev.d $t7, $s0 +-# CHECK64-ASM: encoding: [0xf3,0x56,0x00,0x00] +-bitrev.d $t7, $s0 +- +-# CHECK64-ASM-AND-OBJ: bstrins.d $a4, $a7, 7, 2 +-# CHECK64-ASM: encoding: [0x68,0x09,0x87,0x00] +-bstrins.d $a4, $a7, 7, 2 +- +-# CHECK64-ASM-AND-OBJ: bstrpick.d $s8, $s4, 39, 22 +-# CHECK64-ASM: encoding: [0x7f,0x5b,0xe7,0x00] +-bstrpick.d $s8, $s4, 39, 22 +- +-.endif +- +diff --git a/llvm/test/MC/LoongArch/Basic/Integer/bit-shift.s b/llvm/test/MC/LoongArch/Basic/Integer/bit-shift.s +deleted file mode 100644 +index 4b8f00a70..000000000 +--- a/llvm/test/MC/LoongArch/Basic/Integer/bit-shift.s ++++ /dev/null +@@ -1,88 +0,0 @@ +-## Test valid bit shift instructions. 
+- +-# RUN: llvm-mc %s --triple=loongarch32 --show-encoding \ +-# RUN: | FileCheck --check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s +-# RUN: llvm-mc %s --triple=loongarch64 --show-encoding --defsym=LA64=1 \ +-# RUN: | FileCheck --check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ,CHECK64-ASM,CHECK64-ASM-AND-OBJ %s +-# RUN: llvm-mc %s --triple=loongarch32 --filetype=obj | llvm-objdump -d - \ +-# RUN: | FileCheck --check-prefixes=CHECK-ASM-AND-OBJ %s +-# RUN: llvm-mc %s --triple=loongarch64 --filetype=obj --defsym=LA64=1 | llvm-objdump -d - \ +-# RUN: | FileCheck --check-prefixes=CHECK-ASM-AND-OBJ,CHECK64-ASM-AND-OBJ %s +- +-############################################################# +-## Instructions for both loongarch32 and loongarch64 +-############################################################# +- +-# CHECK-ASM-AND-OBJ: sll.w $s1, $s4, $s0 +-# CHECK-ASM: encoding: [0x78,0x5f,0x17,0x00] +-sll.w $s1, $s4, $s0 +- +-# CHECK-ASM-AND-OBJ: srl.w $s8, $t5, $a3 +-# CHECK-ASM: encoding: [0x3f,0x9e,0x17,0x00] +-srl.w $s8, $t5, $a3 +- +-# CHECK-ASM-AND-OBJ: sra.w $t0, $s5, $a6 +-# CHECK-ASM: encoding: [0x8c,0x2b,0x18,0x00] +-sra.w $t0, $s5, $a6 +- +-# CHECK-ASM-AND-OBJ: rotr.w $ra, $s3, $t6 +-# CHECK-ASM: encoding: [0x41,0x4b,0x1b,0x00] +-rotr.w $ra, $s3, $t6 +- +-# CHECK-ASM-AND-OBJ: slli.w $s3, $t6, 0 +-# CHECK-ASM: encoding: [0x5a,0x82,0x40,0x00] +-slli.w $s3, $t6, 0 +- +-# CHECK-ASM-AND-OBJ: srli.w $a6, $t2, 30 +-# CHECK-ASM: encoding: [0xca,0xf9,0x44,0x00] +-srli.w $a6, $t2, 30 +- +-# CHECK-ASM-AND-OBJ: srai.w $a4, $t5, 24 +-# CHECK-ASM: encoding: [0x28,0xe2,0x48,0x00] +-srai.w $a4, $t5, 24 +- +-# CHECK-ASM-AND-OBJ: rotri.w $s0, $t8, 23 +-# CHECK-ASM: encoding: [0x97,0xde,0x4c,0x00] +-rotri.w $s0, $t8, 23 +- +- +-############################################################# +-## Instructions only for loongarch64 +-############################################################# +- +-.ifdef LA64 +- +-# CHECK64-ASM-AND-OBJ: sll.d $t8, $t3, $sp +-# CHECK64-ASM: encoding: [0xf4,0x8d,0x18,0x00] +-sll.d $t8, $t3, $sp +- +-# CHECK64-ASM-AND-OBJ: srl.d $t2, $s2, $zero +-# CHECK64-ASM: encoding: [0x2e,0x03,0x19,0x00] +-srl.d $t2, $s2, $zero +- +-# CHECK64-ASM-AND-OBJ: sra.d $a3, $fp, $s8 +-# CHECK64-ASM: encoding: [0xc7,0xfe,0x19,0x00] +-sra.d $a3, $fp, $s8 +- +-# CHECK64-ASM-AND-OBJ: rotr.d $s8, $sp, $ra +-# CHECK64-ASM: encoding: [0x7f,0x84,0x1b,0x00] +-rotr.d $s8, $sp, $ra +- +-# CHECK64-ASM-AND-OBJ: slli.d $a6, $s8, 39 +-# CHECK64-ASM: encoding: [0xea,0x9f,0x41,0x00] +-slli.d $a6, $s8, 39 +- +-# CHECK64-ASM-AND-OBJ: srli.d $s8, $fp, 38 +-# CHECK64-ASM: encoding: [0xdf,0x9a,0x45,0x00] +-srli.d $s8, $fp, 38 +- +-# CHECK64-ASM-AND-OBJ: srai.d $a5, $r21, 27 +-# CHECK64-ASM: encoding: [0xa9,0x6e,0x49,0x00] +-srai.d $a5, $r21, 27 +- +-# CHECK64-ASM-AND-OBJ: rotri.d $s6, $zero, 7 +-# CHECK64-ASM: encoding: [0x1d,0x1c,0x4d,0x00] +-rotri.d $s6, $zero, 7 +- +-.endif +- +diff --git a/llvm/test/MC/LoongArch/Basic/Integer/bound-check.s b/llvm/test/MC/LoongArch/Basic/Integer/bound-check.s +deleted file mode 100644 +index cfb7e4ba8..000000000 +--- a/llvm/test/MC/LoongArch/Basic/Integer/bound-check.s ++++ /dev/null +@@ -1,71 +0,0 @@ +-## Test valid boundary check memory access instructions. 
+- +-# RUN: llvm-mc %s --triple=loongarch64 --show-encoding \ +-# RUN: | FileCheck --check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s +-# RUN: llvm-mc %s --triple=loongarch64 --filetype=obj | llvm-objdump -d - \ +-# RUN: | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s +- +-# CHECK-ASM-AND-OBJ: ldgt.b $a2, $a2, $s6 +-# CHECK-ASM: encoding: [0xc6,0x74,0x78,0x38] +-ldgt.b $a2, $a2, $s6 +- +-# CHECK-ASM-AND-OBJ: ldgt.h $a1, $s8, $ra +-# CHECK-ASM: encoding: [0xe5,0x87,0x78,0x38] +-ldgt.h $a1, $s8, $ra +- +-# CHECK-ASM-AND-OBJ: ldgt.w $t3, $s3, $a4 +-# CHECK-ASM: encoding: [0x4f,0x23,0x79,0x38] +-ldgt.w $t3, $s3, $a4 +- +-# CHECK-ASM-AND-OBJ: ldgt.d $s0, $s2, $s8 +-# CHECK-ASM: encoding: [0x37,0xff,0x79,0x38] +-ldgt.d $s0, $s2, $s8 +- +-# CHECK-ASM-AND-OBJ: ldle.b $a5, $t0, $t3 +-# CHECK-ASM: encoding: [0x89,0x3d,0x7a,0x38] +-ldle.b $a5, $t0, $t3 +- +-# CHECK-ASM-AND-OBJ: ldle.h $a7, $a7, $s0 +-# CHECK-ASM: encoding: [0x6b,0xdd,0x7a,0x38] +-ldle.h $a7, $a7, $s0 +- +-# CHECK-ASM-AND-OBJ: ldle.w $s1, $tp, $tp +-# CHECK-ASM: encoding: [0x58,0x08,0x7b,0x38] +-ldle.w $s1, $tp, $tp +- +-# CHECK-ASM-AND-OBJ: ldle.d $t8, $t3, $t4 +-# CHECK-ASM: encoding: [0xf4,0xc1,0x7b,0x38] +-ldle.d $t8, $t3, $t4 +- +-# CHECK-ASM-AND-OBJ: stgt.b $s4, $t7, $t8 +-# CHECK-ASM: encoding: [0x7b,0x52,0x7c,0x38] +-stgt.b $s4, $t7, $t8 +- +-# CHECK-ASM-AND-OBJ: stgt.h $t4, $a0, $a2 +-# CHECK-ASM: encoding: [0x90,0x98,0x7c,0x38] +-stgt.h $t4, $a0, $a2 +- +-# CHECK-ASM-AND-OBJ: stgt.w $s8, $s5, $t2 +-# CHECK-ASM: encoding: [0x9f,0x3b,0x7d,0x38] +-stgt.w $s8, $s5, $t2 +- +-# CHECK-ASM-AND-OBJ: stgt.d $s7, $r21, $s1 +-# CHECK-ASM: encoding: [0xbe,0xe2,0x7d,0x38] +-stgt.d $s7, $r21, $s1 +- +-# CHECK-ASM-AND-OBJ: stle.b $a6, $a0, $t4 +-# CHECK-ASM: encoding: [0x8a,0x40,0x7e,0x38] +-stle.b $a6, $a0, $t4 +- +-# CHECK-ASM-AND-OBJ: stle.h $t5, $t5, $r21 +-# CHECK-ASM: encoding: [0x31,0xd6,0x7e,0x38] +-stle.h $t5, $t5, $r21 +- +-# CHECK-ASM-AND-OBJ: stle.w $s0, $s5, $s6 +-# CHECK-ASM: encoding: [0x97,0x77,0x7f,0x38] +-stle.w $s0, $s5, $s6 +- +-# CHECK-ASM-AND-OBJ: stle.d $s2, $s1, $s6 +-# CHECK-ASM: encoding: [0x19,0xf7,0x7f,0x38] +-stle.d $s2, $s1, $s6 +- +diff --git a/llvm/test/MC/LoongArch/Basic/Integer/branch.s b/llvm/test/MC/LoongArch/Basic/Integer/branch.s +deleted file mode 100644 +index c4e8edf81..000000000 +--- a/llvm/test/MC/LoongArch/Basic/Integer/branch.s ++++ /dev/null +@@ -1,55 +0,0 @@ +-## Test valid branch instructions. 
+- +-# RUN: llvm-mc %s --triple=loongarch32 --show-encoding \ +-# RUN: | FileCheck --check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s +-# RUN: llvm-mc %s --triple=loongarch64 --show-encoding \ +-# RUN: | FileCheck --check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s +-# RUN: llvm-mc %s --triple=loongarch32 --filetype=obj | llvm-objdump -d - \ +-# RUN: | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s +-# RUN: llvm-mc %s --triple=loongarch64 --filetype=obj | llvm-objdump -d - \ +-# RUN: | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s +- +-# CHECK-ASM-AND-OBJ: beq $a6, $a3, 176 +-# CHECK-ASM: encoding: [0x47,0xb1,0x00,0x58] +-beq $a6, $a3, 176 +- +-# CHECK-ASM-AND-OBJ: bne $s2, $ra, 136 +-# CHECK-ASM: encoding: [0x21,0x8b,0x00,0x5c] +-bne $s2, $ra, 136 +- +-# CHECK-ASM-AND-OBJ: blt $t3, $s7, 168 +-# CHECK-ASM: encoding: [0xfe,0xa9,0x00,0x60] +-blt $t3, $s7, 168 +- +-# CHECK-ASM-AND-OBJ: bge $t0, $t3, 148 +-# CHECK-ASM: encoding: [0x8f,0x95,0x00,0x64] +-bge $t0, $t3, 148 +- +-# CHECK-ASM-AND-OBJ: bltu $t5, $a1, 4 +-# CHECK-ASM: encoding: [0x25,0x06,0x00,0x68] +-bltu $t5, $a1, 4 +- +-# CHECK-ASM-AND-OBJ: bgeu $a2, $s0, 140 +-# CHECK-ASM: encoding: [0xd7,0x8c,0x00,0x6c] +-bgeu $a2, $s0, 140 +- +-# CHECK-ASM-AND-OBJ: beqz $a5, 96 +-# CHECK-ASM: encoding: [0x20,0x61,0x00,0x40] +-beqz $a5, 96 +- +-# CHECK-ASM-AND-OBJ: bnez $sp, 212 +-# CHECK-ASM: encoding: [0x60,0xd4,0x00,0x44] +-bnez $sp, 212 +- +-# CHECK-ASM-AND-OBJ: b 248 +-# CHECK-ASM: encoding: [0x00,0xf8,0x00,0x50] +-b 248 +- +-# CHECK-ASM-AND-OBJ: bl 236 +-# CHECK-ASM: encoding: [0x00,0xec,0x00,0x54] +-bl 236 +- +-# CHECK-ASM-AND-OBJ: jirl $ra, $a0, 4 +-# CHECK-ASM: encoding: [0x81,0x04,0x00,0x4c] +-jirl $ra, $a0, 4 +- +diff --git a/llvm/test/MC/LoongArch/Basic/Integer/crc.s b/llvm/test/MC/LoongArch/Basic/Integer/crc.s +deleted file mode 100644 +index e57134d60..000000000 +--- a/llvm/test/MC/LoongArch/Basic/Integer/crc.s ++++ /dev/null +@@ -1,39 +0,0 @@ +-## Test valid CRC check instructions. 
+- +-# RUN: llvm-mc %s --triple=loongarch64 --show-encoding \ +-# RUN: | FileCheck --check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s +-# RUN: llvm-mc %s --triple=loongarch64 --filetype=obj | llvm-objdump -d - \ +-# RUN: | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s +- +-# CHECK-ASM-AND-OBJ: crc.w.b.w $s1, $a3, $tp +-# CHECK-ASM: encoding: [0xf8,0x08,0x24,0x00] +-crc.w.b.w $s1, $a3, $tp +- +-# CHECK-ASM-AND-OBJ: crc.w.h.w $s8, $a6, $t6 +-# CHECK-ASM: encoding: [0x5f,0xc9,0x24,0x00] +-crc.w.h.w $s8, $a6, $t6 +- +-# CHECK-ASM-AND-OBJ: crc.w.w.w $s5, $a2, $a6 +-# CHECK-ASM: encoding: [0xdc,0x28,0x25,0x00] +-crc.w.w.w $s5, $a2, $a6 +- +-# CHECK-ASM-AND-OBJ: crc.w.d.w $s5, $a7, $s8 +-# CHECK-ASM: encoding: [0x7c,0xfd,0x25,0x00] +-crc.w.d.w $s5, $a7, $s8 +- +-# CHECK-ASM-AND-OBJ: crcc.w.b.w $t3, $t6, $sp +-# CHECK-ASM: encoding: [0x4f,0x0e,0x26,0x00] +-crcc.w.b.w $t3, $t6, $sp +- +-# CHECK-ASM-AND-OBJ: crcc.w.h.w $r21, $s6, $t6 +-# CHECK-ASM: encoding: [0xb5,0xcb,0x26,0x00] +-crcc.w.h.w $r21, $s6, $t6 +- +-# CHECK-ASM-AND-OBJ: crcc.w.w.w $t5, $t2, $t1 +-# CHECK-ASM: encoding: [0xd1,0x35,0x27,0x00] +-crcc.w.w.w $t5, $t2, $t1 +- +-# CHECK-ASM-AND-OBJ: crcc.w.d.w $s7, $r21, $s4 +-# CHECK-ASM: encoding: [0xbe,0xee,0x27,0x00] +-crcc.w.d.w $s7, $r21, $s4 +- +diff --git a/llvm/test/MC/LoongArch/Basic/Integer/invalid-dis.s b/llvm/test/MC/LoongArch/Basic/Integer/invalid-dis.s +deleted file mode 100644 +index 5aa79ca80..000000000 +--- a/llvm/test/MC/LoongArch/Basic/Integer/invalid-dis.s ++++ /dev/null +@@ -1,10 +0,0 @@ +-# Test that disassembler rejects data smaller than 4 bytes. +- +-# RUN: llvm-mc --filetype=obj --triple=loongarch32 < %s \ +-# RUN: | llvm-objdump -d - | FileCheck %s +-# RUN: llvm-mc --filetype=obj --triple=loongarch64 < %s \ +-# RUN: | llvm-objdump -d - | FileCheck %s +- +-# CHECK: 11 +-# CHECK: 22 +-.2byte 0x2211 +diff --git a/llvm/test/MC/LoongArch/Basic/Integer/invalid.s b/llvm/test/MC/LoongArch/Basic/Integer/invalid.s +deleted file mode 100644 +index 94b3976f5..000000000 +--- a/llvm/test/MC/LoongArch/Basic/Integer/invalid.s ++++ /dev/null +@@ -1,191 +0,0 @@ +-## Test invalid instructions on both loongarch32 and loongarch64 target. 
+- +-# RUN: not llvm-mc --triple=loongarch32 --mattr=-f %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK64 +-# RUN: not llvm-mc --triple=loongarch64 --mattr=-f %s 2>&1 --defsym=LA64=1 | FileCheck %s +- +-## Out of range immediates +-## uimm2 +-bytepick.w $a0, $a0, $a0, -1 +-# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 3] +-bytepick.w $a0, $a0, $a0, 4 +-# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 3] +- +-## uimm2_plus1 +-alsl.w $a0, $a0, $a0, 0 +-# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [1, 4] +-alsl.w $a0, $a0, $a0, 5 +-# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [1, 4] +- +-## uimm5 +-slli.w $a0, $a0, -1 +-# CHECK: :[[#@LINE-1]]:18: error: immediate must be an integer in the range [0, 31] +-srli.w $a0, $a0, -1 +-# CHECK: :[[#@LINE-1]]:18: error: immediate must be an integer in the range [0, 31] +-srai.w $a0, $a0, 32 +-# CHECK: :[[#@LINE-1]]:18: error: immediate must be an integer in the range [0, 31] +-rotri.w $a0, $a0, 32 +-# CHECK: :[[#@LINE-1]]:19: error: immediate must be an integer in the range [0, 31] +-bstrins.w $a0, $a0, 31, -1 +-# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] +-bstrpick.w $a0, $a0, 32, 0 +-# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] +-preld -1, $a0, 0 +-# CHECK: :[[#@LINE-1]]:7: error: immediate must be an integer in the range [0, 31] +-preld 32, $a0, 0 +-# CHECK: :[[#@LINE-1]]:7: error: immediate must be an integer in the range [0, 31] +- +-## uimm12 +-andi $a0, $a0, -1 +-# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [0, 4095] +-ori $a0, $a0, 4096 +-# CHECK: :[[#@LINE-1]]:15: error: immediate must be an integer in the range [0, 4095] +-xori $a0, $a0, 4096 +-# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [0, 4095] +- +-## simm12 +-addi.w $a0, $a0, -2049 +-# CHECK: :[[#@LINE-1]]:18: error: immediate must be an integer in the range [-2048, 2047] +-slti $a0, $a0, -2049 +-# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-2048, 2047] +-sltui $a0, $a0, 2048 +-# CHECK: :[[#@LINE-1]]:17: error: immediate must be an integer in the range [-2048, 2047] +-preld 0, $a0, 2048 +-# CHECK: :[[#@LINE-1]]:15: error: immediate must be an integer in the range [-2048, 2047] +-ld.b $a0, $a0, 2048 +-# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-2048, 2047] +-ld.h $a0, $a0, 2048 +-# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-2048, 2047] +-ld.w $a0, $a0, 2048 +-# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-2048, 2047] +-ld.bu $a0, $a0, -2049 +-# CHECK: :[[#@LINE-1]]:17: error: immediate must be an integer in the range [-2048, 2047] +-ld.hu $a0, $a0, -2049 +-# CHECK: :[[#@LINE-1]]:17: error: immediate must be an integer in the range [-2048, 2047] +-st.b $a0, $a0, 2048 +-# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-2048, 2047] +-st.h $a0, $a0, 2048 +-# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-2048, 2047] +-st.w $a0, $a0, -2049 +-# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-2048, 2047] +- +-## simm14_lsl2 +-ll.w $a0, $a0, -32772 +-# CHECK: :[[#@LINE-1]]:16: error: immediate must be a multiple of 4 in the range [-32768, 32764] +-ll.w $a0, $a0, -32769 +-# CHECK: :[[#@LINE-1]]:16: error: immediate must 
be a multiple of 4 in the range [-32768, 32764] +-sc.w $a0, $a0, 32767 +-# CHECK: :[[#@LINE-1]]:16: error: immediate must be a multiple of 4 in the range [-32768, 32764] +-sc.w $a0, $a0, 32768 +-# CHECK: :[[#@LINE-1]]:16: error: immediate must be a multiple of 4 in the range [-32768, 32764] +- +-## simm16_lsl2 +-beq $a0, $a0, -0x20004 +-# CHECK: :[[#@LINE-1]]:15: error: immediate must be a multiple of 4 in the range [-131072, 131068] +-bne $a0, $a0, -0x20004 +-# CHECK: :[[#@LINE-1]]:15: error: immediate must be a multiple of 4 in the range [-131072, 131068] +-blt $a0, $a0, -0x1FFFF +-# CHECK: :[[#@LINE-1]]:15: error: immediate must be a multiple of 4 in the range [-131072, 131068] +-bge $a0, $a0, -0x1FFFF +-# CHECK: :[[#@LINE-1]]:15: error: immediate must be a multiple of 4 in the range [-131072, 131068] +-bltu $a0, $a0, 0x1FFFF +-# CHECK: :[[#@LINE-1]]:16: error: immediate must be a multiple of 4 in the range [-131072, 131068] +-bgeu $a0, $a0, 0x1FFFF +-# CHECK: :[[#@LINE-1]]:16: error: immediate must be a multiple of 4 in the range [-131072, 131068] +-jirl $a0, $a0, 0x20000 +-# CHECK: :[[#@LINE-1]]:16: error: immediate must be a multiple of 4 in the range [-131072, 131068] +- +-## simm20 +-lu12i.w $a0, -0x80001 +-# CHECK: :[[#@LINE-1]]:14: error: immediate must be an integer in the range [-524288, 524287] +-pcaddi $a0, -0x80001 +-# CHECK: :[[#@LINE-1]]:13: error: immediate must be an integer in the range [-524288, 524287] +-pcaddu12i $a0, 0x80000 +-# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-524288, 524287] +-pcalau12i $a0, 0x80000 +-# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-524288, 524287] +- +-## simm21_lsl2 +-beqz $a0, -0x400001 +-# CHECK: :[[#@LINE-1]]:11: error: immediate must be a multiple of 4 in the range [-4194304, 4194300] +-bnez $a0, -0x3FFFFF +-# CHECK: :[[#@LINE-1]]:11: error: immediate must be a multiple of 4 in the range [-4194304, 4194300] +-beqz $a0, 0x3FFFFF +-# CHECK: :[[#@LINE-1]]:11: error: immediate must be a multiple of 4 in the range [-4194304, 4194300] +-bnez $a0, 0x400000 +-# CHECK: :[[#@LINE-1]]:11: error: immediate must be a multiple of 4 in the range [-4194304, 4194300] +- +-## simm26_lsl2 +-b -0x8000001 +-# CHECK: :[[#@LINE-1]]:3: error: immediate must be a multiple of 4 in the range [-134217728, 134217724] +-b 0x1 +-# CHECK: :[[#@LINE-1]]:3: error: immediate must be a multiple of 4 in the range [-134217728, 134217724] +-bl 0x7FFFFFF +-# CHECK: :[[#@LINE-1]]:4: error: immediate must be a multiple of 4 in the range [-134217728, 134217724] +-bl 0x8000000 +-# CHECK: :[[#@LINE-1]]:4: error: immediate must be a multiple of 4 in the range [-134217728, 134217724] +- +-## Invalid mnemonics +-nori $a0, $a0, 0 +-# CHECK: :[[#@LINE-1]]:1: error: unrecognized instruction mnemonic +-andni $a0, $a0, 0 +-# CHECK: :[[#@LINE-1]]:1: error: unrecognized instruction mnemonic +-orni $a0, $a0, 0 +-# CHECK: :[[#@LINE-1]]:1: error: unrecognized instruction mnemonic +- +-## Invalid register names +-add.w $foo, $a0, $a0 +-# CHECK: :[[#@LINE-1]]:8: error: invalid operand for instruction +-sub.w $a8, $a0, $a0 +-# CHECK: :[[#@LINE-1]]:8: error: invalid operand for instruction +-addi.w $x0, $a0, 0 +-# CHECK: :[[#@LINE-1]]:9: error: invalid operand for instruction +-alsl.w $t9, $a0, $a0, 1 +-# CHECK: :[[#@LINE-1]]:9: error: invalid operand for instruction +-lu12i.w $s10, 0 +-# CHECK: :[[#@LINE-1]]:10: error: invalid operand for instruction +- +-.ifndef LA64 +-## LoongArch64 mnemonics +-add.d $a0, $a0, $a0 +-# CHECK64: 
:[[#@LINE-1]]:1: error: instruction requires the following: LA64 Basic Integer and Privilege Instruction Set +-addi.d $a0, $a0, 0 +-# CHECK64: :[[#@LINE-1]]:1: error: instruction requires the following: LA64 Basic Integer and Privilege Instruction Set +-.endif +- +-## Invalid operand types +-slt $a0, $a0, 0 +-# CHECK: :[[#@LINE-1]]:15: error: invalid operand for instruction +-slti $a0, 0, 0 +-# CHECK: :[[#@LINE-1]]:11: error: invalid operand for instruction +- +-## Too many operands +-andi $a0, $a0, 0, 0 +-# CHECK: :[[#@LINE-1]]:19: error: invalid operand for instruction +- +-## Too few operands +-and $a0, $a0 +-# CHECK: :[[#@LINE-1]]:1: error: too few operands for instruction +-andi $a0, $a0 +-# CHECK: :[[#@LINE-1]]:1: error: too few operands for instruction +- +-## Instructions outside the base integer ISA +-## TODO: Test instructions in LSX/LASX/LBT/LVZ after their introduction. +- +-## Floating-Point mnemonics +-fadd.s $fa0, $fa0, $fa0 +-# CHECK: :[[#@LINE-1]]:1: error: instruction requires the following: 'F' (Single-Precision Floating-Point) +-fadd.d $fa0, $fa0, $fa0 +-# CHECK: :[[#@LINE-1]]:1: error: instruction requires the following: 'D' (Double-Precision Floating-Point) +- +-## Using floating point registers when integer registers are expected +-sll.w $a0, $a0, $fa0 +-# CHECK: :[[#@LINE-1]]:18: error: invalid operand for instruction +- +-## msbw < lsbw +-# CHECK: :[[#@LINE+1]]:21: error: msb is less than lsb +-bstrins.w $a0, $a0, 1, 2 +-# CHECK: ^~~~ +- +-# CHECK: :[[#@LINE+1]]:22: error: msb is less than lsb +-bstrpick.w $a0, $a0, 30, 31 +-# CHECK: ^~~~~~ +diff --git a/llvm/test/MC/LoongArch/Basic/Integer/invalid64.s b/llvm/test/MC/LoongArch/Basic/Integer/invalid64.s +deleted file mode 100644 +index a8b175a88..000000000 +--- a/llvm/test/MC/LoongArch/Basic/Integer/invalid64.s ++++ /dev/null +@@ -1,77 +0,0 @@ +-## Test invalid instructions on loongarch64 target. 
+- +-# RUN: not llvm-mc --triple=loongarch64 %s 2>&1 | FileCheck %s +- +-## Out of range immediates +-## uimm2_plus1 +-alsl.wu $a0, $a0, $a0, 0 +-# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [1, 4] +-alsl.d $a0, $a0, $a0, 5 +-# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [1, 4] +- +-## uimm3 +-bytepick.d $a0, $a0, $a0, -1 +-# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 7] +-bytepick.d $a0, $a0, $a0, 8 +-# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 7] +- +-## uimm6 +-slli.d $a0, $a0, -1 +-# CHECK: :[[#@LINE-1]]:18: error: immediate must be an integer in the range [0, 63] +-srli.d $a0, $a0, -1 +-# CHECK: :[[#@LINE-1]]:18: error: immediate must be an integer in the range [0, 63] +-srai.d $a0, $a0, 64 +-# CHECK: :[[#@LINE-1]]:18: error: immediate must be an integer in the range [0, 63] +-rotri.d $a0, $a0, 64 +-# CHECK: :[[#@LINE-1]]:19: error: immediate must be an integer in the range [0, 63] +-bstrins.d $a0, $a0, 63, -1 +-# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63] +-bstrpick.d $a0, $a0, 64, 0 +-# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 63] +- +-## simm12 +-addi.d $a0, $a0, -2049 +-# CHECK: :[[#@LINE-1]]:18: error: immediate must be an integer in the range [-2048, 2047] +-lu52i.d $a0, $a0, -2049 +-# CHECK: :[[#@LINE-1]]:19: error: immediate must be an integer in the range [-2048, 2047] +-ld.wu $a0, $a0, 2048 +-# CHECK: :[[#@LINE-1]]:17: error: immediate must be an integer in the range [-2048, 2047] +-ld.d $a0, $a0, 2048 +-# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-2048, 2047] +-st.d $a0, $a0, 2048 +-# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-2048, 2047] +- +-## simm14_lsl2 +-ldptr.w $a0, $a0, -32772 +-# CHECK: :[[#@LINE-1]]:19: error: immediate must be a multiple of 4 in the range [-32768, 32764] +-ldptr.d $a0, $a0, -32772 +-# CHECK: :[[#@LINE-1]]:19: error: immediate must be a multiple of 4 in the range [-32768, 32764] +-stptr.w $a0, $a0, -32769 +-# CHECK: :[[#@LINE-1]]:19: error: immediate must be a multiple of 4 in the range [-32768, 32764] +-stptr.d $a0, $a0, -32769 +-# CHECK: :[[#@LINE-1]]:19: error: immediate must be a multiple of 4 in the range [-32768, 32764] +-ll.w $a0, $a0, 32767 +-# CHECK: :[[#@LINE-1]]:16: error: immediate must be a multiple of 4 in the range [-32768, 32764] +-sc.w $a0, $a0, 32768 +-# CHECK: :[[#@LINE-1]]:16: error: immediate must be a multiple of 4 in the range [-32768, 32764] +- +-## simm16 +-addu16i.d $a0, $a0, -32769 +-# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-32768, 32767] +-addu16i.d $a0, $a0, 32768 +-# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-32768, 32767] +- +-## simm20 +-lu32i.d $a0, -0x80001 +-# CHECK: :[[#@LINE-1]]:14: error: immediate must be an integer in the range [-524288, 524287] +-pcaddu18i $a0, 0x80000 +-# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-524288, 524287] +- +-## msbd < lsbd +-# CHECK: :[[#@LINE+1]]:21: error: msb is less than lsb +-bstrins.d $a0, $a0, 1, 2 +-# CHECK: ^~~~ +- +-# CHECK: :[[#@LINE+1]]:22: error: msb is less than lsb +-bstrpick.d $a0, $a0, 32, 63 +-# CHECK: ^~~~~~ +diff --git a/llvm/test/MC/LoongArch/Basic/Integer/memory.s b/llvm/test/MC/LoongArch/Basic/Integer/memory.s +deleted file mode 100644 +index 1d363d44d..000000000 +--- 
a/llvm/test/MC/LoongArch/Basic/Integer/memory.s ++++ /dev/null +@@ -1,132 +0,0 @@ +-## Test valid memory access instructions. +- +-# RUN: llvm-mc %s --triple=loongarch32 --show-encoding \ +-# RUN: | FileCheck --check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s +-# RUN: llvm-mc %s --triple=loongarch64 --show-encoding --defsym=LA64=1 \ +-# RUN: | FileCheck --check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ,CHECK64-ASM,CHECK64-ASM-AND-OBJ %s +-# RUN: llvm-mc %s --triple=loongarch32 --filetype=obj | llvm-objdump -d - \ +-# RUN: | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s +-# RUN: llvm-mc %s --triple=loongarch64 --filetype=obj --defsym=LA64=1 | llvm-objdump -d - \ +-# RUN: | FileCheck --check-prefixes=CHECK-ASM-AND-OBJ,CHECK64-ASM-AND-OBJ %s +- +-############################################################# +-## Instructions for both loongarch32 and loongarch64 +-############################################################# +- +-# CHECK-ASM-AND-OBJ: ld.b $s1, $a4, 21 +-# CHECK-ASM: encoding: [0x18,0x55,0x00,0x28] +-ld.b $s1, $a4, 21 +- +-# CHECK-ASM-AND-OBJ: ld.h $a3, $t6, 80 +-# CHECK-ASM: encoding: [0x47,0x42,0x41,0x28] +-ld.h $a3, $t6, 80 +- +-# CHECK-ASM-AND-OBJ: ld.w $t6, $s3, 92 +-# CHECK-ASM: encoding: [0x52,0x73,0x81,0x28] +-ld.w $t6, $s3, 92 +- +-# CHECK-ASM-AND-OBJ: ld.bu $t1, $t1, 150 +-# CHECK-ASM: encoding: [0xad,0x59,0x02,0x2a] +-ld.bu $t1, $t1, 150 +- +-# CHECK-ASM-AND-OBJ: ld.hu $t6, $s6, 198 +-# CHECK-ASM: encoding: [0xb2,0x1b,0x43,0x2a] +-ld.hu $t6, $s6, 198 +- +-# CHECK-ASM-AND-OBJ: st.b $sp, $a3, 95 +-# CHECK-ASM: encoding: [0xe3,0x7c,0x01,0x29] +-st.b $sp, $a3, 95 +- +-# CHECK-ASM-AND-OBJ: st.h $s2, $t4, 122 +-# CHECK-ASM: encoding: [0x19,0xea,0x41,0x29] +-st.h $s2, $t4, 122 +- +-# CHECK-ASM-AND-OBJ: st.w $t1, $t1, 175 +-# CHECK-ASM: encoding: [0xad,0xbd,0x82,0x29] +-st.w $t1, $t1, 175 +- +-# CHECK-ASM-AND-OBJ: preld 10, $zero, 23 +-# CHECK-ASM: encoding: [0x0a,0x5c,0xc0,0x2a] +-preld 10, $zero, 23 +- +- +-############################################################# +-## Instructions only for loongarch64 +-############################################################# +- +-.ifdef LA64 +- +-# CHECK64-ASM-AND-OBJ: ld.wu $t2, $t7, 31 +-# CHECK64-ASM: encoding: [0x6e,0x7e,0x80,0x2a] +-ld.wu $t2, $t7, 31 +- +-# CHECK: ld.d $t6, $t8, 159 +-# CHECK: encoding: [0x92,0x7e,0xc2,0x28] +-ld.d $t6, $t8, 159 +- +-# CHECK64-ASM-AND-OBJ: st.d $s7, $s7, 60 +-# CHECK64-ASM: encoding: [0xde,0xf3,0xc0,0x29] +-st.d $s7, $s7, 60 +- +-# CHECK64-ASM-AND-OBJ: ldx.b $s1, $ra, $tp +-# CHECK64-ASM: encoding: [0x38,0x08,0x00,0x38] +-ldx.b $s1, $ra, $tp +- +-# CHECK64-ASM-AND-OBJ: ldx.h $fp, $fp, $t5 +-# CHECK64-ASM: encoding: [0xd6,0x46,0x04,0x38] +-ldx.h $fp, $fp, $t5 +- +-# CHECK64-ASM-AND-OBJ: ldx.w $s2, $a7, $s0 +-# CHECK64-ASM: encoding: [0x79,0x5d,0x08,0x38] +-ldx.w $s2, $a7, $s0 +- +-# CHECK64-ASM-AND-OBJ: ldx.d $t6, $s0, $t8 +-# CHECK64-ASM: encoding: [0xf2,0x52,0x0c,0x38] +-ldx.d $t6, $s0, $t8 +- +-# CHECK64-ASM-AND-OBJ: ldx.bu $a7, $a5, $a5 +-# CHECK64-ASM: encoding: [0x2b,0x25,0x20,0x38] +-ldx.bu $a7, $a5, $a5 +- +-# CHECK64-ASM-AND-OBJ: ldx.hu $fp, $s0, $s4 +-# CHECK64-ASM: encoding: [0xf6,0x6e,0x24,0x38] +-ldx.hu $fp, $s0, $s4 +- +-# CHECK64-ASM-AND-OBJ: ldx.wu $a4, $s1, $s5 +-# CHECK64-ASM: encoding: [0x08,0x73,0x28,0x38] +-ldx.wu $a4, $s1, $s5 +- +-# CHECK64-ASM-AND-OBJ: stx.b $t7, $ra, $sp +-# CHECK64-ASM: encoding: [0x33,0x0c,0x10,0x38] +-stx.b $t7, $ra, $sp +- +-# CHECK64-ASM-AND-OBJ: stx.h $zero, $s5, $s3 +-# CHECK64-ASM: encoding: [0x80,0x6b,0x14,0x38] +-stx.h $zero, $s5, $s3 +- +-# 
CHECK64-ASM-AND-OBJ: stx.w $a3, $a0, $s8 +-# CHECK64-ASM: encoding: [0x87,0x7c,0x18,0x38] +-stx.w $a3, $a0, $s8 +- +-# CHECK64-ASM-AND-OBJ: stx.d $a3, $s8, $a6 +-# CHECK64-ASM: encoding: [0xe7,0x2b,0x1c,0x38] +-stx.d $a3, $s8, $a6 +- +-# CHECK64-ASM-AND-OBJ: ldptr.w $s3, $a2, 60 +-# CHECK64-ASM: encoding: [0xda,0x3c,0x00,0x24] +-ldptr.w $s3, $a2, 60 +- +-# CHECK64-ASM-AND-OBJ: ldptr.d $a1, $s6, 244 +-# CHECK64-ASM: encoding: [0xa5,0xf7,0x00,0x26] +-ldptr.d $a1, $s6, 244 +- +-# CHECK64-ASM-AND-OBJ: stptr.w $s5, $a1, 216 +-# CHECK64-ASM: encoding: [0xbc,0xd8,0x00,0x25] +-stptr.w $s5, $a1, 216 +- +-# CHECK64-ASM-AND-OBJ: stptr.d $t2, $s1, 196 +-# CHECK64-ASM: encoding: [0x0e,0xc7,0x00,0x27] +-stptr.d $t2, $s1, 196 +- +-.endif +- +diff --git a/llvm/test/MC/LoongArch/Basic/Integer/misc.s b/llvm/test/MC/LoongArch/Basic/Integer/misc.s +deleted file mode 100644 +index 182d1da9b..000000000 +--- a/llvm/test/MC/LoongArch/Basic/Integer/misc.s ++++ /dev/null +@@ -1,56 +0,0 @@ +-## Test valid misc instructions. +- +-# RUN: llvm-mc %s --triple=loongarch32 --show-encoding \ +-# RUN: | FileCheck --check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s +-# RUN: llvm-mc %s --triple=loongarch64 --show-encoding --defsym=LA64=1 \ +-# RUN: | FileCheck --check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ,CHECK64-ASM,CHECK64-ASM-AND-OBJ %s +-# RUN: llvm-mc %s --triple=loongarch32 --filetype=obj | llvm-objdump -d - \ +-# RUN: | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s +-# RUN: llvm-mc %s --triple=loongarch64 --filetype=obj --defsym=LA64=1 | llvm-objdump -d - \ +-# RUN: | FileCheck --check-prefixes=CHECK-ASM-AND-OBJ,CHECK64-ASM-AND-OBJ %s +- +-############################################################# +-## Instructions for both loongarch32 and loongarch64 +-############################################################# +- +-# CHECK-ASM-AND-OBJ: syscall 100 +-# CHECK-ASM: encoding: [0x64,0x00,0x2b,0x00] +-syscall 100 +- +-# CHECK-ASM-AND-OBJ: break 199 +-# CHECK-ASM: encoding: [0xc7,0x00,0x2a,0x00] +-break 199 +- +-# CHECK-ASM-AND-OBJ: rdtimel.w $s1, $a0 +-# CHECK-ASM: encoding: [0x98,0x60,0x00,0x00] +-rdtimel.w $s1, $a0 +- +-# CHECK-ASM-AND-OBJ: rdtimeh.w $a7, $a1 +-# CHECK-ASM: encoding: [0xab,0x64,0x00,0x00] +-rdtimeh.w $a7, $a1 +- +-# CHECK-ASM-AND-OBJ: cpucfg $sp, $a4 +-# CHECK-ASM: encoding: [0x03,0x6d,0x00,0x00] +-cpucfg $sp, $a4 +- +- +-############################################################# +-## Instructions only for loongarch64 +-############################################################# +- +-.ifdef LA64 +- +-# CHECK64-ASM-AND-OBJ: asrtle.d $t0, $t5 +-# CHECK64-ASM: encoding: [0x80,0x45,0x01,0x00] +-asrtle.d $t0, $t5 +- +-# CHECK64-ASM-AND-OBJ: asrtgt.d $t8, $t8 +-# CHECK64-ASM: encoding: [0x80,0xd2,0x01,0x00] +-asrtgt.d $t8, $t8 +- +-# CHECK64-ASM-AND-OBJ: rdtime.d $tp, $t3 +-# CHECK64-ASM: encoding: [0xe2,0x69,0x00,0x00] +-rdtime.d $tp, $t3 +- +-.endif +- +diff --git a/llvm/test/MC/LoongArch/Basic/Integer/pseudos.s b/llvm/test/MC/LoongArch/Basic/Integer/pseudos.s +deleted file mode 100644 +index e718982f3..000000000 +--- a/llvm/test/MC/LoongArch/Basic/Integer/pseudos.s ++++ /dev/null +@@ -1,18 +0,0 @@ +-## Test valid pseudo instructions +- +-# RUN: llvm-mc %s --triple=loongarch32 --show-encoding \ +-# RUN: | FileCheck --check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s +-# RUN: llvm-mc %s --triple=loongarch64 --show-encoding \ +-# RUN: | FileCheck --check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s +-# RUN: llvm-mc %s --triple=loongarch32 --filetype=obj | llvm-objdump -d - \ +-# RUN: | FileCheck 
--check-prefix=CHECK-ASM-AND-OBJ %s +-# RUN: llvm-mc %s --triple=loongarch64 --filetype=obj | llvm-objdump -d - \ +-# RUN: | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s +- +-# CHECK-ASM-AND-OBJ: nop +-# CHECK-ASM: encoding: [0x00,0x00,0x40,0x03] +-nop +- +-# CHECK-ASM-AND-OBJ: move $a4, $a5 +-# CHECK-ASM: encoding: [0x28,0x01,0x15,0x00] +-move $a4, $a5 +diff --git a/llvm/test/MC/LoongArch/Basic/Privilege/invalid.s b/llvm/test/MC/LoongArch/Basic/Privilege/invalid.s +deleted file mode 100644 +index 380b848a6..000000000 +--- a/llvm/test/MC/LoongArch/Basic/Privilege/invalid.s ++++ /dev/null +@@ -1,14 +0,0 @@ +-# RUN: not llvm-mc --triple=loongarch32 %s 2>&1 | FileCheck %s --check-prefixes=ERR,ERR32 +-# RUN: not llvm-mc --triple=loongarch64 %s 2>&1 | FileCheck %s --check-prefix=ERR +- +-## csrxchg: rj != 0,1 +-csrxchg $a0, $zero, 0 +-# ERR: :[[#@LINE-1]]:15: error: must not be $r0 or $r1 +-csrxchg $a0, $ra, 0 +-# ERR: :[[#@LINE-1]]:15: error: must not be $r0 or $r1 +- +-## LoongArch64 mnemonics +-iocsrrd.d $a0, $a1 +-# ERR32: :[[#@LINE-1]]:1: error: instruction requires the following: LA64 Basic Integer and Privilege Instruction Set +-iocsrwr.d $a0, $a1 +-# ERR32: :[[#@LINE-1]]:1: error: instruction requires the following: LA64 Basic Integer and Privilege Instruction Set +diff --git a/llvm/test/MC/LoongArch/Basic/Privilege/valid.s b/llvm/test/MC/LoongArch/Basic/Privilege/valid.s +deleted file mode 100644 +index 1d5ca6866..000000000 +--- a/llvm/test/MC/LoongArch/Basic/Privilege/valid.s ++++ /dev/null +@@ -1,118 +0,0 @@ +-## Test valid privilege instructions +- +-# RUN: llvm-mc %s --triple=loongarch32 --show-encoding \ +-# RUN: | FileCheck --check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s +-# RUN: llvm-mc %s --triple=loongarch64 --show-encoding --defsym=LA64=1 \ +-# RUN: | FileCheck --check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ,CHECK64-ASM,CHECK64-ASM-AND-OBJ %s +-# RUN: llvm-mc %s --triple=loongarch32 --filetype=obj | llvm-objdump -d - \ +-# RUN: | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s +-# RUN: llvm-mc %s --triple=loongarch64 --filetype=obj --defsym=LA64=1 | llvm-objdump -d - \ +-# RUN: | FileCheck --check-prefixes=CHECK-ASM-AND-OBJ,CHECK64-ASM-AND-OBJ %s +- +-############################################################# +-## Instructions for both loongarch32 and loongarch64 +-############################################################# +- +-# CHECK-ASM-AND-OBJ: csrrd $s3, 30 +-# CHECK-ASM: encoding: [0x1a,0x78,0x00,0x04] +-csrrd $s3, 30 +- +-# CHECK-ASM-AND-OBJ: csrwr $s1, 194 +-# CHECK-ASM: encoding: [0x38,0x08,0x03,0x04] +-csrwr $s1, 194 +- +-# CHECK-ASM-AND-OBJ: csrxchg $a2, $s4, 214 +-# CHECK-ASM: encoding: [0x66,0x5b,0x03,0x04] +-csrxchg $a2, $s4, 214 +- +-# CHECK-ASM-AND-OBJ: iocsrrd.b $s3, $s1 +-# CHECK-ASM: encoding: [0x1a,0x03,0x48,0x06] +-iocsrrd.b $s3, $s1 +- +-# CHECK-ASM-AND-OBJ: iocsrrd.h $a1, $s4 +-# CHECK-ASM: encoding: [0x65,0x07,0x48,0x06] +-iocsrrd.h $a1, $s4 +- +-# CHECK-ASM-AND-OBJ: iocsrrd.w $a6, $t8 +-# CHECK-ASM: encoding: [0x8a,0x0a,0x48,0x06] +-iocsrrd.w $a6, $t8 +- +-# CHECK-ASM-AND-OBJ: iocsrwr.b $a0, $s0 +-# CHECK-ASM: encoding: [0xe4,0x12,0x48,0x06] +-iocsrwr.b $a0, $s0 +- +-# CHECK-ASM-AND-OBJ: iocsrwr.h $a7, $zero +-# CHECK-ASM: encoding: [0x0b,0x14,0x48,0x06] +-iocsrwr.h $a7, $zero +- +-# CHECK-ASM-AND-OBJ: iocsrwr.w $t8, $s3 +-# CHECK-ASM: encoding: [0x54,0x1b,0x48,0x06] +-iocsrwr.w $t8, $s3 +- +-# CHECK-ASM-AND-OBJ: cacop 0, $a6, 27 +-# CHECK-ASM: encoding: [0x40,0x6d,0x00,0x06] +-cacop 0, $a6, 27 +- +-# CHECK-ASM-AND-OBJ: tlbclr +-# CHECK-ASM: 
encoding: [0x00,0x20,0x48,0x06] +-tlbclr +- +-# CHECK-ASM-AND-OBJ: tlbflush +-# CHECK-ASM: encoding: [0x00,0x24,0x48,0x06] +-tlbflush +- +-# CHECK-ASM-AND-OBJ: tlbsrch +-# CHECK-ASM: encoding: [0x00,0x28,0x48,0x06] +-tlbsrch +- +-# CHECK-ASM-AND-OBJ: tlbrd +-# CHECK-ASM: encoding: [0x00,0x2c,0x48,0x06] +-tlbrd +- +-# CHECK-ASM-AND-OBJ: tlbwr +-# CHECK-ASM: encoding: [0x00,0x30,0x48,0x06] +-tlbwr +- +-# CHECK-ASM-AND-OBJ: tlbfill +-# CHECK-ASM: encoding: [0x00,0x34,0x48,0x06] +-tlbfill +- +-# CHECK-ASM-AND-OBJ: invtlb 16, $s6, $s2 +-# CHECK-ASM: encoding: [0xb0,0xe7,0x49,0x06] +-invtlb 16, $s6, $s2 +- +-# CHECK-ASM-AND-OBJ: lddir $t0, $s7, 92 +-# CHECK-ASM: encoding: [0xcc,0x73,0x41,0x06] +-lddir $t0, $s7, 92 +- +-# CHECK-ASM-AND-OBJ: ldpte $t6, 200 +-# CHECK-ASM: encoding: [0x40,0x22,0x47,0x06] +-ldpte $t6, 200 +- +-# CHECK-ASM-AND-OBJ: ertn +-# CHECK-ASM: encoding: [0x00,0x38,0x48,0x06] +-ertn +- +-# CHECK-ASM-AND-OBJ: dbcl 201 +-# CHECK-ASM: encoding: [0xc9,0x80,0x2a,0x00] +-dbcl 201 +- +-# CHECK-ASM-AND-OBJ: idle 204 +-# CHECK-ASM: encoding: [0xcc,0x80,0x48,0x06] +-idle 204 +- +-############################################################# +-## Instructions only for loongarch64 +-############################################################# +- +-.ifdef LA64 +- +-# CHECK64-ASM-AND-OBJ: iocsrrd.d $t5, $s2 +-# CHECK64-ASM: encoding: [0x31,0x0f,0x48,0x06] +-iocsrrd.d $t5, $s2 +- +-# CHECK64-ASM-AND-OBJ: iocsrwr.d $t8, $a3 +-# CHECK64-ASM: encoding: [0xf4,0x1c,0x48,0x06] +-iocsrwr.d $t8, $a3 +- +-.endif +diff --git a/llvm/test/MC/LoongArch/Directives/cfi.s b/llvm/test/MC/LoongArch/Directives/cfi.s +deleted file mode 100644 +index 7101fc907..000000000 +--- a/llvm/test/MC/LoongArch/Directives/cfi.s ++++ /dev/null +@@ -1,34 +0,0 @@ +-## Test cfi directives. +- +-# RUN: llvm-mc %s --triple=loongarch32 | FileCheck %s +-# RUN: llvm-mc %s --triple=loongarch64 | FileCheck %s +-# RUN: not llvm-mc --triple=loongarch32 --defsym=ERR=1 < %s 2>&1 \ +-# RUN: | FileCheck %s --check-prefix=CHECK-ERR +-# RUN: not llvm-mc --triple=loongarch64 --defsym=ERR=1 < %s 2>&1 \ +-# RUN: | FileCheck %s --check-prefix=CHECK-ERR +- +-# CHECK: .cfi_startproc +-.cfi_startproc +-# CHECK-NEXT: .cfi_offset 0, 0 +-.cfi_offset 0, 0 +-# CHECK-NEXT: .cfi_offset 9, 8 +-.cfi_offset 9, 8 +-# CHECK-NEXT: .cfi_offset 31, 16 +-.cfi_offset 31, 16 +-# CHECK-NEXT: .cfi_endproc +-.cfi_endproc +- +-.ifdef ERR +-.cfi_startproc +-# CHECK-ERR: :[[#@LINE+1]]:13: error: invalid register number +-.cfi_offset -22, -8 +-# CHECK-ERR: :[[#@LINE+1]]:13: error: invalid register number +-.cfi_offset fp, -8 +-# CHECK-ERR: :[[#@LINE+1]]:13: error: invalid register number +-.cfi_offset $22, -8 +-# CHECK-ERR: :[[#@LINE+1]]:13: error: invalid register number +-.cfi_offset $r22, -8 +-# CHECK-ERR: :[[#@LINE+1]]:13: error: invalid register number +-.cfi_offset $fp, -8 +-.cfi_endproc +-.endif +diff --git a/llvm/test/MC/LoongArch/Directives/data.s b/llvm/test/MC/LoongArch/Directives/data.s +deleted file mode 100644 +index e3c66d10b..000000000 +--- a/llvm/test/MC/LoongArch/Directives/data.s ++++ /dev/null +@@ -1,102 +0,0 @@ +-## Test data directives. 
+-# RUN: llvm-mc --triple=loongarch32 < %s \ +-# RUN: | FileCheck --check-prefix=CHECK-ASM %s +-# RUN: llvm-mc --triple=loongarch64 < %s \ +-# RUN: | FileCheck --check-prefix=CHECK-ASM %s +-# RUN: llvm-mc --triple=loongarch32 --filetype=obj < %s | llvm-objdump -s - \ +-# RUN: | FileCheck --check-prefix=CHECK-DATA %s +-# RUN: llvm-mc --triple=loongarch64 --filetype=obj < %s | llvm-objdump -s - \ +-# RUN: | FileCheck --check-prefix=CHECK-DATA %s +-# RUN: not llvm-mc --triple=loongarch32 --defsym=ERR=1 < %s 2>&1 \ +-# RUN: | FileCheck %s --check-prefix=CHECK-ERR +-# RUN: not llvm-mc --triple=loongarch64 --defsym=ERR=1 < %s 2>&1 \ +-# RUN: | FileCheck %s --check-prefix=CHECK-ERR +- +-.data +- +-# CHECK-ASM: .byte 0 +-# CHECK-ASM-NEXT: .byte 1 +-# CHECK-ASM-NEXT: .byte 171 +-# CHECK-ASM-NEXT: .byte 255 +-# CHECK-DATA: Contents of section .data: +-# CHECK-DATA-NEXT: 0000 0001abff 0100ffff 0100ffff 0100ffff +-.byte 0 +-.byte 1 +-.byte 0xab +-.byte 0xff +- +-# CHECK-ASM: .half 1 +-# CHECK-ASM-NEXT: .half 65535 +-.half 0x1 +-.half 0xffff +- +-# CHECK-ASM: .half 1 +-# CHECK-ASM-NEXT: .half 65535 +-.2byte 0x1 +-.2byte 0xffff +- +-# CHECK-ASM: .half 1 +-# CHECK-ASM-NEXT: .half 65535 +-.short 0x1 +-.short 0xffff +- +-# CHECK-ASM: .half 0 +-# CHECK-ASM-NEXT: .half 1 +-# CHECK-ASM-NEXT: .half 4660 +-# CHECK-ASM-NEXT: .half 65535 +-# CHECK-DATA-NEXT: 0010 00000100 3412ffff 01000000 ffffffff +-.hword 0 +-.hword 0x1 +-.hword 0x1234 +-.hword 0xffff +- +-# CHECK-ASM: .word 1 +-# CHECK-ASM-NEXT: .word 4294967295 +-.word 0x1 +-.word 0xffffffff +- +-# CHECK-ASM: .word 1 +-# CHECK-ASM-NEXT: .word 4294967295 +-# CHECK-DATA-NEXT: 0020 01000000 ffffffff 01000000 ffffffff +-.long 0x1 +-.long 0xffffffff +- +-# CHECK-ASM: .word 1 +-# CHECK-ASM-NEXT: .word 4294967295 +-.4byte 0x1 +-.4byte 0xffffffff +- +-# CHECK-ASM: .dword 1 +-# CHECK-ASM-NEXT: .dword 1234605616436508552 +-# CHECK-DATA-NEXT: 0030 01000000 00000000 88776655 44332211 +-.dword 0x1 +-.dword 0x1122334455667788 +- +-# CHECK-ASM: .dword 1 +-# CHECK-ASM-NEXT: .dword 1234605616436508552 +-# CHECK-DATA-NEXT: 0040 01000000 00000000 88776655 44332211 +-.8byte 0x1 +-.8byte 0x1122334455667788 +- +-.ifdef ERR +-# CHECK-ERR: :[[#@LINE+1]]:7: error: out of range literal value +-.byte 0xffa +-# CHECK-ERR: :[[#@LINE+1]]:7: error: out of range literal value +-.half 0xffffa +-# CHECK-ERR: :[[#@LINE+1]]:8: error: out of range literal value +-.short 0xffffa +-# CHECK-ERR: :[[#@LINE+1]]:8: error: out of range literal value +-.hword 0xffffa +-# CHECK-ERR: :[[#@LINE+1]]:8: error: out of range literal value +-.2byte 0xffffa +-# CHECK-ERR: :[[#@LINE+1]]:7: error: out of range literal value +-.word 0xffffffffa +-# CHECK-ERR: :[[#@LINE+1]]:7: error: out of range literal value +-.long 0xffffffffa +-# CHECK-ERR: :[[#@LINE+1]]:8: error: out of range literal value +-.4byte 0xffffffffa +-# CHECK-ERR: :[[#@LINE+1]]:8: error: literal value out of range for directive +-.dword 0xffffffffffffffffa +-# CHECK-ERR: :[[#@LINE+1]]:8: error: literal value out of range for directive +-.8byte 0xffffffffffffffffa +-.endif +diff --git a/llvm/test/MC/LoongArch/Misc/aligned-nops.s b/llvm/test/MC/LoongArch/Misc/aligned-nops.s +deleted file mode 100644 +index 8554b4998..000000000 +--- a/llvm/test/MC/LoongArch/Misc/aligned-nops.s ++++ /dev/null +@@ -1,15 +0,0 @@ +-# RUN: llvm-mc --filetype=obj --triple=loongarch64 < %s \ +-# RUN: | llvm-objdump -d - | FileCheck %s +- +-# func1 and func2 are 8 byte alignment but the func1's size is 4. +-# So assembler will insert a nop to make sure 8 byte alignment. 
+- +-.text +- +-.p2align 3 +-func1: +- addi.d $sp, $sp, -16 +-# CHECK: addi.d $sp, $sp, -16 +-# CHECK-NEXT: nop +-.p2align 3 +-func2: +diff --git a/llvm/test/MC/LoongArch/aligned-nops.s b/llvm/test/MC/LoongArch/aligned-nops.s +new file mode 100644 +index 000000000..2ef26ac4b +--- /dev/null ++++ b/llvm/test/MC/LoongArch/aligned-nops.s +@@ -0,0 +1,25 @@ ++# RUN: llvm-mc -filetype=obj -triple loongarch64 < %s \ ++# RUN: | llvm-objdump -d - | FileCheck -check-prefix=CHECK-INST %s ++ ++# alpha and main are 8 byte alignment ++# but the alpha function's size is 4 ++# So assembler will insert a nop to make sure 8 byte alignment. ++ ++ .text ++ .p2align 3 ++ .type alpha,@function ++alpha: ++# BB#0: ++ addi.d $sp, $sp, -16 ++# CHECK-INST: nop ++.Lfunc_end0: ++ .size alpha, .Lfunc_end0-alpha ++ # -- End function ++ .globl main ++ .p2align 3 ++ .type main,@function ++main: # @main ++# BB#0: ++.Lfunc_end1: ++ .size main, .Lfunc_end1-main ++ # -- End function +diff --git a/llvm/test/MC/LoongArch/atomic-error.s b/llvm/test/MC/LoongArch/atomic-error.s +new file mode 100644 +index 000000000..7e61a5ba5 +--- /dev/null ++++ b/llvm/test/MC/LoongArch/atomic-error.s +@@ -0,0 +1,7 @@ ++# RUN: not llvm-mc --triple=loongarch64 %s 2>&1 | FileCheck %s ++ ++# CHECK: error: expected memory with constant 0 offset ++amadd_db.d $a1, $t5, $s6, 1 ++ ++# CHECK: error: unexpected token in argument list ++amadd_db.d $a1, $t5, $s6, a +diff --git a/llvm/test/MC/LoongArch/atomic.s b/llvm/test/MC/LoongArch/atomic.s +new file mode 100644 +index 000000000..10a406550 +--- /dev/null ++++ b/llvm/test/MC/LoongArch/atomic.s +@@ -0,0 +1,12 @@ ++# RUN: llvm-mc %s --triple=loongarch64 --show-encoding | \ ++# RUN: FileCheck --check-prefixes=ASM,ASM-AND-OBJ %s ++# RUN: llvm-mc %s --triple=loongarch64 --filetype=obj | llvm-objdump -d - | \ ++# RUN: FileCheck --check-prefixes=ASM-AND-OBJ %s ++ ++# ASM-AND-OBJ: amadd_db.d $r5, $r17, $r29, 0 ++# ASM: encoding: [0xa5,0xc7,0x6a,0x38] ++amadd_db.d $a1, $t5, $s6, 0 ++ ++# ASM-AND-OBJ: amadd_db.d $r5, $r17, $r29, 0 ++# ASM: encoding: [0xa5,0xc7,0x6a,0x38] ++amadd_db.d $a1, $t5, $s6 +diff --git a/llvm/test/MC/LoongArch/fixups-expr.s b/llvm/test/MC/LoongArch/fixups-expr.s +new file mode 100644 +index 000000000..d35fe7e77 +--- /dev/null ++++ b/llvm/test/MC/LoongArch/fixups-expr.s +@@ -0,0 +1,40 @@ ++# RUN: llvm-mc --filetype=obj --triple=loongarch64 %s \ ++# RUN: | llvm-readobj -r - | FileCheck %s ++ ++# Check that subtraction expressions are emitted as two relocations ++ ++.globl G1 ++.globl G2 ++.L1: ++G1: ++nop ++.L2: ++G2: ++ ++.data ++.8byte .L2-.L1 # CHECK: 0x0 R_LARCH_ADD64 .L2 0x0 ++ # CHECK: 0x0 R_LARCH_SUB64 .L1 0x0 ++.8byte G2-G1 # CHECK: 0x8 R_LARCH_ADD64 G2 0x0 ++ # CHECK: 0x8 R_LARCH_SUB64 G1 0x0 ++.4byte .L2-.L1 # CHECK: 0x10 R_LARCH_ADD32 .L2 0x0 ++ # CHECK: 0x10 R_LARCH_SUB32 .L1 0x0 ++.4byte G2-G1 # CHECK: 0x14 R_LARCH_ADD32 G2 0x0 ++ # CHECK: 0x14 R_LARCH_SUB32 G1 0x0 ++.2byte .L2-.L1 # CHECK: 0x18 R_LARCH_ADD16 .L2 0x0 ++ # CHECK: 0x18 R_LARCH_SUB16 .L1 0x0 ++.2byte G2-G1 # CHECK: 0x1A R_LARCH_ADD16 G2 0x0 ++ # CHECK: 0x1A R_LARCH_SUB16 G1 0x0 ++.byte .L2-.L1 # CHECK: 0x1C R_LARCH_ADD8 .L2 0x0 ++ # CHECK: 0x1C R_LARCH_SUB8 .L1 0x0 ++.byte G2-G1 # CHECK: 0x1D R_LARCH_ADD8 G2 0x0 ++ # CHECK: 0x1D R_LARCH_SUB8 G1 0x0 ++ ++.section .rodata.str.1 ++.L.str: ++.asciz "string" ++ ++.rodata ++.Lreltable: ++.word .L.str-.Lreltable # CHECK: 0x0 R_LARCH_ADD32 .L.str 0x0 ++ # CHECK: 0x0 R_LARCH_SUB32 .Lreltable 0x0 ++ +diff --git a/llvm/test/MC/LoongArch/invalid.s 
b/llvm/test/MC/LoongArch/invalid.s +new file mode 100644 +index 000000000..e0fc7ce4b +--- /dev/null ++++ b/llvm/test/MC/LoongArch/invalid.s +@@ -0,0 +1,50 @@ ++# RUN: not llvm-mc %s -triple=loongarch64-unknown-linux-gnu 2>&1 | FileCheck %s ++.text ++csrxchg $r6, $r0, 214 # CHECK: :[[@LINE]]:1: error: invalid operand ($zero) for instruction ++csrxchg $r6, $r1, 214 # CHECK: :[[@LINE]]:1: error: invalid operand ($r1) for instruction ++ ++## out-of-bound immediate ++### simm16 << 2 ++beq $r10, $r7, -0x20000-4 # CHECK: :[[@LINE]]:1: error: branch target out of range ++beq $r10, $r7, 0x1FFFC+4 # CHECK: :[[@LINE]]:1: error: branch target out of range ++bne $r10, $r7, -0x20000-4 # CHECK: :[[@LINE]]:1: error: branch target out of range ++bne $r10, $r7, 0x1FFFC+4 # CHECK: :[[@LINE]]:1: error: branch target out of range ++blt $r10, $r7, -0x20000-4 # CHECK: :[[@LINE]]:1: error: branch target out of range ++blt $r10, $r7, 0x1FFFC+4 # CHECK: :[[@LINE]]:1: error: branch target out of range ++bge $r10, $r7, -0x20000-4 # CHECK: :[[@LINE]]:1: error: branch target out of range ++bge $r10, $r7, 0x1FFFC+4 # CHECK: :[[@LINE]]:1: error: branch target out of range ++bltu $r10, $r7, -0x20000-4 # CHECK: :[[@LINE]]:1: error: branch target out of range ++bltu $r10, $r7, 0x1FFFC+4 # CHECK: :[[@LINE]]:1: error: branch target out of range ++bgeu $r10, $r7, -0x20000-4 # CHECK: :[[@LINE]]:1: error: branch target out of range ++bgeu $r10, $r7, 0x1FFFC+4 # CHECK: :[[@LINE]]:1: error: branch target out of range ++### simm21 << 2 ++beqz $r9, -0x400000-4 # CHECK: :[[@LINE]]:1: error: branch target out of range ++beqz $r9, 0x3FFFFC+4 # CHECK: :[[@LINE]]:1: error: branch target out of range ++bnez $r9, -0x400000-4 # CHECK: :[[@LINE]]:1: error: branch target out of range ++bnez $r9, 0x3FFFFC+4 # CHECK: :[[@LINE]]:1: error: branch target out of range ++bceqz $fcc6, -0x400000-4 # CHECK: :[[@LINE]]:1: error: branch target out of range ++bceqz $fcc6, 0x3FFFFC+4 # CHECK: :[[@LINE]]:1: error: branch target out of range ++bcnez $fcc6, -0x400000-4 # CHECK: :[[@LINE]]:1: error: branch target out of range ++bcnez $fcc6, 0x3FFFFC+4 # CHECK: :[[@LINE]]:1: error: branch target out of range ++### simm26 << 2 ++b -0x8000000-4 # CHECK: :[[@LINE]]:1: error: branch target out of range ++b 0x7FFFFFC+4 # CHECK: :[[@LINE]]:1: error: branch target out of range ++bl -0x8000000-4 # CHECK: :[[@LINE]]:1: error: branch target out of range ++bl 0x7FFFFFC+4 # CHECK: :[[@LINE]]:1: error: branch target out of range ++ ++## unaligned immediate ++### simm16 << 2 ++beq $r10, $r7, 0x1FFFC+1 # CHECK: :[[@LINE]]:1: error: branch to misaligned address ++bne $r10, $r7, 0x1FFFC+1 # CHECK: :[[@LINE]]:1: error: branch to misaligned address ++blt $r10, $r7, 0x1FFFC+1 # CHECK: :[[@LINE]]:1: error: branch to misaligned address ++bge $r10, $r7, 0x1FFFC+1 # CHECK: :[[@LINE]]:1: error: branch to misaligned address ++bltu $r10, $r7, 0x1FFFC+1 # CHECK: :[[@LINE]]:1: error: branch to misaligned address ++bgeu $r10, $r7, 0x1FFFC+1 # CHECK: :[[@LINE]]:1: error: branch to misaligned address ++### simm21 << 2 ++beqz $r9, 0x3FFFFC+1 # CHECK: :[[@LINE]]:1: error: branch to misaligned address ++bnez $r9, 0x3FFFFC+1 # CHECK: :[[@LINE]]:1: error: branch to misaligned address ++bceqz $fcc6, 0x3FFFFC+1 # CHECK: :[[@LINE]]:1: error: branch to misaligned address ++bcnez $fcc6, 0x3FFFFC+1 # CHECK: :[[@LINE]]:1: error: branch to misaligned address ++### simm26 << 2 ++b 0x7FFFFFC+1 # CHECK: :[[@LINE]]:1: error: branch to misaligned address ++bl 0x7FFFFFC+1 # CHECK: :[[@LINE]]:1: error: branch to 
misaligned address +diff --git a/llvm/test/MC/LoongArch/lit.local.cfg b/llvm/test/MC/LoongArch/lit.local.cfg +index 2b5a4893e..6223fc691 100644 +--- a/llvm/test/MC/LoongArch/lit.local.cfg ++++ b/llvm/test/MC/LoongArch/lit.local.cfg +@@ -1,2 +1,3 @@ + if not 'LoongArch' in config.root.targets: + config.unsupported = True ++ +diff --git a/llvm/test/MC/LoongArch/macro-la.s b/llvm/test/MC/LoongArch/macro-la.s +new file mode 100644 +index 000000000..eca76ac69 +--- /dev/null ++++ b/llvm/test/MC/LoongArch/macro-la.s +@@ -0,0 +1,168 @@ ++# RUN: llvm-mc %s -triple=loongarch64-unknown-linux-gnu -show-encoding | FileCheck %s ++# CHECK: la.pcrel $r4, symbol # encoding: [0x04,0x00,0x00,0x1c] ++# CHECK: # fixup A - offset: 0, value: (symbol)+2048, kind: fixup_LARCH_SOP_PUSH_PCREL ++# CHECK: # fixup B - offset: 0, value: 12, kind: fixup_LARCH_SOP_PUSH_ABSOLUTE ++# CHECK: # fixup C - offset: 0, value: 0, kind: fixup_LARCH_SOP_SR ++# CHECK: # fixup D - offset: 0, value: 0, kind: fixup_LARCH_SOP_POP_32_S_5_20 ++# CHECK: # la expanded slot # encoding: [0x84,0x00,0xc0,0x02] ++# CHECK: # fixup A - offset: 0, value: (symbol)+4, kind: fixup_LARCH_SOP_PUSH_PCREL ++# CHECK: # fixup B - offset: 0, value: (symbol)+2052, kind: fixup_LARCH_SOP_PUSH_PCREL ++# CHECK: # fixup C - offset: 0, value: 12, kind: fixup_LARCH_SOP_PUSH_ABSOLUTE ++# CHECK: # fixup D - offset: 0, value: 0, kind: fixup_LARCH_SOP_SR ++# CHECK: # fixup E - offset: 0, value: 12, kind: fixup_LARCH_SOP_PUSH_ABSOLUTE ++# CHECK: # fixup F - offset: 0, value: 0, kind: fixup_LARCH_SOP_SL ++# CHECK: # fixup G - offset: 0, value: 0, kind: fixup_LARCH_SOP_SUB ++# CHECK: # fixup H - offset: 0, value: 0, kind: fixup_LARCH_SOP_POP_32_S_10_12 ++la.local $a0, symbol ++ ++# CHECK: la.got $r4, symbol # encoding: [0x04,0x00,0x00,0x1c] ++# CHECK: # fixup A - offset: 0, value: _GLOBAL_OFFSET_TABLE_+2048, kind: fixup_LARCH_SOP_PUSH_PCREL ++# CHECK: # fixup B - offset: 0, value: symbol, kind: fixup_LARCH_SOP_PUSH_GPREL ++# CHECK: # fixup C - offset: 0, value: 0, kind: fixup_LARCH_SOP_ADD ++# CHECK: # fixup D - offset: 0, value: 12, kind: fixup_LARCH_SOP_PUSH_ABSOLUTE ++# CHECK: # fixup E - offset: 0, value: 0, kind: fixup_LARCH_SOP_SR ++# CHECK: # fixup F - offset: 0, value: 0, kind: fixup_LARCH_SOP_POP_32_S_5_20 ++# CHECK: # la expanded slot # encoding: [0x84,0x00,0xc0,0x28] ++# CHECK: # fixup A - offset: 0, value: _GLOBAL_OFFSET_TABLE_+4, kind: fixup_LARCH_SOP_PUSH_PCREL ++# CHECK: # fixup B - offset: 0, value: symbol, kind: fixup_LARCH_SOP_PUSH_GPREL ++# CHECK: # fixup C - offset: 0, value: 0, kind: fixup_LARCH_SOP_ADD ++# CHECK: # fixup D - offset: 0, value: _GLOBAL_OFFSET_TABLE_+2052, kind: fixup_LARCH_SOP_PUSH_PCREL ++# CHECK: # fixup E - offset: 0, value: symbol, kind: fixup_LARCH_SOP_PUSH_GPREL ++# CHECK: # fixup F - offset: 0, value: 0, kind: fixup_LARCH_SOP_ADD ++# CHECK: # fixup G - offset: 0, value: 12, kind: fixup_LARCH_SOP_PUSH_ABSOLUTE ++# CHECK: # fixup H - offset: 0, value: 0, kind: fixup_LARCH_SOP_SR ++# CHECK: # fixup I - offset: 0, value: 12, kind: fixup_LARCH_SOP_PUSH_ABSOLUTE ++# CHECK: # fixup J - offset: 0, value: 0, kind: fixup_LARCH_SOP_SL ++# CHECK: # fixup K - offset: 0, value: 0, kind: fixup_LARCH_SOP_SUB ++# CHECK: # fixup L - offset: 0, value: 0, kind: fixup_LARCH_SOP_POP_32_S_10_12 ++la.global $a0, symbol ++ ++# CHECK: la.pcrel $r4, symbol # encoding: [0x04,0x00,0x00,0x1c] ++# CHECK: # fixup A - offset: 0, value: (symbol)+2048, kind: fixup_LARCH_SOP_PUSH_PCREL ++# CHECK: # fixup B - offset: 0, value: 12, kind: fixup_LARCH_SOP_PUSH_ABSOLUTE 
++# CHECK: # fixup C - offset: 0, value: 0, kind: fixup_LARCH_SOP_SR ++# CHECK: # fixup D - offset: 0, value: 0, kind: fixup_LARCH_SOP_POP_32_S_5_20 ++# CHECK: # la expanded slot # encoding: [0x84,0x00,0xc0,0x02] ++# CHECK: # fixup A - offset: 0, value: (symbol)+4, kind: fixup_LARCH_SOP_PUSH_PCREL ++# CHECK: # fixup B - offset: 0, value: (symbol)+2052, kind: fixup_LARCH_SOP_PUSH_PCREL ++# CHECK: # fixup C - offset: 0, value: 12, kind: fixup_LARCH_SOP_PUSH_ABSOLUTE ++# CHECK: # fixup D - offset: 0, value: 0, kind: fixup_LARCH_SOP_SR ++# CHECK: # fixup E - offset: 0, value: 12, kind: fixup_LARCH_SOP_PUSH_ABSOLUTE ++# CHECK: # fixup F - offset: 0, value: 0, kind: fixup_LARCH_SOP_SL ++# CHECK: # fixup G - offset: 0, value: 0, kind: fixup_LARCH_SOP_SUB ++# CHECK: # fixup H - offset: 0, value: 0, kind: fixup_LARCH_SOP_POP_32_S_10_12 ++la.pcrel $a0, symbol ++ ++# CHECK: la.got $r4, symbol # encoding: [0x04,0x00,0x00,0x1c] ++# CHECK: # fixup A - offset: 0, value: _GLOBAL_OFFSET_TABLE_+2048, kind: fixup_LARCH_SOP_PUSH_PCREL ++# CHECK: # fixup B - offset: 0, value: symbol, kind: fixup_LARCH_SOP_PUSH_GPREL ++# CHECK: # fixup C - offset: 0, value: 0, kind: fixup_LARCH_SOP_ADD ++# CHECK: # fixup D - offset: 0, value: 12, kind: fixup_LARCH_SOP_PUSH_ABSOLUTE ++# CHECK: # fixup E - offset: 0, value: 0, kind: fixup_LARCH_SOP_SR ++# CHECK: # fixup F - offset: 0, value: 0, kind: fixup_LARCH_SOP_POP_32_S_5_20 ++# CHECK: # la expanded slot # encoding: [0x84,0x00,0xc0,0x28] ++# CHECK: # fixup A - offset: 0, value: _GLOBAL_OFFSET_TABLE_+4, kind: fixup_LARCH_SOP_PUSH_PCREL ++# CHECK: # fixup B - offset: 0, value: symbol, kind: fixup_LARCH_SOP_PUSH_GPREL ++# CHECK: # fixup C - offset: 0, value: 0, kind: fixup_LARCH_SOP_ADD ++# CHECK: # fixup D - offset: 0, value: _GLOBAL_OFFSET_TABLE_+2052, kind: fixup_LARCH_SOP_PUSH_PCREL ++# CHECK: # fixup E - offset: 0, value: symbol, kind: fixup_LARCH_SOP_PUSH_GPREL ++# CHECK: # fixup F - offset: 0, value: 0, kind: fixup_LARCH_SOP_ADD ++# CHECK: # fixup G - offset: 0, value: 12, kind: fixup_LARCH_SOP_PUSH_ABSOLUTE ++# CHECK: # fixup H - offset: 0, value: 0, kind: fixup_LARCH_SOP_SR ++# CHECK: # fixup I - offset: 0, value: 12, kind: fixup_LARCH_SOP_PUSH_ABSOLUTE ++# CHECK: # fixup J - offset: 0, value: 0, kind: fixup_LARCH_SOP_SL ++# CHECK: # fixup K - offset: 0, value: 0, kind: fixup_LARCH_SOP_SUB ++# CHECK: # fixup L - offset: 0, value: 0, kind: fixup_LARCH_SOP_POP_32_S_10_12 ++la.got $a0, symbol ++ ++# CHECK: la.tls.le $r4, symbol # encoding: [0x04,0x00,0x00,0x14] ++# CHECK: # fixup A - offset: 0, value: symbol, kind: fixup_LARCH_SOP_PUSH_TLS_TPREL ++# CHECK: # fixup B - offset: 0, value: 32, kind: fixup_LARCH_SOP_PUSH_ABSOLUTE ++# CHECK: # fixup C - offset: 0, value: 0, kind: fixup_LARCH_SOP_SL ++# CHECK: # fixup D - offset: 0, value: 44, kind: fixup_LARCH_SOP_PUSH_ABSOLUTE ++# CHECK: # fixup E - offset: 0, value: 0, kind: fixup_LARCH_SOP_SR ++# CHECK: # fixup F - offset: 0, value: 0, kind: fixup_LARCH_SOP_POP_32_S_5_20 ++# CHECK: # la expanded slot # encoding: [0x84,0x00,0x80,0x03] ++# CHECK: # fixup A - offset: 0, value: symbol, kind: fixup_LARCH_SOP_PUSH_TLS_TPREL ++# CHECK: # fixup B - offset: 0, value: 4095, kind: fixup_LARCH_SOP_PUSH_ABSOLUTE ++# CHECK: # fixup C - offset: 0, value: 0, kind: fixup_LARCH_SOP_AND ++# CHECK: # fixup D - offset: 0, value: 0, kind: fixup_LARCH_SOP_POP_32_U_10_12 ++# CHECK: # la expanded slot # encoding: [0x04,0x00,0x00,0x16] ++# CHECK: # fixup A - offset: 0, value: symbol, kind: fixup_LARCH_SOP_PUSH_TLS_TPREL ++# CHECK: # fixup B - offset: 
0, value: 12, kind: fixup_LARCH_SOP_PUSH_ABSOLUTE ++# CHECK: # fixup C - offset: 0, value: 0, kind: fixup_LARCH_SOP_SL ++# CHECK: # fixup D - offset: 0, value: 44, kind: fixup_LARCH_SOP_PUSH_ABSOLUTE ++# CHECK: # fixup E - offset: 0, value: 0, kind: fixup_LARCH_SOP_SR ++# CHECK: # fixup F - offset: 0, value: 0, kind: fixup_LARCH_SOP_POP_32_S_5_20 ++# CHECK: # la expanded slot # encoding: [0x84,0x00,0x00,0x03] ++# CHECK: # fixup A - offset: 0, value: symbol, kind: fixup_LARCH_SOP_PUSH_TLS_TPREL ++# CHECK: # fixup B - offset: 0, value: 52, kind: fixup_LARCH_SOP_PUSH_ABSOLUTE ++# CHECK: # fixup C - offset: 0, value: 0, kind: fixup_LARCH_SOP_SR ++# CHECK: # fixup D - offset: 0, value: 0, kind: fixup_LARCH_SOP_POP_32_S_10_12 ++la.tls.le $a0, symbol ++ ++# CHECK: la.tls.ie $r4, symbol # encoding: [0x04,0x00,0x00,0x1c] ++# CHECK: # fixup A - offset: 0, value: _GLOBAL_OFFSET_TABLE_+2048, kind: fixup_LARCH_SOP_PUSH_PCREL ++# CHECK: # fixup B - offset: 0, value: symbol, kind: fixup_LARCH_SOP_PUSH_TLS_GOT ++# CHECK: # fixup C - offset: 0, value: 0, kind: fixup_LARCH_SOP_ADD ++# CHECK: # fixup D - offset: 0, value: 12, kind: fixup_LARCH_SOP_PUSH_ABSOLUTE ++# CHECK: # fixup E - offset: 0, value: 0, kind: fixup_LARCH_SOP_SR ++# CHECK: # fixup F - offset: 0, value: 0, kind: fixup_LARCH_SOP_POP_32_S_5_20 ++# CHECK: # la expanded slot # encoding: [0x84,0x00,0xc0,0x28] ++# CHECK: # fixup A - offset: 0, value: _GLOBAL_OFFSET_TABLE_+4, kind: fixup_LARCH_SOP_PUSH_PCREL ++# CHECK: # fixup B - offset: 0, value: symbol, kind: fixup_LARCH_SOP_PUSH_TLS_GOT ++# CHECK: # fixup C - offset: 0, value: 0, kind: fixup_LARCH_SOP_ADD ++# CHECK: # fixup D - offset: 0, value: _GLOBAL_OFFSET_TABLE_+2052, kind: fixup_LARCH_SOP_PUSH_PCREL ++# CHECK: # fixup E - offset: 0, value: symbol, kind: fixup_LARCH_SOP_PUSH_TLS_GOT ++# CHECK: # fixup F - offset: 0, value: 0, kind: fixup_LARCH_SOP_ADD ++# CHECK: # fixup G - offset: 0, value: 12, kind: fixup_LARCH_SOP_PUSH_ABSOLUTE ++# CHECK: # fixup H - offset: 0, value: 0, kind: fixup_LARCH_SOP_SR ++# CHECK: # fixup I - offset: 0, value: 12, kind: fixup_LARCH_SOP_PUSH_ABSOLUTE ++# CHECK: # fixup J - offset: 0, value: 0, kind: fixup_LARCH_SOP_SL ++# CHECK: # fixup K - offset: 0, value: 0, kind: fixup_LARCH_SOP_SUB ++# CHECK: # fixup L - offset: 0, value: 0, kind: fixup_LARCH_SOP_POP_32_S_10_12 ++la.tls.ie $a0, symbol ++ ++# CHECK: la.tls.gd $r4, symbol # encoding: [0x04,0x00,0x00,0x1c] ++# CHECK: # fixup A - offset: 0, value: _GLOBAL_OFFSET_TABLE_+2048, kind: fixup_LARCH_SOP_PUSH_PCREL ++# CHECK: # fixup B - offset: 0, value: symbol, kind: fixup_LARCH_SOP_PUSH_TLS_GD ++# CHECK: # fixup C - offset: 0, value: 0, kind: fixup_LARCH_SOP_ADD ++# CHECK: # fixup D - offset: 0, value: 12, kind: fixup_LARCH_SOP_PUSH_ABSOLUTE ++# CHECK: # fixup E - offset: 0, value: 0, kind: fixup_LARCH_SOP_SR ++# CHECK: # fixup F - offset: 0, value: 0, kind: fixup_LARCH_SOP_POP_32_S_5_20 ++# CHECK: # la expanded slot # encoding: [0x84,0x00,0xc0,0x02] ++# CHECK: # fixup A - offset: 0, value: _GLOBAL_OFFSET_TABLE_+4, kind: fixup_LARCH_SOP_PUSH_PCREL ++# CHECK: # fixup B - offset: 0, value: symbol, kind: fixup_LARCH_SOP_PUSH_TLS_GD ++# CHECK: # fixup C - offset: 0, value: 0, kind: fixup_LARCH_SOP_ADD ++# CHECK: # fixup D - offset: 0, value: _GLOBAL_OFFSET_TABLE_+2052, kind: fixup_LARCH_SOP_PUSH_PCREL ++# CHECK: # fixup E - offset: 0, value: symbol, kind: fixup_LARCH_SOP_PUSH_TLS_GD ++# CHECK: # fixup F - offset: 0, value: 0, kind: fixup_LARCH_SOP_ADD ++# CHECK: # fixup G - offset: 0, value: 12, kind: 
fixup_LARCH_SOP_PUSH_ABSOLUTE ++# CHECK: # fixup H - offset: 0, value: 0, kind: fixup_LARCH_SOP_SR ++# CHECK: # fixup I - offset: 0, value: 12, kind: fixup_LARCH_SOP_PUSH_ABSOLUTE ++# CHECK: # fixup J - offset: 0, value: 0, kind: fixup_LARCH_SOP_SL ++# CHECK: # fixup K - offset: 0, value: 0, kind: fixup_LARCH_SOP_SUB ++# CHECK: # fixup L - offset: 0, value: 0, kind: fixup_LARCH_SOP_POP_32_S_10_12 ++la.tls.ld $a0, symbol ++ ++# CHECK: la.tls.gd $r4, symbol # encoding: [0x04,0x00,0x00,0x1c] ++# CHECK: # fixup A - offset: 0, value: _GLOBAL_OFFSET_TABLE_+2048, kind: fixup_LARCH_SOP_PUSH_PCREL ++# CHECK: # fixup B - offset: 0, value: symbol, kind: fixup_LARCH_SOP_PUSH_TLS_GD ++# CHECK: # fixup C - offset: 0, value: 0, kind: fixup_LARCH_SOP_ADD ++# CHECK: # fixup D - offset: 0, value: 12, kind: fixup_LARCH_SOP_PUSH_ABSOLUTE ++# CHECK: # fixup E - offset: 0, value: 0, kind: fixup_LARCH_SOP_SR ++# CHECK: # fixup F - offset: 0, value: 0, kind: fixup_LARCH_SOP_POP_32_S_5_20 ++# CHECK: # la expanded slot # encoding: [0x84,0x00,0xc0,0x02] ++# CHECK: # fixup A - offset: 0, value: _GLOBAL_OFFSET_TABLE_+4, kind: fixup_LARCH_SOP_PUSH_PCREL ++# CHECK: # fixup B - offset: 0, value: symbol, kind: fixup_LARCH_SOP_PUSH_TLS_GD ++# CHECK: # fixup C - offset: 0, value: 0, kind: fixup_LARCH_SOP_ADD ++# CHECK: # fixup D - offset: 0, value: _GLOBAL_OFFSET_TABLE_+2052, kind: fixup_LARCH_SOP_PUSH_PCREL ++# CHECK: # fixup E - offset: 0, value: symbol, kind: fixup_LARCH_SOP_PUSH_TLS_GD ++# CHECK: # fixup F - offset: 0, value: 0, kind: fixup_LARCH_SOP_ADD ++# CHECK: # fixup G - offset: 0, value: 12, kind: fixup_LARCH_SOP_PUSH_ABSOLUTE ++# CHECK: # fixup H - offset: 0, value: 0, kind: fixup_LARCH_SOP_SR ++# CHECK: # fixup I - offset: 0, value: 12, kind: fixup_LARCH_SOP_PUSH_ABSOLUTE ++# CHECK: # fixup J - offset: 0, value: 0, kind: fixup_LARCH_SOP_SL ++# CHECK: # fixup K - offset: 0, value: 0, kind: fixup_LARCH_SOP_SUB ++# CHECK: # fixup L - offset: 0, value: 0, kind: fixup_LARCH_SOP_POP_32_S_10_12 ++la.tls.gd $a0, symbol +diff --git a/llvm/test/MC/LoongArch/macro-li.s b/llvm/test/MC/LoongArch/macro-li.s +new file mode 100644 +index 000000000..b1a7c58ba +--- /dev/null ++++ b/llvm/test/MC/LoongArch/macro-li.s +@@ -0,0 +1,773 @@ ++# RUN: llvm-mc %s -triple=loongarch64-unknown-linux-gnu | FileCheck %s ++li.w $a0, 0x00000000 # CHECK: ori $r4, $zero, 0 ++li.w $a0, 0x000007ff # CHECK: ori $r4, $zero, 2047 ++li.w $a0, 0x00000800 # CHECK: ori $r4, $zero, 2048 ++li.w $a0, 0x00000fff # CHECK: ori $r4, $zero, 4095 ++li.w $a0, 0x7ffff000 # CHECK: lu12i.w $r4, 524287 ++li.w $a0, 0x7ffff7ff # CHECK: lu12i.w $r4, 524287 ++ # CHECK: ori $r4, $r4, 2047 ++li.w $a0, 0x7ffff800 # CHECK: lu12i.w $r4, 524287 ++ # CHECK: ori $r4, $r4, 2048 ++li.w $a0, 0x7fffffff # CHECK: lu12i.w $r4, 524287 ++ # CHECK: ori $r4, $r4, 4095 ++li.w $a0, 0x80000000 # CHECK: lu12i.w $r4, -524288 ++li.w $a0, 0x800007ff # CHECK: lu12i.w $r4, -524288 ++ # CHECK: ori $r4, $r4, 2047 ++li.w $a0, 0x80000800 # CHECK: lu12i.w $r4, -524288 ++ # CHECK: ori $r4, $r4, 2048 ++li.w $a0, 0x80000fff # CHECK: lu12i.w $r4, -524288 ++ # CHECK: ori $r4, $r4, 4095 ++li.w $a0, 0xfffff000 # CHECK: lu12i.w $r4, -1 ++li.w $a0, 0xfffff7ff # CHECK: lu12i.w $r4, -1 ++ # CHECK: ori $r4, $r4, 2047 ++li.w $a0, 0xfffff800 # CHECK: addi.w $r4, $zero, -2048 ++li.w $a0, 0xffffffff # CHECK: addi.w $r4, $zero, -1 ++li.d $a0, 0x0000000000000000 # CHECK: addi.d $r4, $zero, 0 ++li.d $a0, 0x00000000000007ff # CHECK: addi.d $r4, $zero, 2047 ++li.d $a0, 0x0000000000000800 # CHECK: ori $r4, $zero, 2048 ++li.d 
$a0, 0x0000000000000fff # CHECK: ori $r4, $zero, 4095 ++li.d $a0, 0x000000007ffff000 # CHECK: lu12i.w $r4, 524287 ++li.d $a0, 0x000000007ffff7ff # CHECK: lu12i.w $r4, 524287 ++ # CHECK: ori $r4, $r4, 2047 ++li.d $a0, 0x000000007ffff800 # CHECK: lu12i.w $r4, 524287 ++ # CHECK: ori $r4, $r4, 2048 ++li.d $a0, 0x000000007fffffff # CHECK: lu12i.w $r4, 524287 ++ # CHECK: ori $r4, $r4, 4095 ++li.d $a0, 0x0000000080000000 # CHECK: lu12i.w $r4, -524288 ++ # CHECK: lu32i.d $r4, 0 ++li.d $a0, 0x00000000800007ff # CHECK: lu12i.w $r4, -524288 ++ # CHECK: ori $r4, $r4, 2047 ++ # CHECK: lu32i.d $r4, 0 ++li.d $a0, 0x0000000080000800 # CHECK: lu12i.w $r4, -524288 ++ # CHECK: ori $r4, $r4, 2048 ++ # CHECK: lu32i.d $r4, 0 ++li.d $a0, 0x0000000080000fff # CHECK: lu12i.w $r4, -524288 ++ # CHECK: ori $r4, $r4, 4095 ++ # CHECK: lu32i.d $r4, 0 ++li.d $a0, 0x00000000fffff000 # CHECK: lu12i.w $r4, -1 ++ # CHECK: lu32i.d $r4, 0 ++li.d $a0, 0x00000000fffff7ff # CHECK: lu12i.w $r4, -1 ++ # CHECK: ori $r4, $r4, 2047 ++ # CHECK: lu32i.d $r4, 0 ++li.d $a0, 0x00000000fffff800 # CHECK: addi.w $r4, $zero, -2048 ++ # CHECK: lu32i.d $r4, 0 ++li.d $a0, 0x00000000ffffffff # CHECK: addi.w $r4, $zero, -1 ++ # CHECK: lu32i.d $r4, 0 ++li.d $a0, 0x0007ffff00000000 # CHECK: ori $r4, $zero, 0 ++ # CHECK: lu32i.d $r4, 524287 ++li.d $a0, 0x0007ffff000007ff # CHECK: ori $r4, $zero, 2047 ++ # CHECK: lu32i.d $r4, 524287 ++li.d $a0, 0x0007ffff00000800 # CHECK: ori $r4, $zero, 2048 ++ # CHECK: lu32i.d $r4, 524287 ++li.d $a0, 0x0007ffff00000fff # CHECK: ori $r4, $zero, 4095 ++ # CHECK: lu32i.d $r4, 524287 ++li.d $a0, 0x0007ffff7ffff000 # CHECK: lu12i.w $r4, 524287 ++ # CHECK: lu32i.d $r4, 524287 ++li.d $a0, 0x0007ffff7ffff7ff # CHECK: lu12i.w $r4, 524287 ++ # CHECK: ori $r4, $r4, 2047 ++ # CHECK: lu32i.d $r4, 524287 ++li.d $a0, 0x0007ffff7ffff800 # CHECK: lu12i.w $r4, 524287 ++ # CHECK: ori $r4, $r4, 2048 ++ # CHECK: lu32i.d $r4, 524287 ++li.d $a0, 0x0007ffff7fffffff # CHECK: lu12i.w $r4, 524287 ++ # CHECK: ori $r4, $r4, 4095 ++ # CHECK: lu32i.d $r4, 524287 ++li.d $a0, 0x0007ffff80000000 # CHECK: lu12i.w $r4, -524288 ++ # CHECK: lu32i.d $r4, 524287 ++li.d $a0, 0x0007ffff800007ff # CHECK: lu12i.w $r4, -524288 ++ # CHECK: ori $r4, $r4, 2047 ++ # CHECK: lu32i.d $r4, 524287 ++li.d $a0, 0x0007ffff80000800 # CHECK: lu12i.w $r4, -524288 ++ # CHECK: ori $r4, $r4, 2048 ++ # CHECK: lu32i.d $r4, 524287 ++li.d $a0, 0x0007ffff80000fff # CHECK: lu12i.w $r4, -524288 ++ # CHECK: ori $r4, $r4, 4095 ++ # CHECK: lu32i.d $r4, 524287 ++li.d $a0, 0x0007fffffffff000 # CHECK: lu12i.w $r4, -1 ++ # CHECK: lu32i.d $r4, 524287 ++li.d $a0, 0x0007fffffffff7ff # CHECK: lu12i.w $r4, -1 ++ # CHECK: ori $r4, $r4, 2047 ++ # CHECK: lu32i.d $r4, 524287 ++li.d $a0, 0x0007fffffffff800 # CHECK: addi.w $r4, $zero, -2048 ++ # CHECK: lu32i.d $r4, 524287 ++li.d $a0, 0x0007ffffffffffff # CHECK: addi.w $r4, $zero, -1 ++ # CHECK: lu32i.d $r4, 524287 ++li.d $a0, 0x0008000000000000 # CHECK: ori $r4, $zero, 0 ++ # CHECK: lu32i.d $r4, -524288 ++ # CHECK: lu52i.d $r4, $r4, 0 ++li.d $a0, 0x00080000000007ff # CHECK: ori $r4, $zero, 2047 ++ # CHECK: lu32i.d $r4, -524288 ++ # CHECK: lu52i.d $r4, $r4, 0 ++li.d $a0, 0x0008000000000800 # CHECK: ori $r4, $zero, 2048 ++ # CHECK: lu32i.d $r4, -524288 ++ # CHECK: lu52i.d $r4, $r4, 0 ++li.d $a0, 0x0008000000000fff # CHECK: ori $r4, $zero, 4095 ++ # CHECK: lu32i.d $r4, -524288 ++ # CHECK: lu52i.d $r4, $r4, 0 ++li.d $a0, 0x000800007ffff000 # CHECK: lu12i.w $r4, 524287 ++ # CHECK: lu32i.d $r4, -524288 ++ # CHECK: lu52i.d $r4, $r4, 0 ++li.d $a0, 
0x000800007ffff7ff # CHECK: lu12i.w $r4, 524287 ++ # CHECK: ori $r4, $r4, 2047 ++ # CHECK: lu32i.d $r4, -524288 ++ # CHECK: lu52i.d $r4, $r4, 0 ++li.d $a0, 0x000800007ffff800 # CHECK: lu12i.w $r4, 524287 ++ # CHECK: ori $r4, $r4, 2048 ++ # CHECK: lu32i.d $r4, -524288 ++ # CHECK: lu52i.d $r4, $r4, 0 ++li.d $a0, 0x000800007fffffff # CHECK: lu12i.w $r4, 524287 ++ # CHECK: ori $r4, $r4, 4095 ++ # CHECK: lu32i.d $r4, -524288 ++ # CHECK: lu52i.d $r4, $r4, 0 ++li.d $a0, 0x0008000080000000 # CHECK: lu12i.w $r4, -524288 ++ # CHECK: lu32i.d $r4, -524288 ++ # CHECK: lu52i.d $r4, $r4, 0 ++li.d $a0, 0x00080000800007ff # CHECK: lu12i.w $r4, -524288 ++ # CHECK: ori $r4, $r4, 2047 ++ # CHECK: lu32i.d $r4, -524288 ++ # CHECK: lu52i.d $r4, $r4, 0 ++li.d $a0, 0x0008000080000800 # CHECK: lu12i.w $r4, -524288 ++ # CHECK: ori $r4, $r4, 2048 ++ # CHECK: lu32i.d $r4, -524288 ++ # CHECK: lu52i.d $r4, $r4, 0 ++li.d $a0, 0x0008000080000fff # CHECK: lu12i.w $r4, -524288 ++ # CHECK: ori $r4, $r4, 4095 ++ # CHECK: lu32i.d $r4, -524288 ++ # CHECK: lu52i.d $r4, $r4, 0 ++li.d $a0, 0x00080000fffff000 # CHECK: lu12i.w $r4, -1 ++ # CHECK: lu32i.d $r4, -524288 ++ # CHECK: lu52i.d $r4, $r4, 0 ++li.d $a0, 0x00080000fffff7ff # CHECK: lu12i.w $r4, -1 ++ # CHECK: ori $r4, $r4, 2047 ++ # CHECK: lu32i.d $r4, -524288 ++ # CHECK: lu52i.d $r4, $r4, 0 ++li.d $a0, 0x00080000fffff800 # CHECK: addi.w $r4, $zero, -2048 ++ # CHECK: lu32i.d $r4, -524288 ++ # CHECK: lu52i.d $r4, $r4, 0 ++li.d $a0, 0x00080000ffffffff # CHECK: addi.w $r4, $zero, -1 ++ # CHECK: lu32i.d $r4, -524288 ++ # CHECK: lu52i.d $r4, $r4, 0 ++li.d $a0, 0x000fffff00000000 # CHECK: ori $r4, $zero, 0 ++ # CHECK: lu32i.d $r4, -1 ++ # CHECK: lu52i.d $r4, $r4, 0 ++li.d $a0, 0x000fffff000007ff # CHECK: ori $r4, $zero, 2047 ++ # CHECK: lu32i.d $r4, -1 ++ # CHECK: lu52i.d $r4, $r4, 0 ++li.d $a0, 0x000fffff00000800 # CHECK: ori $r4, $zero, 2048 ++ # CHECK: lu32i.d $r4, -1 ++ # CHECK: lu52i.d $r4, $r4, 0 ++li.d $a0, 0x000fffff00000fff # CHECK: ori $r4, $zero, 4095 ++ # CHECK: lu32i.d $r4, -1 ++ # CHECK: lu52i.d $r4, $r4, 0 ++li.d $a0, 0x000fffff7ffff000 # CHECK: lu12i.w $r4, 524287 ++ # CHECK: lu32i.d $r4, -1 ++ # CHECK: lu52i.d $r4, $r4, 0 ++li.d $a0, 0x000fffff7ffff7ff # CHECK: lu12i.w $r4, 524287 ++ # CHECK: ori $r4, $r4, 2047 ++ # CHECK: lu32i.d $r4, -1 ++ # CHECK: lu52i.d $r4, $r4, 0 ++li.d $a0, 0x000fffff7ffff800 # CHECK: lu12i.w $r4, 524287 ++ # CHECK: ori $r4, $r4, 2048 ++ # CHECK: lu32i.d $r4, -1 ++ # CHECK: lu52i.d $r4, $r4, 0 ++li.d $a0, 0x000fffff7fffffff # CHECK: lu12i.w $r4, 524287 ++ # CHECK: ori $r4, $r4, 4095 ++ # CHECK: lu32i.d $r4, -1 ++ # CHECK: lu52i.d $r4, $r4, 0 ++li.d $a0, 0x000fffff80000000 # CHECK: lu12i.w $r4, -524288 ++ # CHECK: lu52i.d $r4, $r4, 0 ++li.d $a0, 0x000fffff800007ff # CHECK: lu12i.w $r4, -524288 ++ # CHECK: ori $r4, $r4, 2047 ++ # CHECK: lu52i.d $r4, $r4, 0 ++li.d $a0, 0x000fffff80000800 # CHECK: lu12i.w $r4, -524288 ++ # CHECK: ori $r4, $r4, 2048 ++ # CHECK: lu52i.d $r4, $r4, 0 ++li.d $a0, 0x000fffff80000fff # CHECK: lu12i.w $r4, -524288 ++ # CHECK: ori $r4, $r4, 4095 ++ # CHECK: lu52i.d $r4, $r4, 0 ++li.d $a0, 0x000ffffffffff000 # CHECK: lu12i.w $r4, -1 ++ # CHECK: lu52i.d $r4, $r4, 0 ++li.d $a0, 0x000ffffffffff7ff # CHECK: lu12i.w $r4, -1 ++ # CHECK: ori $r4, $r4, 2047 ++ # CHECK: lu52i.d $r4, $r4, 0 ++li.d $a0, 0x000ffffffffff800 # CHECK: addi.w $r4, $zero, -2048 ++ # CHECK: lu52i.d $r4, $r4, 0 ++li.d $a0, 0x000fffffffffffff # CHECK: addi.w $r4, $zero, -1 ++ # CHECK: lu52i.d $r4, $r4, 0 ++li.d $a0, 0x7ff0000000000000 # CHECK: lu52i.d $r4, 
$zero, 2047 ++li.d $a0, 0x7ff00000000007ff # CHECK: ori $r4, $zero, 2047 ++ # CHECK: lu52i.d $r4, $r4, 2047 ++li.d $a0, 0x7ff0000000000800 # CHECK: ori $r4, $zero, 2048 ++ # CHECK: lu52i.d $r4, $r4, 2047 ++li.d $a0, 0x7ff0000000000fff # CHECK: ori $r4, $zero, 4095 ++ # CHECK: lu52i.d $r4, $r4, 2047 ++li.d $a0, 0x7ff000007ffff000 # CHECK: lu12i.w $r4, 524287 ++ # CHECK: lu52i.d $r4, $r4, 2047 ++li.d $a0, 0x7ff000007ffff7ff # CHECK: lu12i.w $r4, 524287 ++ # CHECK: ori $r4, $r4, 2047 ++ # CHECK: lu52i.d $r4, $r4, 2047 ++li.d $a0, 0x7ff000007ffff800 # CHECK: lu12i.w $r4, 524287 ++ # CHECK: ori $r4, $r4, 2048 ++ # CHECK: lu52i.d $r4, $r4, 2047 ++li.d $a0, 0x7ff000007fffffff # CHECK: lu12i.w $r4, 524287 ++ # CHECK: ori $r4, $r4, 4095 ++ # CHECK: lu52i.d $r4, $r4, 2047 ++li.d $a0, 0x7ff0000080000000 # CHECK: lu12i.w $r4, -524288 ++ # CHECK: lu32i.d $r4, 0 ++ # CHECK: lu52i.d $r4, $r4, 2047 ++li.d $a0, 0x7ff00000800007ff # CHECK: lu12i.w $r4, -524288 ++ # CHECK: ori $r4, $r4, 2047 ++ # CHECK: lu32i.d $r4, 0 ++ # CHECK: lu52i.d $r4, $r4, 2047 ++li.d $a0, 0x7ff0000080000800 # CHECK: lu12i.w $r4, -524288 ++ # CHECK: ori $r4, $r4, 2048 ++ # CHECK: lu32i.d $r4, 0 ++ # CHECK: lu52i.d $r4, $r4, 2047 ++li.d $a0, 0x7ff0000080000fff # CHECK: lu12i.w $r4, -524288 ++ # CHECK: ori $r4, $r4, 4095 ++ # CHECK: lu32i.d $r4, 0 ++ # CHECK: lu52i.d $r4, $r4, 2047 ++li.d $a0, 0x7ff00000fffff000 # CHECK: lu12i.w $r4, -1 ++ # CHECK: lu32i.d $r4, 0 ++ # CHECK: lu52i.d $r4, $r4, 2047 ++li.d $a0, 0x7ff00000fffff7ff # CHECK: lu12i.w $r4, -1 ++ # CHECK: ori $r4, $r4, 2047 ++ # CHECK: lu32i.d $r4, 0 ++ # CHECK: lu52i.d $r4, $r4, 2047 ++li.d $a0, 0x7ff00000fffff800 # CHECK: addi.w $r4, $zero, -2048 ++ # CHECK: lu32i.d $r4, 0 ++ # CHECK: lu52i.d $r4, $r4, 2047 ++li.d $a0, 0x7ff00000ffffffff # CHECK: addi.w $r4, $zero, -1 ++ # CHECK: lu32i.d $r4, 0 ++ # CHECK: lu52i.d $r4, $r4, 2047 ++li.d $a0, 0x7ff7ffff00000000 # CHECK: ori $r4, $zero, 0 ++ # CHECK: lu32i.d $r4, 524287 ++ # CHECK: lu52i.d $r4, $r4, 2047 ++li.d $a0, 0x7ff7ffff000007ff # CHECK: ori $r4, $zero, 2047 ++ # CHECK: lu32i.d $r4, 524287 ++ # CHECK: lu52i.d $r4, $r4, 2047 ++li.d $a0, 0x7ff7ffff00000800 # CHECK: ori $r4, $zero, 2048 ++ # CHECK: lu32i.d $r4, 524287 ++ # CHECK: lu52i.d $r4, $r4, 2047 ++li.d $a0, 0x7ff7ffff00000fff # CHECK: ori $r4, $zero, 4095 ++ # CHECK: lu32i.d $r4, 524287 ++ # CHECK: lu52i.d $r4, $r4, 2047 ++li.d $a0, 0x7ff7ffff7ffff000 # CHECK: lu12i.w $r4, 524287 ++ # CHECK: lu32i.d $r4, 524287 ++ # CHECK: lu52i.d $r4, $r4, 2047 ++li.d $a0, 0x7ff7ffff7ffff7ff # CHECK: lu12i.w $r4, 524287 ++ # CHECK: ori $r4, $r4, 2047 ++ # CHECK: lu32i.d $r4, 524287 ++ # CHECK: lu52i.d $r4, $r4, 2047 ++li.d $a0, 0x7ff7ffff7ffff800 # CHECK: lu12i.w $r4, 524287 ++ # CHECK: ori $r4, $r4, 2048 ++ # CHECK: lu32i.d $r4, 524287 ++ # CHECK: lu52i.d $r4, $r4, 2047 ++li.d $a0, 0x7ff7ffff7fffffff # CHECK: lu12i.w $r4, 524287 ++ # CHECK: ori $r4, $r4, 4095 ++ # CHECK: lu32i.d $r4, 524287 ++ # CHECK: lu52i.d $r4, $r4, 2047 ++li.d $a0, 0x7ff7ffff80000000 # CHECK: lu12i.w $r4, -524288 ++ # CHECK: lu32i.d $r4, 524287 ++ # CHECK: lu52i.d $r4, $r4, 2047 ++li.d $a0, 0x7ff7ffff800007ff # CHECK: lu12i.w $r4, -524288 ++ # CHECK: ori $r4, $r4, 2047 ++ # CHECK: lu32i.d $r4, 524287 ++ # CHECK: lu52i.d $r4, $r4, 2047 ++li.d $a0, 0x7ff7ffff80000800 # CHECK: lu12i.w $r4, -524288 ++ # CHECK: ori $r4, $r4, 2048 ++ # CHECK: lu32i.d $r4, 524287 ++ # CHECK: lu52i.d $r4, $r4, 2047 ++li.d $a0, 0x7ff7ffff80000fff # CHECK: lu12i.w $r4, -524288 ++ # CHECK: ori $r4, $r4, 4095 ++ # CHECK: lu32i.d $r4, 
524287 ++ # CHECK: lu52i.d $r4, $r4, 2047 ++li.d $a0, 0x7ff7fffffffff000 # CHECK: lu12i.w $r4, -1 ++ # CHECK: lu32i.d $r4, 524287 ++ # CHECK: lu52i.d $r4, $r4, 2047 ++li.d $a0, 0x7ff7fffffffff7ff # CHECK: lu12i.w $r4, -1 ++ # CHECK: ori $r4, $r4, 2047 ++ # CHECK: lu32i.d $r4, 524287 ++ # CHECK: lu52i.d $r4, $r4, 2047 ++li.d $a0, 0x7ff7fffffffff800 # CHECK: addi.w $r4, $zero, -2048 ++ # CHECK: lu32i.d $r4, 524287 ++ # CHECK: lu52i.d $r4, $r4, 2047 ++li.d $a0, 0x7ff7ffffffffffff # CHECK: addi.w $r4, $zero, -1 ++ # CHECK: lu32i.d $r4, 524287 ++ # CHECK: lu52i.d $r4, $r4, 2047 ++li.d $a0, 0x7ff8000000000000 # CHECK: ori $r4, $zero, 0 ++ # CHECK: lu32i.d $r4, -524288 ++ # CHECK: lu52i.d $r4, $r4, 2047 ++li.d $a0, 0x7ff80000000007ff # CHECK: ori $r4, $zero, 2047 ++ # CHECK: lu32i.d $r4, -524288 ++ # CHECK: lu52i.d $r4, $r4, 2047 ++li.d $a0, 0x7ff8000000000800 # CHECK: ori $r4, $zero, 2048 ++ # CHECK: lu32i.d $r4, -524288 ++ # CHECK: lu52i.d $r4, $r4, 2047 ++li.d $a0, 0x7ff8000000000fff # CHECK: ori $r4, $zero, 4095 ++ # CHECK: lu32i.d $r4, -524288 ++ # CHECK: lu52i.d $r4, $r4, 2047 ++li.d $a0, 0x7ff800007ffff000 # CHECK: lu12i.w $r4, 524287 ++ # CHECK: lu32i.d $r4, -524288 ++ # CHECK: lu52i.d $r4, $r4, 2047 ++li.d $a0, 0x7ff800007ffff7ff # CHECK: lu12i.w $r4, 524287 ++ # CHECK: ori $r4, $r4, 2047 ++ # CHECK: lu32i.d $r4, -524288 ++ # CHECK: lu52i.d $r4, $r4, 2047 ++li.d $a0, 0x7ff800007ffff800 # CHECK: lu12i.w $r4, 524287 ++ # CHECK: ori $r4, $r4, 2048 ++ # CHECK: lu32i.d $r4, -524288 ++ # CHECK: lu52i.d $r4, $r4, 2047 ++li.d $a0, 0x7ff800007fffffff # CHECK: lu12i.w $r4, 524287 ++ # CHECK: ori $r4, $r4, 4095 ++ # CHECK: lu32i.d $r4, -524288 ++ # CHECK: lu52i.d $r4, $r4, 2047 ++li.d $a0, 0x7ff8000080000000 # CHECK: lu12i.w $r4, -524288 ++ # CHECK: lu32i.d $r4, -524288 ++ # CHECK: lu52i.d $r4, $r4, 2047 ++li.d $a0, 0x7ff80000800007ff # CHECK: lu12i.w $r4, -524288 ++ # CHECK: ori $r4, $r4, 2047 ++ # CHECK: lu32i.d $r4, -524288 ++ # CHECK: lu52i.d $r4, $r4, 2047 ++li.d $a0, 0x7ff8000080000800 # CHECK: lu12i.w $r4, -524288 ++ # CHECK: ori $r4, $r4, 2048 ++ # CHECK: lu32i.d $r4, -524288 ++ # CHECK: lu52i.d $r4, $r4, 2047 ++li.d $a0, 0x7ff8000080000fff # CHECK: lu12i.w $r4, -524288 ++ # CHECK: ori $r4, $r4, 4095 ++ # CHECK: lu32i.d $r4, -524288 ++ # CHECK: lu52i.d $r4, $r4, 2047 ++li.d $a0, 0x7ff80000fffff000 # CHECK: lu12i.w $r4, -1 ++ # CHECK: lu32i.d $r4, -524288 ++ # CHECK: lu52i.d $r4, $r4, 2047 ++li.d $a0, 0x7ff80000fffff7ff # CHECK: lu12i.w $r4, -1 ++ # CHECK: ori $r4, $r4, 2047 ++ # CHECK: lu32i.d $r4, -524288 ++ # CHECK: lu52i.d $r4, $r4, 2047 ++li.d $a0, 0x7ff80000fffff800 # CHECK: addi.w $r4, $zero, -2048 ++ # CHECK: lu32i.d $r4, -524288 ++ # CHECK: lu52i.d $r4, $r4, 2047 ++li.d $a0, 0x7ff80000ffffffff # CHECK: addi.w $r4, $zero, -1 ++ # CHECK: lu32i.d $r4, -524288 ++ # CHECK: lu52i.d $r4, $r4, 2047 ++li.d $a0, 0x7fffffff00000000 # CHECK: ori $r4, $zero, 0 ++ # CHECK: lu32i.d $r4, -1 ++ # CHECK: lu52i.d $r4, $r4, 2047 ++li.d $a0, 0x7fffffff000007ff # CHECK: ori $r4, $zero, 2047 ++ # CHECK: lu32i.d $r4, -1 ++ # CHECK: lu52i.d $r4, $r4, 2047 ++li.d $a0, 0x7fffffff00000800 # CHECK: ori $r4, $zero, 2048 ++ # CHECK: lu32i.d $r4, -1 ++ # CHECK: lu52i.d $r4, $r4, 2047 ++li.d $a0, 0x7fffffff00000fff # CHECK: ori $r4, $zero, 4095 ++ # CHECK: lu32i.d $r4, -1 ++ # CHECK: lu52i.d $r4, $r4, 2047 ++li.d $a0, 0x7fffffff7ffff000 # CHECK: lu12i.w $r4, 524287 ++ # CHECK: lu32i.d $r4, -1 ++ # CHECK: lu52i.d $r4, $r4, 2047 ++li.d $a0, 0x7fffffff7ffff7ff # CHECK: lu12i.w $r4, 524287 ++ # CHECK: ori $r4, $r4, 
2047 ++ # CHECK: lu32i.d $r4, -1 ++ # CHECK: lu52i.d $r4, $r4, 2047 ++li.d $a0, 0x7fffffff7ffff800 # CHECK: lu12i.w $r4, 524287 ++ # CHECK: ori $r4, $r4, 2048 ++ # CHECK: lu32i.d $r4, -1 ++ # CHECK: lu52i.d $r4, $r4, 2047 ++li.d $a0, 0x7fffffff7fffffff # CHECK: lu12i.w $r4, 524287 ++ # CHECK: ori $r4, $r4, 4095 ++ # CHECK: lu32i.d $r4, -1 ++ # CHECK: lu52i.d $r4, $r4, 2047 ++li.d $a0, 0x7fffffff80000000 # CHECK: lu12i.w $r4, -524288 ++ # CHECK: lu52i.d $r4, $r4, 2047 ++li.d $a0, 0x7fffffff800007ff # CHECK: lu12i.w $r4, -524288 ++ # CHECK: ori $r4, $r4, 2047 ++ # CHECK: lu52i.d $r4, $r4, 2047 ++li.d $a0, 0x7fffffff80000800 # CHECK: lu12i.w $r4, -524288 ++ # CHECK: ori $r4, $r4, 2048 ++ # CHECK: lu52i.d $r4, $r4, 2047 ++li.d $a0, 0x7fffffff80000fff # CHECK: lu12i.w $r4, -524288 ++ # CHECK: ori $r4, $r4, 4095 ++ # CHECK: lu52i.d $r4, $r4, 2047 ++li.d $a0, 0x7ffffffffffff000 # CHECK: lu12i.w $r4, -1 ++ # CHECK: lu52i.d $r4, $r4, 2047 ++li.d $a0, 0x7ffffffffffff7ff # CHECK: lu12i.w $r4, -1 ++ # CHECK: ori $r4, $r4, 2047 ++ # CHECK: lu52i.d $r4, $r4, 2047 ++li.d $a0, 0x7ffffffffffff800 # CHECK: addi.w $r4, $zero, -2048 ++ # CHECK: lu52i.d $r4, $r4, 2047 ++li.d $a0, 0x7fffffffffffffff # CHECK: addi.w $r4, $zero, -1 ++ # CHECK: lu52i.d $r4, $r4, 2047 ++li.d $a0, 0x8000000000000000 # CHECK: lu52i.d $r4, $zero, -2048 ++li.d $a0, 0x80000000000007ff # CHECK: ori $r4, $zero, 2047 ++ # CHECK: lu52i.d $r4, $r4, -2048 ++li.d $a0, 0x8000000000000800 # CHECK: ori $r4, $zero, 2048 ++ # CHECK: lu52i.d $r4, $r4, -2048 ++li.d $a0, 0x8000000000000fff # CHECK: ori $r4, $zero, 4095 ++ # CHECK: lu52i.d $r4, $r4, -2048 ++li.d $a0, 0x800000007ffff000 # CHECK: lu12i.w $r4, 524287 ++ # CHECK: lu52i.d $r4, $r4, -2048 ++li.d $a0, 0x800000007ffff7ff # CHECK: lu12i.w $r4, 524287 ++ # CHECK: ori $r4, $r4, 2047 ++ # CHECK: lu52i.d $r4, $r4, -2048 ++li.d $a0, 0x800000007ffff800 # CHECK: lu12i.w $r4, 524287 ++ # CHECK: ori $r4, $r4, 2048 ++ # CHECK: lu52i.d $r4, $r4, -2048 ++li.d $a0, 0x800000007fffffff # CHECK: lu12i.w $r4, 524287 ++ # CHECK: ori $r4, $r4, 4095 ++ # CHECK: lu52i.d $r4, $r4, -2048 ++li.d $a0, 0x8000000080000000 # CHECK: lu12i.w $r4, -524288 ++ # CHECK: lu32i.d $r4, 0 ++ # CHECK: lu52i.d $r4, $r4, -2048 ++li.d $a0, 0x80000000800007ff # CHECK: lu12i.w $r4, -524288 ++ # CHECK: ori $r4, $r4, 2047 ++ # CHECK: lu32i.d $r4, 0 ++ # CHECK: lu52i.d $r4, $r4, -2048 ++li.d $a0, 0x8000000080000800 # CHECK: lu12i.w $r4, -524288 ++ # CHECK: ori $r4, $r4, 2048 ++ # CHECK: lu32i.d $r4, 0 ++ # CHECK: lu52i.d $r4, $r4, -2048 ++li.d $a0, 0x8000000080000fff # CHECK: lu12i.w $r4, -524288 ++ # CHECK: ori $r4, $r4, 4095 ++ # CHECK: lu32i.d $r4, 0 ++ # CHECK: lu52i.d $r4, $r4, -2048 ++li.d $a0, 0x80000000fffff000 # CHECK: lu12i.w $r4, -1 ++ # CHECK: lu32i.d $r4, 0 ++ # CHECK: lu52i.d $r4, $r4, -2048 ++li.d $a0, 0x80000000fffff7ff # CHECK: lu12i.w $r4, -1 ++ # CHECK: ori $r4, $r4, 2047 ++ # CHECK: lu32i.d $r4, 0 ++ # CHECK: lu52i.d $r4, $r4, -2048 ++li.d $a0, 0x80000000fffff800 # CHECK: addi.w $r4, $zero, -2048 ++ # CHECK: lu32i.d $r4, 0 ++ # CHECK: lu52i.d $r4, $r4, -2048 ++li.d $a0, 0x80000000ffffffff # CHECK: addi.w $r4, $zero, -1 ++ # CHECK: lu32i.d $r4, 0 ++ # CHECK: lu52i.d $r4, $r4, -2048 ++li.d $a0, 0x8007ffff00000000 # CHECK: ori $r4, $zero, 0 ++ # CHECK: lu32i.d $r4, 524287 ++ # CHECK: lu52i.d $r4, $r4, -2048 ++li.d $a0, 0x8007ffff000007ff # CHECK: ori $r4, $zero, 2047 ++ # CHECK: lu32i.d $r4, 524287 ++ # CHECK: lu52i.d $r4, $r4, -2048 ++li.d $a0, 0x8007ffff00000800 # CHECK: ori $r4, $zero, 2048 ++ # CHECK: lu32i.d $r4, 
524287 ++ # CHECK: lu52i.d $r4, $r4, -2048 ++li.d $a0, 0x8007ffff00000fff # CHECK: ori $r4, $zero, 4095 ++ # CHECK: lu32i.d $r4, 524287 ++ # CHECK: lu52i.d $r4, $r4, -2048 ++li.d $a0, 0x8007ffff7ffff000 # CHECK: lu12i.w $r4, 524287 ++ # CHECK: lu32i.d $r4, 524287 ++ # CHECK: lu52i.d $r4, $r4, -2048 ++li.d $a0, 0x8007ffff7ffff7ff # CHECK: lu12i.w $r4, 524287 ++ # CHECK: ori $r4, $r4, 2047 ++ # CHECK: lu32i.d $r4, 524287 ++ # CHECK: lu52i.d $r4, $r4, -2048 ++li.d $a0, 0x8007ffff7ffff800 # CHECK: lu12i.w $r4, 524287 ++ # CHECK: ori $r4, $r4, 2048 ++ # CHECK: lu32i.d $r4, 524287 ++ # CHECK: lu52i.d $r4, $r4, -2048 ++li.d $a0, 0x8007ffff7fffffff # CHECK: lu12i.w $r4, 524287 ++ # CHECK: ori $r4, $r4, 4095 ++ # CHECK: lu32i.d $r4, 524287 ++ # CHECK: lu52i.d $r4, $r4, -2048 ++li.d $a0, 0x8007ffff80000000 # CHECK: lu12i.w $r4, -524288 ++ # CHECK: lu32i.d $r4, 524287 ++ # CHECK: lu52i.d $r4, $r4, -2048 ++li.d $a0, 0x8007ffff800007ff # CHECK: lu12i.w $r4, -524288 ++ # CHECK: ori $r4, $r4, 2047 ++ # CHECK: lu32i.d $r4, 524287 ++ # CHECK: lu52i.d $r4, $r4, -2048 ++li.d $a0, 0x8007ffff80000800 # CHECK: lu12i.w $r4, -524288 ++ # CHECK: ori $r4, $r4, 2048 ++ # CHECK: lu32i.d $r4, 524287 ++ # CHECK: lu52i.d $r4, $r4, -2048 ++li.d $a0, 0x8007ffff80000fff # CHECK: lu12i.w $r4, -524288 ++ # CHECK: ori $r4, $r4, 4095 ++ # CHECK: lu32i.d $r4, 524287 ++ # CHECK: lu52i.d $r4, $r4, -2048 ++li.d $a0, 0x8007fffffffff000 # CHECK: lu12i.w $r4, -1 ++ # CHECK: lu32i.d $r4, 524287 ++ # CHECK: lu52i.d $r4, $r4, -2048 ++li.d $a0, 0x8007fffffffff7ff # CHECK: lu12i.w $r4, -1 ++ # CHECK: ori $r4, $r4, 2047 ++ # CHECK: lu32i.d $r4, 524287 ++ # CHECK: lu52i.d $r4, $r4, -2048 ++li.d $a0, 0x8007fffffffff800 # CHECK: addi.w $r4, $zero, -2048 ++ # CHECK: lu32i.d $r4, 524287 ++ # CHECK: lu52i.d $r4, $r4, -2048 ++li.d $a0, 0x8007ffffffffffff # CHECK: addi.w $r4, $zero, -1 ++ # CHECK: lu32i.d $r4, 524287 ++ # CHECK: lu52i.d $r4, $r4, -2048 ++li.d $a0, 0x8008000000000000 # CHECK: ori $r4, $zero, 0 ++ # CHECK: lu32i.d $r4, -524288 ++ # CHECK: lu52i.d $r4, $r4, -2048 ++li.d $a0, 0x80080000000007ff # CHECK: ori $r4, $zero, 2047 ++ # CHECK: lu32i.d $r4, -524288 ++ # CHECK: lu52i.d $r4, $r4, -2048 ++li.d $a0, 0x8008000000000800 # CHECK: ori $r4, $zero, 2048 ++ # CHECK: lu32i.d $r4, -524288 ++ # CHECK: lu52i.d $r4, $r4, -2048 ++li.d $a0, 0x8008000000000fff # CHECK: ori $r4, $zero, 4095 ++ # CHECK: lu32i.d $r4, -524288 ++ # CHECK: lu52i.d $r4, $r4, -2048 ++li.d $a0, 0x800800007ffff000 # CHECK: lu12i.w $r4, 524287 ++ # CHECK: lu32i.d $r4, -524288 ++ # CHECK: lu52i.d $r4, $r4, -2048 ++li.d $a0, 0x800800007ffff7ff # CHECK: lu12i.w $r4, 524287 ++ # CHECK: ori $r4, $r4, 2047 ++ # CHECK: lu32i.d $r4, -524288 ++ # CHECK: lu52i.d $r4, $r4, -2048 ++li.d $a0, 0x800800007ffff800 # CHECK: lu12i.w $r4, 524287 ++ # CHECK: ori $r4, $r4, 2048 ++ # CHECK: lu32i.d $r4, -524288 ++ # CHECK: lu52i.d $r4, $r4, -2048 ++li.d $a0, 0x800800007fffffff # CHECK: lu12i.w $r4, 524287 ++ # CHECK: ori $r4, $r4, 4095 ++ # CHECK: lu32i.d $r4, -524288 ++ # CHECK: lu52i.d $r4, $r4, -2048 ++li.d $a0, 0x8008000080000000 # CHECK: lu12i.w $r4, -524288 ++ # CHECK: lu32i.d $r4, -524288 ++ # CHECK: lu52i.d $r4, $r4, -2048 ++li.d $a0, 0x80080000800007ff # CHECK: lu12i.w $r4, -524288 ++ # CHECK: ori $r4, $r4, 2047 ++ # CHECK: lu32i.d $r4, -524288 ++ # CHECK: lu52i.d $r4, $r4, -2048 ++li.d $a0, 0x8008000080000800 # CHECK: lu12i.w $r4, -524288 ++ # CHECK: ori $r4, $r4, 2048 ++ # CHECK: lu32i.d $r4, -524288 ++ # CHECK: lu52i.d $r4, $r4, -2048 ++li.d $a0, 0x8008000080000fff # CHECK: lu12i.w 
$r4, -524288 ++ # CHECK: ori $r4, $r4, 4095 ++ # CHECK: lu32i.d $r4, -524288 ++ # CHECK: lu52i.d $r4, $r4, -2048 ++li.d $a0, 0x80080000fffff000 # CHECK: lu12i.w $r4, -1 ++ # CHECK: lu32i.d $r4, -524288 ++ # CHECK: lu52i.d $r4, $r4, -2048 ++li.d $a0, 0x80080000fffff7ff # CHECK: lu12i.w $r4, -1 ++ # CHECK: ori $r4, $r4, 2047 ++ # CHECK: lu32i.d $r4, -524288 ++ # CHECK: lu52i.d $r4, $r4, -2048 ++li.d $a0, 0x80080000fffff800 # CHECK: addi.w $r4, $zero, -2048 ++ # CHECK: lu32i.d $r4, -524288 ++ # CHECK: lu52i.d $r4, $r4, -2048 ++li.d $a0, 0x80080000ffffffff # CHECK: addi.w $r4, $zero, -1 ++ # CHECK: lu32i.d $r4, -524288 ++ # CHECK: lu52i.d $r4, $r4, -2048 ++li.d $a0, 0x800fffff00000000 # CHECK: ori $r4, $zero, 0 ++ # CHECK: lu32i.d $r4, -1 ++ # CHECK: lu52i.d $r4, $r4, -2048 ++li.d $a0, 0x800fffff000007ff # CHECK: ori $r4, $zero, 2047 ++ # CHECK: lu32i.d $r4, -1 ++ # CHECK: lu52i.d $r4, $r4, -2048 ++li.d $a0, 0x800fffff00000800 # CHECK: ori $r4, $zero, 2048 ++ # CHECK: lu32i.d $r4, -1 ++ # CHECK: lu52i.d $r4, $r4, -2048 ++li.d $a0, 0x800fffff00000fff # CHECK: ori $r4, $zero, 4095 ++ # CHECK: lu32i.d $r4, -1 ++ # CHECK: lu52i.d $r4, $r4, -2048 ++li.d $a0, 0x800fffff7ffff000 # CHECK: lu12i.w $r4, 524287 ++ # CHECK: lu32i.d $r4, -1 ++ # CHECK: lu52i.d $r4, $r4, -2048 ++li.d $a0, 0x800fffff7ffff7ff # CHECK: lu12i.w $r4, 524287 ++ # CHECK: ori $r4, $r4, 2047 ++ # CHECK: lu32i.d $r4, -1 ++ # CHECK: lu52i.d $r4, $r4, -2048 ++li.d $a0, 0x800fffff7ffff800 # CHECK: lu12i.w $r4, 524287 ++ # CHECK: ori $r4, $r4, 2048 ++ # CHECK: lu32i.d $r4, -1 ++ # CHECK: lu52i.d $r4, $r4, -2048 ++li.d $a0, 0x800fffff7fffffff # CHECK: lu12i.w $r4, 524287 ++ # CHECK: ori $r4, $r4, 4095 ++ # CHECK: lu32i.d $r4, -1 ++ # CHECK: lu52i.d $r4, $r4, -2048 ++li.d $a0, 0x800fffff80000000 # CHECK: lu12i.w $r4, -524288 ++ # CHECK: lu52i.d $r4, $r4, -2048 ++li.d $a0, 0x800fffff800007ff # CHECK: lu12i.w $r4, -524288 ++ # CHECK: ori $r4, $r4, 2047 ++ # CHECK: lu52i.d $r4, $r4, -2048 ++li.d $a0, 0x800fffff80000800 # CHECK: lu12i.w $r4, -524288 ++ # CHECK: ori $r4, $r4, 2048 ++ # CHECK: lu52i.d $r4, $r4, -2048 ++li.d $a0, 0x800fffff80000fff # CHECK: lu12i.w $r4, -524288 ++ # CHECK: ori $r4, $r4, 4095 ++ # CHECK: lu52i.d $r4, $r4, -2048 ++li.d $a0, 0x800ffffffffff000 # CHECK: lu12i.w $r4, -1 ++ # CHECK: lu52i.d $r4, $r4, -2048 ++li.d $a0, 0x800ffffffffff7ff # CHECK: lu12i.w $r4, -1 ++ # CHECK: ori $r4, $r4, 2047 ++ # CHECK: lu52i.d $r4, $r4, -2048 ++li.d $a0, 0x800ffffffffff800 # CHECK: addi.w $r4, $zero, -2048 ++ # CHECK: lu52i.d $r4, $r4, -2048 ++li.d $a0, 0x800fffffffffffff # CHECK: addi.w $r4, $zero, -1 ++ # CHECK: lu52i.d $r4, $r4, -2048 ++li.d $a0, 0xfff0000000000000 # CHECK: lu52i.d $r4, $zero, -1 ++li.d $a0, 0xfff00000000007ff # CHECK: ori $r4, $zero, 2047 ++ # CHECK: lu52i.d $r4, $r4, -1 ++li.d $a0, 0xfff0000000000800 # CHECK: ori $r4, $zero, 2048 ++ # CHECK: lu52i.d $r4, $r4, -1 ++li.d $a0, 0xfff0000000000fff # CHECK: ori $r4, $zero, 4095 ++ # CHECK: lu52i.d $r4, $r4, -1 ++li.d $a0, 0xfff000007ffff000 # CHECK: lu12i.w $r4, 524287 ++ # CHECK: lu52i.d $r4, $r4, -1 ++li.d $a0, 0xfff000007ffff7ff # CHECK: lu12i.w $r4, 524287 ++ # CHECK: ori $r4, $r4, 2047 ++ # CHECK: lu52i.d $r4, $r4, -1 ++li.d $a0, 0xfff000007ffff800 # CHECK: lu12i.w $r4, 524287 ++ # CHECK: ori $r4, $r4, 2048 ++ # CHECK: lu52i.d $r4, $r4, -1 ++li.d $a0, 0xfff000007fffffff # CHECK: lu12i.w $r4, 524287 ++ # CHECK: ori $r4, $r4, 4095 ++ # CHECK: lu52i.d $r4, $r4, -1 ++li.d $a0, 0xfff0000080000000 # CHECK: lu12i.w $r4, -524288 ++ # CHECK: lu32i.d $r4, 0 ++ # CHECK: 
lu52i.d $r4, $r4, -1 ++li.d $a0, 0xfff00000800007ff # CHECK: lu12i.w $r4, -524288 ++ # CHECK: ori $r4, $r4, 2047 ++ # CHECK: lu32i.d $r4, 0 ++ # CHECK: lu52i.d $r4, $r4, -1 ++li.d $a0, 0xfff0000080000800 # CHECK: lu12i.w $r4, -524288 ++ # CHECK: ori $r4, $r4, 2048 ++ # CHECK: lu32i.d $r4, 0 ++ # CHECK: lu52i.d $r4, $r4, -1 ++li.d $a0, 0xfff0000080000fff # CHECK: lu12i.w $r4, -524288 ++ # CHECK: ori $r4, $r4, 4095 ++ # CHECK: lu32i.d $r4, 0 ++ # CHECK: lu52i.d $r4, $r4, -1 ++li.d $a0, 0xfff00000fffff000 # CHECK: lu12i.w $r4, -1 ++ # CHECK: lu32i.d $r4, 0 ++ # CHECK: lu52i.d $r4, $r4, -1 ++li.d $a0, 0xfff00000fffff7ff # CHECK: lu12i.w $r4, -1 ++ # CHECK: ori $r4, $r4, 2047 ++ # CHECK: lu32i.d $r4, 0 ++ # CHECK: lu52i.d $r4, $r4, -1 ++li.d $a0, 0xfff00000fffff800 # CHECK: addi.w $r4, $zero, -2048 ++ # CHECK: lu32i.d $r4, 0 ++ # CHECK: lu52i.d $r4, $r4, -1 ++li.d $a0, 0xfff00000ffffffff # CHECK: addi.w $r4, $zero, -1 ++ # CHECK: lu32i.d $r4, 0 ++ # CHECK: lu52i.d $r4, $r4, -1 ++li.d $a0, 0xfff7ffff00000000 # CHECK: ori $r4, $zero, 0 ++ # CHECK: lu32i.d $r4, 524287 ++ # CHECK: lu52i.d $r4, $r4, -1 ++li.d $a0, 0xfff7ffff000007ff # CHECK: ori $r4, $zero, 2047 ++ # CHECK: lu32i.d $r4, 524287 ++ # CHECK: lu52i.d $r4, $r4, -1 ++li.d $a0, 0xfff7ffff00000800 # CHECK: ori $r4, $zero, 2048 ++ # CHECK: lu32i.d $r4, 524287 ++ # CHECK: lu52i.d $r4, $r4, -1 ++li.d $a0, 0xfff7ffff00000fff # CHECK: ori $r4, $zero, 4095 ++ # CHECK: lu32i.d $r4, 524287 ++ # CHECK: lu52i.d $r4, $r4, -1 ++li.d $a0, 0xfff7ffff7ffff000 # CHECK: lu12i.w $r4, 524287 ++ # CHECK: lu32i.d $r4, 524287 ++ # CHECK: lu52i.d $r4, $r4, -1 ++li.d $a0, 0xfff7ffff7ffff7ff # CHECK: lu12i.w $r4, 524287 ++ # CHECK: ori $r4, $r4, 2047 ++ # CHECK: lu32i.d $r4, 524287 ++ # CHECK: lu52i.d $r4, $r4, -1 ++li.d $a0, 0xfff7ffff7ffff800 # CHECK: lu12i.w $r4, 524287 ++ # CHECK: ori $r4, $r4, 2048 ++ # CHECK: lu32i.d $r4, 524287 ++ # CHECK: lu52i.d $r4, $r4, -1 ++li.d $a0, 0xfff7ffff7fffffff # CHECK: lu12i.w $r4, 524287 ++ # CHECK: ori $r4, $r4, 4095 ++ # CHECK: lu32i.d $r4, 524287 ++ # CHECK: lu52i.d $r4, $r4, -1 ++li.d $a0, 0xfff7ffff80000000 # CHECK: lu12i.w $r4, -524288 ++ # CHECK: lu32i.d $r4, 524287 ++ # CHECK: lu52i.d $r4, $r4, -1 ++li.d $a0, 0xfff7ffff800007ff # CHECK: lu12i.w $r4, -524288 ++ # CHECK: ori $r4, $r4, 2047 ++ # CHECK: lu32i.d $r4, 524287 ++ # CHECK: lu52i.d $r4, $r4, -1 ++li.d $a0, 0xfff7ffff80000800 # CHECK: lu12i.w $r4, -524288 ++ # CHECK: ori $r4, $r4, 2048 ++ # CHECK: lu32i.d $r4, 524287 ++ # CHECK: lu52i.d $r4, $r4, -1 ++li.d $a0, 0xfff7ffff80000fff # CHECK: lu12i.w $r4, -524288 ++ # CHECK: ori $r4, $r4, 4095 ++ # CHECK: lu32i.d $r4, 524287 ++ # CHECK: lu52i.d $r4, $r4, -1 ++li.d $a0, 0xfff7fffffffff000 # CHECK: lu12i.w $r4, -1 ++ # CHECK: lu32i.d $r4, 524287 ++ # CHECK: lu52i.d $r4, $r4, -1 ++li.d $a0, 0xfff7fffffffff7ff # CHECK: lu12i.w $r4, -1 ++ # CHECK: ori $r4, $r4, 2047 ++ # CHECK: lu32i.d $r4, 524287 ++ # CHECK: lu52i.d $r4, $r4, -1 ++li.d $a0, 0xfff7fffffffff800 # CHECK: addi.w $r4, $zero, -2048 ++ # CHECK: lu32i.d $r4, 524287 ++ # CHECK: lu52i.d $r4, $r4, -1 ++li.d $a0, 0xfff7ffffffffffff # CHECK: addi.w $r4, $zero, -1 ++ # CHECK: lu32i.d $r4, 524287 ++ # CHECK: lu52i.d $r4, $r4, -1 ++li.d $a0, 0xfff8000000000000 # CHECK: ori $r4, $zero, 0 ++ # CHECK: lu32i.d $r4, -524288 ++li.d $a0, 0xfff80000000007ff # CHECK: ori $r4, $zero, 2047 ++ # CHECK: lu32i.d $r4, -524288 ++li.d $a0, 0xfff8000000000800 # CHECK: ori $r4, $zero, 2048 ++ # CHECK: lu32i.d $r4, -524288 ++li.d $a0, 0xfff8000000000fff # CHECK: ori $r4, $zero, 4095 ++ # 
CHECK: lu32i.d $r4, -524288 ++li.d $a0, 0xfff800007ffff000 # CHECK: lu12i.w $r4, 524287 ++ # CHECK: lu32i.d $r4, -524288 ++li.d $a0, 0xfff800007ffff7ff # CHECK: lu12i.w $r4, 524287 ++ # CHECK: ori $r4, $r4, 2047 ++ # CHECK: lu32i.d $r4, -524288 ++li.d $a0, 0xfff800007ffff800 # CHECK: lu12i.w $r4, 524287 ++ # CHECK: ori $r4, $r4, 2048 ++ # CHECK: lu32i.d $r4, -524288 ++li.d $a0, 0xfff800007fffffff # CHECK: lu12i.w $r4, 524287 ++ # CHECK: ori $r4, $r4, 4095 ++ # CHECK: lu32i.d $r4, -524288 ++li.d $a0, 0xfff8000080000000 # CHECK: lu12i.w $r4, -524288 ++ # CHECK: lu32i.d $r4, -524288 ++li.d $a0, 0xfff80000800007ff # CHECK: lu12i.w $r4, -524288 ++ # CHECK: ori $r4, $r4, 2047 ++ # CHECK: lu32i.d $r4, -524288 ++li.d $a0, 0xfff8000080000800 # CHECK: lu12i.w $r4, -524288 ++ # CHECK: ori $r4, $r4, 2048 ++ # CHECK: lu32i.d $r4, -524288 ++li.d $a0, 0xfff8000080000fff # CHECK: lu12i.w $r4, -524288 ++ # CHECK: ori $r4, $r4, 4095 ++ # CHECK: lu32i.d $r4, -524288 ++li.d $a0, 0xfff80000fffff000 # CHECK: lu12i.w $r4, -1 ++ # CHECK: lu32i.d $r4, -524288 ++li.d $a0, 0xfff80000fffff7ff # CHECK: lu12i.w $r4, -1 ++ # CHECK: ori $r4, $r4, 2047 ++ # CHECK: lu32i.d $r4, -524288 ++li.d $a0, 0xfff80000fffff800 # CHECK: addi.w $r4, $zero, -2048 ++ # CHECK: lu32i.d $r4, -524288 ++li.d $a0, 0xfff80000ffffffff # CHECK: addi.w $r4, $zero, -1 ++ # CHECK: lu32i.d $r4, -524288 ++li.d $a0, 0xffffffff00000000 # CHECK: ori $r4, $zero, 0 ++ # CHECK: lu32i.d $r4, -1 ++li.d $a0, 0xffffffff000007ff # CHECK: ori $r4, $zero, 2047 ++ # CHECK: lu32i.d $r4, -1 ++li.d $a0, 0xffffffff00000800 # CHECK: ori $r4, $zero, 2048 ++ # CHECK: lu32i.d $r4, -1 ++li.d $a0, 0xffffffff00000fff # CHECK: ori $r4, $zero, 4095 ++ # CHECK: lu32i.d $r4, -1 ++li.d $a0, 0xffffffff7ffff000 # CHECK: lu12i.w $r4, 524287 ++ # CHECK: lu32i.d $r4, -1 ++li.d $a0, 0xffffffff7ffff7ff # CHECK: lu12i.w $r4, 524287 ++ # CHECK: ori $r4, $r4, 2047 ++ # CHECK: lu32i.d $r4, -1 ++li.d $a0, 0xffffffff7ffff800 # CHECK: lu12i.w $r4, 524287 ++ # CHECK: ori $r4, $r4, 2048 ++ # CHECK: lu32i.d $r4, -1 ++li.d $a0, 0xffffffff7fffffff # CHECK: lu12i.w $r4, 524287 ++ # CHECK: ori $r4, $r4, 4095 ++ # CHECK: lu32i.d $r4, -1 ++li.d $a0, 0xffffffff80000000 # CHECK: lu12i.w $r4, -524288 ++li.d $a0, 0xffffffff800007ff # CHECK: lu12i.w $r4, -524288 ++ # CHECK: ori $r4, $r4, 2047 ++li.d $a0, 0xffffffff80000800 # CHECK: lu12i.w $r4, -524288 ++ # CHECK: ori $r4, $r4, 2048 ++li.d $a0, 0xffffffff80000fff # CHECK: lu12i.w $r4, -524288 ++ # CHECK: ori $r4, $r4, 4095 ++li.d $a0, 0xfffffffffffff000 # CHECK: lu12i.w $r4, -1 ++li.d $a0, 0xfffffffffffff7ff # CHECK: lu12i.w $r4, -1 ++ # CHECK: ori $r4, $r4, 2047 ++li.d $a0, 0xfffffffffffff800 # CHECK: addi.d $r4, $zero, -2048 ++li.d $a0, 0xffffffffffffffff # CHECK: addi.d $r4, $zero, -1 +diff --git a/llvm/test/MC/LoongArch/Misc/unaligned-nops.s b/llvm/test/MC/LoongArch/unaligned-nops.s +similarity index 54% +rename from llvm/test/MC/LoongArch/Misc/unaligned-nops.s +rename to llvm/test/MC/LoongArch/unaligned-nops.s +index 5952540b4..453e2cdca 100644 +--- a/llvm/test/MC/LoongArch/Misc/unaligned-nops.s ++++ b/llvm/test/MC/LoongArch/unaligned-nops.s +@@ -1,4 +1,4 @@ +-# RUN: not --crash llvm-mc --filetype=obj --triple=loongarch64 %s -o %t ++# RUN: not --crash llvm-mc -filetype=obj -triple=loongarch64 %s -o %t + .byte 1 + # CHECK: LLVM ERROR: unable to write nop sequence of 3 bytes + .p2align 2 +diff --git a/llvm/test/MC/LoongArch/valid_12imm.s b/llvm/test/MC/LoongArch/valid_12imm.s +new file mode 100644 +index 000000000..ed44180bf +--- /dev/null ++++ 
b/llvm/test/MC/LoongArch/valid_12imm.s +@@ -0,0 +1,33 @@ ++# RUN: llvm-mc %s -triple=loongarch64-unknown-linux-gnu -show-encoding | FileCheck %s ++# CHECK: slti $r27, $ra, 235 ++# CHECK: encoding: [0x3b,0xac,0x03,0x02] ++slti $r27, $ra, 235 ++ ++# CHECK: sltui $zero, $r8, 162 ++# CHECK: encoding: [0x00,0x89,0x42,0x02] ++sltui $zero, $r8, 162 ++ ++# CHECK: addi.w $r5, $r7, 246 ++# CHECK: encoding: [0xe5,0xd8,0x83,0x02] ++addi.w $r5, $r7, 246 ++ ++# CHECK: addi.d $r28, $r6, 75 ++# CHECK: encoding: [0xdc,0x2c,0xc1,0x02] ++addi.d $r28, $r6, 75 ++ ++# CHECK: lu52i.d $r13, $r4, 195 ++# CHECK: encoding: [0x8d,0x0c,0x03,0x03] ++lu52i.d $r13, $r4, 195 ++ ++# CHECK: andi $r25, $zero, 106 ++# CHECK: encoding: [0x19,0xa8,0x41,0x03] ++andi $r25, $zero, 106 ++ ++# CHECK: ori $r17, $r5, 47 ++# CHECK: encoding: [0xb1,0xbc,0x80,0x03] ++ori $r17, $r5, 47 ++ ++# CHECK: xori $r18, $r23, 99 ++# CHECK: encoding: [0xf2,0x8e,0xc1,0x03] ++xori $r18, $r23, 99 ++ +diff --git a/llvm/test/MC/LoongArch/valid_4operands.s b/llvm/test/MC/LoongArch/valid_4operands.s +new file mode 100644 +index 000000000..1418bb677 +--- /dev/null ++++ b/llvm/test/MC/LoongArch/valid_4operands.s +@@ -0,0 +1,53 @@ ++# RUN: llvm-mc %s -triple=loongarch64-unknown-linux-gnu -show-encoding | FileCheck %s ++# CHECK: fmadd.s $f3, $f16, $f3, $f15 ++# CHECK: encoding: [0x03,0x8e,0x17,0x08] ++fmadd.s $f3, $f16, $f3, $f15 ++ ++# CHECK: fmadd.d $f21, $f24, $f28, $f24 ++# CHECK: encoding: [0x15,0x73,0x2c,0x08] ++fmadd.d $f21, $f24, $f28, $f24 ++ ++# CHECK: fmsub.s $f23, $f11, $f21, $f4 ++# CHECK: encoding: [0x77,0x55,0x52,0x08] ++fmsub.s $f23, $f11, $f21, $f4 ++ ++# CHECK: fmsub.d $f6, $f18, $f20, $f27 ++# CHECK: encoding: [0x46,0xd2,0x6d,0x08] ++fmsub.d $f6, $f18, $f20, $f27 ++ ++# CHECK: fnmadd.s $f29, $f1, $f24, $f20 ++# CHECK: encoding: [0x3d,0x60,0x9a,0x08] ++fnmadd.s $f29, $f1, $f24, $f20 ++ ++# CHECK: fnmadd.d $f25, $f13, $f19, $f30 ++# CHECK: encoding: [0xb9,0x4d,0xaf,0x08] ++fnmadd.d $f25, $f13, $f19, $f30 ++ ++# CHECK: fnmsub.s $f8, $f4, $f24, $f25 ++# CHECK: encoding: [0x88,0xe0,0xdc,0x08] ++fnmsub.s $f8, $f4, $f24, $f25 ++ ++# CHECK: fnmsub.d $f30, $f26, $f7, $f24 ++# CHECK: encoding: [0x5e,0x1f,0xec,0x08] ++fnmsub.d $f30, $f26, $f7, $f24 ++ ++# CHECK: fcmp.ceq.s $fcc7, $f17, $f29 ++# CHECK: encoding: [0x27,0x76,0x12,0x0c] ++fcmp.ceq.s $fcc7, $f17, $f29 ++ ++# CHECK: fcmp.ceq.d $fcc4, $f12, $f9 ++# CHECK: encoding: [0x84,0x25,0x22,0x0c] ++fcmp.ceq.d $fcc4, $f12, $f9 ++ ++# CHECK: fcmp.cult.s $fcc0, $f0, $f1 ++# CHECK: encoding: [0x00,0x04,0x15,0x0c] ++fcmp.cult.s $fcc0, $f0, $f1 ++ ++# CHECK: fcmp.cult.d $fcc2, $f3, $f4 ++# CHECK: encoding: [0x62,0x10,0x25,0x0c] ++fcmp.cult.d $fcc2, $f3, $f4 ++ ++# CHECK: fsel $f18, $f20, $f21, $fcc4 ++# CHECK: encoding: [0x92,0x56,0x02,0x0d] ++fsel $f18, $f20, $f21, $fcc4 ++ +diff --git a/llvm/test/MC/LoongArch/valid_bigimm.s b/llvm/test/MC/LoongArch/valid_bigimm.s +new file mode 100644 +index 000000000..d7b3bbb7d +--- /dev/null ++++ b/llvm/test/MC/LoongArch/valid_bigimm.s +@@ -0,0 +1,33 @@ ++# RUN: llvm-mc %s -triple=loongarch64-unknown-linux-gnu -show-encoding | FileCheck %s ++# CHECK: addu16i.d $r9, $r23, 23 ++# CHECK: encoding: [0xe9,0x5e,0x00,0x10] ++addu16i.d $r9, $r23, 23 ++ ++# CHECK: lu12i.w $r16, 49 ++# CHECK: encoding: [0x30,0x06,0x00,0x14] ++lu12i.w $r16, 49 ++ ++# CHECK: lu12i.w $r4, -1 ++# CHECK: encoding: [0xe4,0xff,0xff,0x15] ++lu12i.w $r4, -1 ++ ++# CHECK: lu32i.d $sp, 196 ++# CHECK: encoding: [0x83,0x18,0x00,0x16] ++lu32i.d $sp, 196 ++ ++# CHECK: pcaddi $r9, 187 ++# CHECK: encoding: 
[0x69,0x17,0x00,0x18] ++pcaddi $r9, 187 ++ ++# CHECK: pcalau12i $r10, 89 ++# CHECK: encoding: [0x2a,0x0b,0x00,0x1a] ++pcalau12i $r10, 89 ++ ++# CHECK: pcaddu12i $zero, 37 ++# CHECK: encoding: [0xa0,0x04,0x00,0x1c] ++pcaddu12i $zero, 37 ++ ++# CHECK: pcaddu18i $r12, 26 ++# CHECK: encoding: [0x4c,0x03,0x00,0x1e] ++pcaddu18i $r12, 26 ++ +diff --git a/llvm/test/MC/LoongArch/valid_branch.s b/llvm/test/MC/LoongArch/valid_branch.s +new file mode 100644 +index 000000000..256e70b6d +--- /dev/null ++++ b/llvm/test/MC/LoongArch/valid_branch.s +@@ -0,0 +1,155 @@ ++# RUN: llvm-mc %s -triple=loongarch64-unknown-linux-gnu -show-encoding \ ++# RUN: | FileCheck -check-prefixes=CHECK-ASM-AND-OBJ,CHECK-ASM %s ++# RUN: llvm-mc %s -triple=loongarch64-unknown-linux-gnu -filetype=obj \ ++# RUN: | llvm-objdump -d - | FileCheck -check-prefix=CHECK-ASM-AND-OBJ %s ++ ++## random operands ++# CHECK-ASM-AND-OBJ: beqz $r9, 96 ++# CHECK-ASM: encoding: [0x20,0x61,0x00,0x40] ++beqz $r9, 96 ++ ++# CHECK-ASM-AND-OBJ: bnez $sp, 212 ++# CHECK-ASM: encoding: [0x60,0xd4,0x00,0x44] ++bnez $sp, 212 ++ ++# CHECK-ASM-AND-OBJ: bceqz $fcc6, 12 ++# CHECK-ASM: encoding: [0xc0,0x0c,0x00,0x48] ++bceqz $fcc6, 12 ++ ++# CHECK-ASM-AND-OBJ: bcnez $fcc6, 72 ++# CHECK-ASM: encoding: [0xc0,0x49,0x00,0x48] ++bcnez $fcc6, 72 ++ ++# CHECK-ASM-AND-OBJ: b 248 ++# CHECK-ASM: encoding: [0x00,0xf8,0x00,0x50] ++b 248 ++ ++# CHECK-ASM-AND-OBJ: bl 236 ++# CHECK-ASM: encoding: [0x00,0xec,0x00,0x54] ++bl 236 ++ ++# CHECK-ASM-AND-OBJ: beq $r10, $r7, 176 ++# CHECK-ASM: encoding: [0x47,0xb1,0x00,0x58] ++beq $r10, $r7, 176 ++ ++# CHECK-ASM-AND-OBJ: bne $r25, $ra, 136 ++# CHECK-ASM: encoding: [0x21,0x8b,0x00,0x5c] ++bne $r25, $ra, 136 ++ ++# CHECK-ASM-AND-OBJ: blt $r15, $r30, 168 ++# CHECK-ASM: encoding: [0xfe,0xa9,0x00,0x60] ++blt $r15, $r30, 168 ++ ++# CHECK-ASM-AND-OBJ: bge $r12, $r15, 148 ++# CHECK-ASM: encoding: [0x8f,0x95,0x00,0x64] ++bge $r12, $r15, 148 ++ ++# CHECK-ASM-AND-OBJ: bltu $r17, $r5, 4 ++# CHECK-ASM: encoding: [0x25,0x06,0x00,0x68] ++bltu $r17, $r5, 4 ++ ++# CHECK-ASM-AND-OBJ: bgeu $r6, $r23, 140 ++# CHECK-ASM: encoding: [0xd7,0x8c,0x00,0x6c] ++bgeu $r6, $r23, 140 ++ ++ ++## immediate lower/upper boundary ++### simm16 << 2 ++# CHECK-ASM-AND-OBJ: beq $r10, $r7, -131072 ++# CHECK-ASM: encoding: [0x47,0x01,0x00,0x5a] ++beq $r10, $r7, -0x20000 ++ ++# CHECK-ASM-AND-OBJ: beq $r10, $r7, 131068 ++# CHECK-ASM: encoding: [0x47,0xfd,0xff,0x59] ++beq $r10, $r7, 0x1FFFC ++ ++# CHECK-ASM-AND-OBJ: bne $r10, $r7, -131072 ++# CHECK-ASM: encoding: [0x47,0x01,0x00,0x5e] ++bne $r10, $r7, -0x20000 ++ ++# CHECK-ASM-AND-OBJ: bne $r10, $r7, 131068 ++# CHECK-ASM: encoding: [0x47,0xfd,0xff,0x5d] ++bne $r10, $r7, 0x1FFFC ++ ++# CHECK-ASM-AND-OBJ: blt $r10, $r7, -131072 ++# CHECK-ASM: encoding: [0x47,0x01,0x00,0x62] ++blt $r10, $r7, -0x20000 ++ ++# CHECK-ASM-AND-OBJ: blt $r10, $r7, 131068 ++# CHECK-ASM: encoding: [0x47,0xfd,0xff,0x61] ++blt $r10, $r7, 0x1FFFC ++ ++# CHECK-ASM-AND-OBJ: bge $r10, $r7, -131072 ++# CHECK-ASM: encoding: [0x47,0x01,0x00,0x66] ++bge $r10, $r7, -0x20000 ++ ++# CHECK-ASM-AND-OBJ: bge $r10, $r7, 131068 ++# CHECK-ASM: encoding: [0x47,0xfd,0xff,0x65] ++bge $r10, $r7, 0x1FFFC ++ ++# CHECK-ASM-AND-OBJ: bltu $r10, $r7, -131072 ++# CHECK-ASM: encoding: [0x47,0x01,0x00,0x6a] ++bltu $r10, $r7, -0x20000 ++ ++# CHECK-ASM-AND-OBJ: bltu $r10, $r7, 131068 ++# CHECK-ASM: encoding: [0x47,0xfd,0xff,0x69] ++bltu $r10, $r7, 0x1FFFC ++ ++# CHECK-ASM-AND-OBJ: bgeu $r10, $r7, -131072 ++# CHECK-ASM: encoding: [0x47,0x01,0x00,0x6e] ++bgeu $r10, $r7, -0x20000 ++ ++# 
CHECK-ASM-AND-OBJ: bgeu $r10, $r7, 131068 ++# CHECK-ASM: encoding: [0x47,0xfd,0xff,0x6d] ++bgeu $r10, $r7, 0x1FFFC ++ ++### simm21 << 2 ++# CHECK-ASM-AND-OBJ: beqz $r9, -4194304 ++# CHECK-ASM: encoding: [0x30,0x01,0x00,0x40] ++beqz $r9, -0x400000 ++ ++# CHECK-ASM-AND-OBJ: beqz $r9, 4194300 ++# CHECK-ASM: encoding: [0x2f,0xfd,0xff,0x43] ++beqz $r9, 0x3FFFFC ++ ++# CHECK-ASM-AND-OBJ: bnez $r9, -4194304 ++# CHECK-ASM: encoding: [0x30,0x01,0x00,0x44] ++bnez $r9, -0x400000 ++ ++# CHECK-ASM-AND-OBJ: bnez $r9, 4194300 ++# CHECK-ASM: encoding: [0x2f,0xfd,0xff,0x47] ++bnez $r9, 0x3FFFFC ++ ++# CHECK-ASM-AND-OBJ: bceqz $fcc6, -4194304 ++# CHECK-ASM: encoding: [0xd0,0x00,0x00,0x48] ++bceqz $fcc6, -0x400000 ++ ++# CHECK-ASM-AND-OBJ: bceqz $fcc6, 4194300 ++# CHECK-ASM: encoding: [0xcf,0xfc,0xff,0x4b] ++bceqz $fcc6, 0x3FFFFC ++ ++# CHECK-ASM-AND-OBJ: bcnez $fcc6, -4194304 ++# CHECK-ASM: encoding: [0xd0,0x01,0x00,0x48] ++bcnez $fcc6, -0x400000 ++ ++# CHECK-ASM-AND-OBJ: bcnez $fcc6, 4194300 ++# CHECK-ASM: encoding: [0xcf,0xfd,0xff,0x4b] ++bcnez $fcc6, 0x3FFFFC ++ ++### simm26 << 2 ++# CHECK-ASM-AND-OBJ: b -134217728 ++# CHECK-ASM: encoding: [0x00,0x02,0x00,0x50] ++b -0x8000000 ++ ++# CHECK-ASM-AND-OBJ: b 134217724 ++# CHECK-ASM: encoding: [0xff,0xfd,0xff,0x53] ++b 0x7FFFFFC ++ ++# CHECK-ASM-AND-OBJ: bl -134217728 ++# CHECK-ASM: encoding: [0x00,0x02,0x00,0x54] ++bl -0x8000000 ++ ++# CHECK-ASM-AND-OBJ: bl 134217724 ++# CHECK-ASM: encoding: [0xff,0xfd,0xff,0x57] ++bl 0x7FFFFFC ++ +diff --git a/llvm/test/MC/LoongArch/valid_float.s b/llvm/test/MC/LoongArch/valid_float.s +new file mode 100644 +index 000000000..05ecefdc1 +--- /dev/null ++++ b/llvm/test/MC/LoongArch/valid_float.s +@@ -0,0 +1,297 @@ ++# RUN: llvm-mc %s -triple=loongarch64-unknown-linux-gnu -show-encoding | FileCheck %s ++# CHECK: fadd.s $f29, $f15, $f25 ++# CHECK: encoding: [0xfd,0xe5,0x00,0x01] ++fadd.s $f29, $f15, $f25 ++ ++# CHECK: fadd.d $f25, $f7, $f13 ++# CHECK: encoding: [0xf9,0x34,0x01,0x01] ++fadd.d $f25, $f7, $f13 ++ ++# CHECK: fsub.s $f14, $f6, $f31 ++# CHECK: encoding: [0xce,0xfc,0x02,0x01] ++fsub.s $f14, $f6, $f31 ++ ++# CHECK: fsub.d $f29, $f1, $f18 ++# CHECK: encoding: [0x3d,0x48,0x03,0x01] ++fsub.d $f29, $f1, $f18 ++ ++# CHECK: fmul.s $f0, $f7, $f17 ++# CHECK: encoding: [0xe0,0xc4,0x04,0x01] ++fmul.s $f0, $f7, $f17 ++ ++# CHECK: fmul.d $f4, $f30, $f7 ++# CHECK: encoding: [0xc4,0x1f,0x05,0x01] ++fmul.d $f4, $f30, $f7 ++ ++# CHECK: fdiv.s $f20, $f24, $f19 ++# CHECK: encoding: [0x14,0xcf,0x06,0x01] ++fdiv.s $f20, $f24, $f19 ++ ++# CHECK: fdiv.d $f3, $f25, $f28 ++# CHECK: encoding: [0x23,0x73,0x07,0x01] ++fdiv.d $f3, $f25, $f28 ++ ++# CHECK: fmax.s $f22, $f6, $f27 ++# CHECK: encoding: [0xd6,0xec,0x08,0x01] ++fmax.s $f22, $f6, $f27 ++ ++# CHECK: fmax.d $f11, $f26, $f13 ++# CHECK: encoding: [0x4b,0x37,0x09,0x01] ++fmax.d $f11, $f26, $f13 ++ ++# CHECK: fmin.s $f14, $f10, $f19 ++# CHECK: encoding: [0x4e,0xcd,0x0a,0x01] ++fmin.s $f14, $f10, $f19 ++ ++# CHECK: fmin.d $f1, $f13, $f27 ++# CHECK: encoding: [0xa1,0x6d,0x0b,0x01] ++fmin.d $f1, $f13, $f27 ++ ++# CHECK: fmaxa.s $f9, $f27, $f31 ++# CHECK: encoding: [0x69,0xff,0x0c,0x01] ++fmaxa.s $f9, $f27, $f31 ++ ++# CHECK: fmaxa.d $f24, $f13, $f4 ++# CHECK: encoding: [0xb8,0x11,0x0d,0x01] ++fmaxa.d $f24, $f13, $f4 ++ ++# CHECK: fmina.s $f15, $f18, $f1 ++# CHECK: encoding: [0x4f,0x86,0x0e,0x01] ++fmina.s $f15, $f18, $f1 ++ ++# CHECK: fmina.d $f18, $f10, $f0 ++# CHECK: encoding: [0x52,0x01,0x0f,0x01] ++fmina.d $f18, $f10, $f0 ++ ++# CHECK: fscaleb.s $f21, $f23, $f6 ++# CHECK: encoding: 
[0xf5,0x9a,0x10,0x01] ++fscaleb.s $f21, $f23, $f6 ++ ++# CHECK: fscaleb.d $f12, $f14, $f26 ++# CHECK: encoding: [0xcc,0x69,0x11,0x01] ++fscaleb.d $f12, $f14, $f26 ++ ++# CHECK: fcopysign.s $f13, $f24, $f23 ++# CHECK: encoding: [0x0d,0xdf,0x12,0x01] ++fcopysign.s $f13, $f24, $f23 ++ ++# CHECK: fcopysign.d $f16, $f26, $f6 ++# CHECK: encoding: [0x50,0x1b,0x13,0x01] ++fcopysign.d $f16, $f26, $f6 ++ ++# CHECK: fabs.s $f28, $f12 ++# CHECK: encoding: [0x9c,0x05,0x14,0x01] ++fabs.s $f28, $f12 ++ ++# CHECK: fabs.d $f23, $f3 ++# CHECK: encoding: [0x77,0x08,0x14,0x01] ++fabs.d $f23, $f3 ++ ++# CHECK: fneg.s $f21, $f24 ++# CHECK: encoding: [0x15,0x17,0x14,0x01] ++fneg.s $f21, $f24 ++ ++# CHECK: fneg.d $f11, $f26 ++# CHECK: encoding: [0x4b,0x1b,0x14,0x01] ++fneg.d $f11, $f26 ++ ++# CHECK: flogb.s $f31, $f23 ++# CHECK: encoding: [0xff,0x26,0x14,0x01] ++flogb.s $f31, $f23 ++ ++# CHECK: flogb.d $f21, $f29 ++# CHECK: encoding: [0xb5,0x2b,0x14,0x01] ++flogb.d $f21, $f29 ++ ++# CHECK: fclass.s $f20, $f9 ++# CHECK: encoding: [0x34,0x35,0x14,0x01] ++fclass.s $f20, $f9 ++ ++# CHECK: fclass.d $f19, $f2 ++# CHECK: encoding: [0x53,0x38,0x14,0x01] ++fclass.d $f19, $f2 ++ ++# CHECK: fsqrt.s $f27, $f18 ++# CHECK: encoding: [0x5b,0x46,0x14,0x01] ++fsqrt.s $f27, $f18 ++ ++# CHECK: fsqrt.d $f2, $f11 ++# CHECK: encoding: [0x62,0x49,0x14,0x01] ++fsqrt.d $f2, $f11 ++ ++# CHECK: frecip.s $f17, $f27 ++# CHECK: encoding: [0x71,0x57,0x14,0x01] ++frecip.s $f17, $f27 ++ ++# CHECK: frecip.d $f27, $f27 ++# CHECK: encoding: [0x7b,0x5b,0x14,0x01] ++frecip.d $f27, $f27 ++ ++# CHECK: frsqrt.s $f25, $f12 ++# CHECK: encoding: [0x99,0x65,0x14,0x01] ++frsqrt.s $f25, $f12 ++ ++# CHECK: frsqrt.d $f22, $f3 ++# CHECK: encoding: [0x76,0x68,0x14,0x01] ++frsqrt.d $f22, $f3 ++ ++# CHECK: fmov.s $f13, $f23 ++# CHECK: encoding: [0xed,0x96,0x14,0x01] ++fmov.s $f13, $f23 ++ ++# CHECK: fmov.d $f30, $f9 ++# CHECK: encoding: [0x3e,0x99,0x14,0x01] ++fmov.d $f30, $f9 ++ ++# CHECK: movgr2fr.w $f6, $tp ++# CHECK: encoding: [0x46,0xa4,0x14,0x01] ++movgr2fr.w $f6, $tp ++ ++# CHECK: movgr2fr.d $f30, $r11 ++# CHECK: encoding: [0x7e,0xa9,0x14,0x01] ++movgr2fr.d $f30, $r11 ++ ++# CHECK: movgr2frh.w $f23, $r26 ++# CHECK: encoding: [0x57,0xaf,0x14,0x01] ++movgr2frh.w $f23, $r26 ++ ++# CHECK: movfr2gr.s $r10, $f22 ++# CHECK: encoding: [0xca,0xb6,0x14,0x01] ++movfr2gr.s $r10, $f22 ++ ++# CHECK: movfr2gr.d $r26, $f17 ++# CHECK: encoding: [0x3a,0xba,0x14,0x01] ++movfr2gr.d $r26, $f17 ++ ++# CHECK: movfrh2gr.s $sp, $f26 ++# CHECK: encoding: [0x43,0xbf,0x14,0x01] ++movfrh2gr.s $sp, $f26 ++ ++# CHECK: movfr2cf $fcc4, $f11 ++# CHECK: encoding: [0x64,0xd1,0x14,0x01] ++movfr2cf $fcc4, $f11 ++ ++# CHECK: movcf2fr $f16, $fcc0 ++# CHECK: encoding: [0x10,0xd4,0x14,0x01] ++movcf2fr $f16, $fcc0 ++ ++# CHECK: movgr2cf $fcc5, $ra ++# CHECK: encoding: [0x25,0xd8,0x14,0x01] ++movgr2cf $fcc5, $ra ++ ++# CHECK: movcf2gr $r21, $fcc7 ++# CHECK: encoding: [0xf5,0xdc,0x14,0x01] ++movcf2gr $r21, $fcc7 ++ ++# CHECK: fcvt.s.d $f12, $f19 ++# CHECK: encoding: [0x6c,0x1a,0x19,0x01] ++fcvt.s.d $f12, $f19 ++ ++# CHECK: fcvt.d.s $f10, $f6 ++# CHECK: encoding: [0xca,0x24,0x19,0x01] ++fcvt.d.s $f10, $f6 ++ ++# CHECK: ftintrm.w.s $f16, $f16 ++# CHECK: encoding: [0x10,0x06,0x1a,0x01] ++ftintrm.w.s $f16, $f16 ++ ++# CHECK: ftintrm.w.d $f7, $f8 ++# CHECK: encoding: [0x07,0x09,0x1a,0x01] ++ftintrm.w.d $f7, $f8 ++ ++# CHECK: ftintrm.l.s $f24, $f10 ++# CHECK: encoding: [0x58,0x25,0x1a,0x01] ++ftintrm.l.s $f24, $f10 ++ ++# CHECK: ftintrm.l.d $f9, $f9 ++# CHECK: encoding: [0x29,0x29,0x1a,0x01] ++ftintrm.l.d 
$f9, $f9 ++ ++# CHECK: ftintrp.w.s $f14, $f31 ++# CHECK: encoding: [0xee,0x47,0x1a,0x01] ++ftintrp.w.s $f14, $f31 ++ ++# CHECK: ftintrp.w.d $f12, $f3 ++# CHECK: encoding: [0x6c,0x48,0x1a,0x01] ++ftintrp.w.d $f12, $f3 ++ ++# CHECK: ftintrp.l.s $f0, $f16 ++# CHECK: encoding: [0x00,0x66,0x1a,0x01] ++ftintrp.l.s $f0, $f16 ++ ++# CHECK: ftintrp.l.d $f4, $f29 ++# CHECK: encoding: [0xa4,0x6b,0x1a,0x01] ++ftintrp.l.d $f4, $f29 ++ ++# CHECK: ftintrz.w.s $f4, $f29 ++# CHECK: encoding: [0xa4,0x87,0x1a,0x01] ++ftintrz.w.s $f4, $f29 ++ ++# CHECK: ftintrz.w.d $f25, $f24 ++# CHECK: encoding: [0x19,0x8b,0x1a,0x01] ++ftintrz.w.d $f25, $f24 ++ ++# CHECK: ftintrz.l.s $f23, $f5 ++# CHECK: encoding: [0xb7,0xa4,0x1a,0x01] ++ftintrz.l.s $f23, $f5 ++ ++# CHECK: ftintrz.l.d $f3, $f10 ++# CHECK: encoding: [0x43,0xa9,0x1a,0x01] ++ftintrz.l.d $f3, $f10 ++ ++# CHECK: ftintrne.w.s $f4, $f17 ++# CHECK: encoding: [0x24,0xc6,0x1a,0x01] ++ftintrne.w.s $f4, $f17 ++ ++# CHECK: ftintrne.w.d $f31, $f12 ++# CHECK: encoding: [0x9f,0xc9,0x1a,0x01] ++ftintrne.w.d $f31, $f12 ++ ++# CHECK: ftintrne.l.s $f22, $f27 ++# CHECK: encoding: [0x76,0xe7,0x1a,0x01] ++ftintrne.l.s $f22, $f27 ++ ++# CHECK: ftintrne.l.d $f28, $f6 ++# CHECK: encoding: [0xdc,0xe8,0x1a,0x01] ++ftintrne.l.d $f28, $f6 ++ ++# CHECK: ftint.w.s $f21, $f13 ++# CHECK: encoding: [0xb5,0x05,0x1b,0x01] ++ftint.w.s $f21, $f13 ++ ++# CHECK: ftint.w.d $f3, $f14 ++# CHECK: encoding: [0xc3,0x09,0x1b,0x01] ++ftint.w.d $f3, $f14 ++ ++# CHECK: ftint.l.s $f31, $f24 ++# CHECK: encoding: [0x1f,0x27,0x1b,0x01] ++ftint.l.s $f31, $f24 ++ ++# CHECK: ftint.l.d $f16, $f24 ++# CHECK: encoding: [0x10,0x2b,0x1b,0x01] ++ftint.l.d $f16, $f24 ++ ++# CHECK: ffint.s.w $f30, $f5 ++# CHECK: encoding: [0xbe,0x10,0x1d,0x01] ++ffint.s.w $f30, $f5 ++ ++# CHECK: ffint.s.l $f6, $f5 ++# CHECK: encoding: [0xa6,0x18,0x1d,0x01] ++ffint.s.l $f6, $f5 ++ ++# CHECK: ffint.d.w $f24, $f18 ++# CHECK: encoding: [0x58,0x22,0x1d,0x01] ++ffint.d.w $f24, $f18 ++ ++# CHECK: ffint.d.l $f23, $f26 ++# CHECK: encoding: [0x57,0x2b,0x1d,0x01] ++ffint.d.l $f23, $f26 ++ ++# CHECK: frint.s $f5, $f17 ++# CHECK: encoding: [0x25,0x46,0x1e,0x01] ++frint.s $f5, $f17 ++ ++# CHECK: frint.d $f29, $f2 ++# CHECK: encoding: [0x5d,0x48,0x1e,0x01] ++frint.d $f29, $f2 ++ +diff --git a/llvm/test/MC/LoongArch/valid_integer.s b/llvm/test/MC/LoongArch/valid_integer.s +new file mode 100644 +index 000000000..cc78662d5 +--- /dev/null ++++ b/llvm/test/MC/LoongArch/valid_integer.s +@@ -0,0 +1,369 @@ ++# RUN: llvm-mc %s -triple=loongarch64-unknown-linux-gnu -show-encoding | FileCheck %s ++# CHECK: clo.w $ra, $sp ++# CHECK: encoding: [0x61,0x10,0x00,0x00] ++clo.w $ra, $sp ++ ++# CHECK: clz.w $r7, $r10 ++# CHECK: encoding: [0x47,0x15,0x00,0x00] ++clz.w $r7, $r10 ++ ++# CHECK: cto.w $tp, $r6 ++# CHECK: encoding: [0xc2,0x18,0x00,0x00] ++cto.w $tp, $r6 ++ ++# CHECK: ctz.w $r5, $r22 ++# CHECK: encoding: [0xc5,0x1e,0x00,0x00] ++ctz.w $r5, $r22 ++ ++# CHECK: clo.d $r29, $ra ++# CHECK: encoding: [0x3d,0x20,0x00,0x00] ++clo.d $r29, $ra ++ ++# CHECK: clz.d $r26, $r26 ++# CHECK: encoding: [0x5a,0x27,0x00,0x00] ++clz.d $r26, $r26 ++ ++# CHECK: cto.d $r18, $r20 ++# CHECK: encoding: [0x92,0x2a,0x00,0x00] ++cto.d $r18, $r20 ++ ++# CHECK: ctz.d $r17, $r10 ++# CHECK: encoding: [0x51,0x2d,0x00,0x00] ++ctz.d $r17, $r10 ++ ++# CHECK: revb.2h $r20, $r11 ++# CHECK: encoding: [0x74,0x31,0x00,0x00] ++revb.2h $r20, $r11 ++ ++# CHECK: revb.4h $r13, $r19 ++# CHECK: encoding: [0x6d,0x36,0x00,0x00] ++revb.4h $r13, $r19 ++ ++# CHECK: revb.2w $r28, $r27 ++# CHECK: encoding: 
[0x7c,0x3b,0x00,0x00] ++revb.2w $r28, $r27 ++ ++# CHECK: revb.d $zero, $r23 ++# CHECK: encoding: [0xe0,0x3e,0x00,0x00] ++revb.d $zero, $r23 ++ ++# CHECK: revh.2w $r28, $r10 ++# CHECK: encoding: [0x5c,0x41,0x00,0x00] ++revh.2w $r28, $r10 ++ ++# CHECK: revh.d $r9, $r7 ++# CHECK: encoding: [0xe9,0x44,0x00,0x00] ++revh.d $r9, $r7 ++ ++# CHECK: bitrev.4b $r21, $r27 ++# CHECK: encoding: [0x75,0x4b,0x00,0x00] ++bitrev.4b $r21, $r27 ++ ++# CHECK: bitrev.8b $r13, $r25 ++# CHECK: encoding: [0x2d,0x4f,0x00,0x00] ++bitrev.8b $r13, $r25 ++ ++# CHECK: bitrev.w $r25, $r5 ++# CHECK: encoding: [0xb9,0x50,0x00,0x00] ++bitrev.w $r25, $r5 ++ ++# CHECK: bitrev.d $r19, $r23 ++# CHECK: encoding: [0xf3,0x56,0x00,0x00] ++bitrev.d $r19, $r23 ++ ++# CHECK: ext.w.h $r23, $r23 ++# CHECK: encoding: [0xf7,0x5a,0x00,0x00] ++ext.w.h $r23, $r23 ++ ++# CHECK: ext.w.b $r20, $r18 ++# CHECK: encoding: [0x54,0x5e,0x00,0x00] ++ext.w.b $r20, $r18 ++ ++# CHECK: rdtimel.w $r24, $r4 ++# CHECK: encoding: [0x98,0x60,0x00,0x00] ++rdtimel.w $r24, $r4 ++ ++# CHECK: rdtimeh.w $r11, $r5 ++# CHECK: encoding: [0xab,0x64,0x00,0x00] ++rdtimeh.w $r11, $r5 ++ ++# CHECK: rdtime.d $tp, $ra ++# CHECK: encoding: [0x22,0x68,0x00,0x00] ++rdtime.d $tp, $ra ++ ++# CHECK: cpucfg $sp, $ra ++# CHECK: encoding: [0x23,0x6c,0x00,0x00] ++cpucfg $sp, $ra ++ ++# CHECK: asrtle.d $r21, $r19 ++# CHECK: encoding: [0xa0,0x4e,0x01,0x00] ++asrtle.d $r21, $r19 ++ ++# CHECK: asrtgt.d $ra, $r19 ++# CHECK: encoding: [0x20,0xcc,0x01,0x00] ++asrtgt.d $ra, $r19 ++ ++# CHECK: alsl.w $tp, $r17, $tp, 4 ++# CHECK: encoding: [0x22,0x8a,0x05,0x00] ++alsl.w $tp, $r17, $tp, 4 ++ ++# CHECK: bytepick.w $r29, $zero, $r16, 0 ++# CHECK: encoding: [0x1d,0x40,0x08,0x00] ++bytepick.w $r29, $zero, $r16, 0 ++ ++# CHECK: bytepick.d $r15, $r17, $r20, 4 ++# CHECK: encoding: [0x2f,0x52,0x0e,0x00] ++bytepick.d $r15, $r17, $r20, 4 ++ ++# CHECK: add.w $r9, $ra, $r31 ++# CHECK: encoding: [0x29,0x7c,0x10,0x00] ++add.w $r9, $ra, $r31 ++ ++# CHECK: add.d $tp, $r18, $r27 ++# CHECK: encoding: [0x42,0xee,0x10,0x00] ++add.d $tp, $r18, $r27 ++ ++# CHECK: sub.w $r21, $r25, $r19 ++# CHECK: encoding: [0x35,0x4f,0x11,0x00] ++sub.w $r21, $r25, $r19 ++ ++# CHECK: sub.d $r7, $r12, $r7 ++# CHECK: encoding: [0x87,0x9d,0x11,0x00] ++sub.d $r7, $r12, $r7 ++ ++# CHECK: slt $r29, $r26, $tp ++# CHECK: encoding: [0x5d,0x0b,0x12,0x00] ++slt $r29, $r26, $tp ++ ++# CHECK: sltu $r11, $r21, $r29 ++# CHECK: encoding: [0xab,0xf6,0x12,0x00] ++sltu $r11, $r21, $r29 ++ ++# CHECK: maskeqz $r20, $r11, $r18 ++# CHECK: encoding: [0x74,0x49,0x13,0x00] ++maskeqz $r20, $r11, $r18 ++ ++# CHECK: masknez $r20, $r13, $r26 ++# CHECK: encoding: [0xb4,0xe9,0x13,0x00] ++masknez $r20, $r13, $r26 ++ ++# CHECK: nor $r5, $r18, $r5 ++# CHECK: encoding: [0x45,0x16,0x14,0x00] ++nor $r5, $r18, $r5 ++ ++# CHECK: and $r19, $r31, $ra ++# CHECK: encoding: [0xf3,0x87,0x14,0x00] ++and $r19, $r31, $ra ++ ++# CHECK: or $r17, $r16, $r30 ++# CHECK: encoding: [0x11,0x7a,0x15,0x00] ++or $r17, $r16, $r30 ++ ++# CHECK: xor $r15, $r19, $r8 ++# CHECK: encoding: [0x6f,0xa2,0x15,0x00] ++xor $r15, $r19, $r8 ++ ++# CHECK: orn $tp, $sp, $r25 ++# CHECK: encoding: [0x62,0x64,0x16,0x00] ++orn $tp, $sp, $r25 ++ ++# CHECK: andn $r28, $r25, $r5 ++# CHECK: encoding: [0x3c,0x97,0x16,0x00] ++andn $r28, $r25, $r5 ++ ++# CHECK: sll.w $r24, $r27, $r23 ++# CHECK: encoding: [0x78,0x5f,0x17,0x00] ++sll.w $r24, $r27, $r23 ++ ++# CHECK: srl.w $r31, $r17, $r7 ++# CHECK: encoding: [0x3f,0x9e,0x17,0x00] ++srl.w $r31, $r17, $r7 ++ ++# CHECK: sra.w $r12, $r28, $r10 ++# CHECK: encoding: 
[0x8c,0x2b,0x18,0x00] ++sra.w $r12, $r28, $r10 ++ ++# CHECK: sll.d $r20, $r15, $sp ++# CHECK: encoding: [0xf4,0x8d,0x18,0x00] ++sll.d $r20, $r15, $sp ++ ++# CHECK: srl.d $r14, $r25, $zero ++# CHECK: encoding: [0x2e,0x03,0x19,0x00] ++srl.d $r14, $r25, $zero ++ ++# CHECK: sra.d $r7, $r22, $r31 ++# CHECK: encoding: [0xc7,0xfe,0x19,0x00] ++sra.d $r7, $r22, $r31 ++ ++# CHECK: rotr.w $ra, $r26, $r18 ++# CHECK: encoding: [0x41,0x4b,0x1b,0x00] ++rotr.w $ra, $r26, $r18 ++ ++# CHECK: rotr.d $r31, $sp, $ra ++# CHECK: encoding: [0x7f,0x84,0x1b,0x00] ++rotr.d $r31, $sp, $ra ++ ++# CHECK: mul.w $r4, $r18, $sp ++# CHECK: encoding: [0x44,0x0e,0x1c,0x00] ++mul.w $r4, $r18, $sp ++ ++# CHECK: mulh.w $r27, $r23, $zero ++# CHECK: encoding: [0xfb,0x82,0x1c,0x00] ++mulh.w $r27, $r23, $zero ++ ++# CHECK: mulh.wu $r10, $r17, $r24 ++# CHECK: encoding: [0x2a,0x62,0x1d,0x00] ++mulh.wu $r10, $r17, $r24 ++ ++# CHECK: mul.d $ra, $r14, $r24 ++# CHECK: encoding: [0xc1,0xe1,0x1d,0x00] ++mul.d $ra, $r14, $r24 ++ ++# CHECK: mulh.d $r28, $ra, $r27 ++# CHECK: encoding: [0x3c,0x6c,0x1e,0x00] ++mulh.d $r28, $ra, $r27 ++ ++# CHECK: mulh.du $r13, $r27, $r29 ++# CHECK: encoding: [0x6d,0xf7,0x1e,0x00] ++mulh.du $r13, $r27, $r29 ++ ++# CHECK: mulw.d.w $r27, $r6, $r17 ++# CHECK: encoding: [0xdb,0x44,0x1f,0x00] ++mulw.d.w $r27, $r6, $r17 ++ ++# CHECK: mulw.d.wu $r17, $r22, $r30 ++# CHECK: encoding: [0xd1,0xfa,0x1f,0x00] ++mulw.d.wu $r17, $r22, $r30 ++ ++# CHECK: div.w $r30, $r13, $r25 ++# CHECK: encoding: [0xbe,0x65,0x20,0x00] ++div.w $r30, $r13, $r25 ++ ++# CHECK: mod.w $ra, $r26, $r10 ++# CHECK: encoding: [0x41,0xab,0x20,0x00] ++mod.w $ra, $r26, $r10 ++ ++# CHECK: div.wu $r19, $r23, $zero ++# CHECK: encoding: [0xf3,0x02,0x21,0x00] ++div.wu $r19, $r23, $zero ++ ++# CHECK: mod.wu $r27, $r9, $r17 ++# CHECK: encoding: [0x3b,0xc5,0x21,0x00] ++mod.wu $r27, $r9, $r17 ++ ++# CHECK: div.d $r23, $r6, $r21 ++# CHECK: encoding: [0xd7,0x54,0x22,0x00] ++div.d $r23, $r6, $r21 ++ ++# CHECK: mod.d $r16, $sp, $r15 ++# CHECK: encoding: [0x70,0xbc,0x22,0x00] ++mod.d $r16, $sp, $r15 ++ ++# CHECK: div.du $r31, $r24, $r14 ++# CHECK: encoding: [0x1f,0x3b,0x23,0x00] ++div.du $r31, $r24, $r14 ++ ++# CHECK: mod.du $r25, $r23, $r24 ++# CHECK: encoding: [0xf9,0xe2,0x23,0x00] ++mod.du $r25, $r23, $r24 ++ ++# CHECK: crc.w.b.w $r24, $r7, $tp ++# CHECK: encoding: [0xf8,0x08,0x24,0x00] ++crc.w.b.w $r24, $r7, $tp ++ ++# CHECK: crc.w.h.w $r31, $r10, $r18 ++# CHECK: encoding: [0x5f,0xc9,0x24,0x00] ++crc.w.h.w $r31, $r10, $r18 ++ ++# CHECK: crc.w.w.w $r28, $r6, $r10 ++# CHECK: encoding: [0xdc,0x28,0x25,0x00] ++crc.w.w.w $r28, $r6, $r10 ++ ++# CHECK: crc.w.d.w $r28, $r11, $r31 ++# CHECK: encoding: [0x7c,0xfd,0x25,0x00] ++crc.w.d.w $r28, $r11, $r31 ++ ++# CHECK: crcc.w.b.w $r15, $r18, $sp ++# CHECK: encoding: [0x4f,0x0e,0x26,0x00] ++crcc.w.b.w $r15, $r18, $sp ++ ++# CHECK: crcc.w.h.w $r21, $r29, $r18 ++# CHECK: encoding: [0xb5,0xcb,0x26,0x00] ++crcc.w.h.w $r21, $r29, $r18 ++ ++# CHECK: crcc.w.w.w $r17, $r14, $r13 ++# CHECK: encoding: [0xd1,0x35,0x27,0x00] ++crcc.w.w.w $r17, $r14, $r13 ++ ++# CHECK: crcc.w.d.w $r30, $r21, $r27 ++# CHECK: encoding: [0xbe,0xee,0x27,0x00] ++crcc.w.d.w $r30, $r21, $r27 ++ ++# CHECK: break 23 ++# CHECK: encoding: [0x17,0x00,0x2a,0x00] ++break 23 ++ ++# CHECK: syscall 2 ++# CHECK: encoding: [0x02,0x00,0x2b,0x00] ++syscall 2 ++ ++# CHECK: alsl.d $r17, $r11, $r5, 3 ++# CHECK: encoding: [0x71,0x15,0x2d,0x00] ++alsl.d $r17, $r11, $r5, 3 ++ ++# CHECK: slli.w $r26, $r18, 0 ++# CHECK: encoding: [0x5a,0x82,0x40,0x00] ++slli.w $r26, $r18, 0 ++ ++# CHECK: 
slli.d $r10, $r31, 39 ++# CHECK: encoding: [0xea,0x9f,0x41,0x00] ++slli.d $r10, $r31, 39 ++ ++# CHECK: srli.w $r10, $r14, 30 ++# CHECK: encoding: [0xca,0xf9,0x44,0x00] ++srli.w $r10, $r14, 30 ++ ++# CHECK: srli.d $r31, $r22, 38 ++# CHECK: encoding: [0xdf,0x9a,0x45,0x00] ++srli.d $r31, $r22, 38 ++ ++# CHECK: srai.w $r8, $r17, 24 ++# CHECK: encoding: [0x28,0xe2,0x48,0x00] ++srai.w $r8, $r17, 24 ++ ++# CHECK: srai.d $r9, $r21, 27 ++# CHECK: encoding: [0xa9,0x6e,0x49,0x00] ++srai.d $r9, $r21, 27 ++ ++# CHECK: rotri.w $r23, $r20, 23 ++# CHECK: encoding: [0x97,0xde,0x4c,0x00] ++rotri.w $r23, $r20, 23 ++ ++# CHECK: rotri.d $r29, $zero, 7 ++# CHECK: encoding: [0x1d,0x1c,0x4d,0x00] ++rotri.d $r29, $zero, 7 ++ ++# CHECK: bstrins.w $r8, $r11, 7, 2 ++# CHECK: encoding: [0x68,0x09,0x67,0x00] ++bstrins.w $r8, $r11, 7, 2 ++ ++# CHECK: bstrins.d $r8, $r11, 7, 2 ++# CHECK: encoding: [0x68,0x09,0x87,0x00] ++bstrins.d $r8, $r11, 7, 2 ++ ++# CHECK: bstrpick.w $ra, $r9, 10, 4 ++# CHECK: encoding: [0x21,0x91,0x6a,0x00] ++bstrpick.w $ra, $r9, 10, 4 ++ ++# CHECK: bstrpick.d $r31, $r27, 39, 22 ++# CHECK: encoding: [0x7f,0x5b,0xe7,0x00] ++bstrpick.d $r31, $r27, 39, 22 ++ ++# CHECK: cpucfg $sp, $r8 ++# CHECK: encoding: [0x03,0x6d,0x00,0x00] ++cpucfg $sp, $r8 ++ ++# CHECK: alsl.wu $r19, $r8, $r25, 1 ++# CHECK: encoding: [0x13,0x65,0x06,0x00] ++alsl.wu $r19, $r8, $r25, 1 ++ +diff --git a/llvm/test/MC/LoongArch/valid_memory.s b/llvm/test/MC/LoongArch/valid_memory.s +new file mode 100644 +index 000000000..30ea88c99 +--- /dev/null ++++ b/llvm/test/MC/LoongArch/valid_memory.s +@@ -0,0 +1,405 @@ ++# RUN: llvm-mc %s -triple=loongarch64-unknown-linux-gnu -show-encoding | FileCheck %s ++# CHECK: dbar 0 ++# CHECK: encoding: [0x00,0x00,0x72,0x38] ++dbar 0 ++ ++# CHECK: ibar 0 ++# CHECK: encoding: [0x00,0x80,0x72,0x38] ++ibar 0 ++ ++# CHECK: ll.w $tp, $r27, 220 ++# CHECK: encoding: [0x62,0xdf,0x00,0x20] ++ll.w $tp, $r27, 220 ++ ++# CHECK: sc.w $r19, $r14, 56 ++# CHECK: encoding: [0xd3,0x39,0x00,0x21] ++sc.w $r19, $r14, 56 ++ ++# CHECK: ll.d $r25, $r27, 16 ++# CHECK: encoding: [0x79,0x13,0x00,0x22] ++ll.d $r25, $r27, 16 ++ ++# CHECK: sc.d $r17, $r17, 244 ++# CHECK: encoding: [0x31,0xf6,0x00,0x23] ++sc.d $r17, $r17, 244 ++ ++# CHECK: ldptr.w $r26, $r6, 60 ++# CHECK: encoding: [0xda,0x3c,0x00,0x24] ++ldptr.w $r26, $r6, 60 ++ ++# CHECK: stptr.w $r28, $r5, 216 ++# CHECK: encoding: [0xbc,0xd8,0x00,0x25] ++stptr.w $r28, $r5, 216 ++ ++# CHECK: ldptr.d $r5, $r29, 244 ++# CHECK: encoding: [0xa5,0xf7,0x00,0x26] ++ldptr.d $r5, $r29, 244 ++ ++# CHECK: stptr.d $r14, $r24, 196 ++# CHECK: encoding: [0x0e,0xc7,0x00,0x27] ++stptr.d $r14, $r24, 196 ++ ++# CHECK: ld.b $r24, $r8, 21 ++# CHECK: encoding: [0x18,0x55,0x00,0x28] ++ld.b $r24, $r8, 21 ++ ++# CHECK: ld.h $r7, $r18, 80 ++# CHECK: encoding: [0x47,0x42,0x41,0x28] ++ld.h $r7, $r18, 80 ++ ++# CHECK: ld.w $r18, $r26, 92 ++# CHECK: encoding: [0x52,0x73,0x81,0x28] ++ld.w $r18, $r26, 92 ++ ++# CHECK: ld.d $r18, $r20, 159 ++# CHECK: encoding: [0x92,0x7e,0xc2,0x28] ++ld.d $r18, $r20, 159 ++ ++# CHECK: st.b $sp, $r7, 95 ++# CHECK: encoding: [0xe3,0x7c,0x01,0x29] ++st.b $sp, $r7, 95 ++ ++# CHECK: st.h $r25, $r16, 122 ++# CHECK: encoding: [0x19,0xea,0x41,0x29] ++st.h $r25, $r16, 122 ++ ++# CHECK: st.w $r13, $r13, 175 ++# CHECK: encoding: [0xad,0xbd,0x82,0x29] ++st.w $r13, $r13, 175 ++ ++# CHECK: st.d $r30, $r30, 60 ++# CHECK: encoding: [0xde,0xf3,0xc0,0x29] ++st.d $r30, $r30, 60 ++ ++# CHECK: ld.bu $r13, $r13, 150 ++# CHECK: encoding: [0xad,0x59,0x02,0x2a] ++ld.bu $r13, $r13, 150 ++ ++# CHECK: ld.hu 
$r18, $r29, 198 ++# CHECK: encoding: [0xb2,0x1b,0x43,0x2a] ++ld.hu $r18, $r29, 198 ++ ++# CHECK: ld.wu $r14, $r19, 31 ++# CHECK: encoding: [0x6e,0x7e,0x80,0x2a] ++ld.wu $r14, $r19, 31 ++ ++# CHECK: fld.s $f23, $r15, 250 ++# CHECK: encoding: [0xf7,0xe9,0x03,0x2b] ++fld.s $f23, $r15, 250 ++ ++# CHECK: fst.s $f30, $r19, 230 ++# CHECK: encoding: [0x7e,0x9a,0x43,0x2b] ++fst.s $f30, $r19, 230 ++ ++# CHECK: fld.d $f22, $r17, 114 ++# CHECK: encoding: [0x36,0xca,0x81,0x2b] ++fld.d $f22, $r17, 114 ++ ++# CHECK: fst.d $f28, $r7, 198 ++# CHECK: encoding: [0xfc,0x18,0xc3,0x2b] ++fst.d $f28, $r7, 198 ++ ++# CHECK: ldx.b $r24, $ra, $tp ++# CHECK: encoding: [0x38,0x08,0x00,0x38] ++ldx.b $r24, $ra, $tp ++ ++# CHECK: ldx.h $r22, $r22, $r17 ++# CHECK: encoding: [0xd6,0x46,0x04,0x38] ++ldx.h $r22, $r22, $r17 ++ ++# CHECK: ldx.w $r25, $r11, $r23 ++# CHECK: encoding: [0x79,0x5d,0x08,0x38] ++ldx.w $r25, $r11, $r23 ++ ++# CHECK: ldx.d $r18, $r23, $r20 ++# CHECK: encoding: [0xf2,0x52,0x0c,0x38] ++ldx.d $r18, $r23, $r20 ++ ++# CHECK: stx.b $r19, $ra, $sp ++# CHECK: encoding: [0x33,0x0c,0x10,0x38] ++stx.b $r19, $ra, $sp ++ ++# CHECK: stx.h $zero, $r28, $r26 ++# CHECK: encoding: [0x80,0x6b,0x14,0x38] ++stx.h $zero, $r28, $r26 ++ ++# CHECK: stx.w $r7, $r4, $r31 ++# CHECK: encoding: [0x87,0x7c,0x18,0x38] ++stx.w $r7, $r4, $r31 ++ ++# CHECK: stx.d $r7, $r31, $r10 ++# CHECK: encoding: [0xe7,0x2b,0x1c,0x38] ++stx.d $r7, $r31, $r10 ++ ++# CHECK: ldx.bu $r11, $r9, $r9 ++# CHECK: encoding: [0x2b,0x25,0x20,0x38] ++ldx.bu $r11, $r9, $r9 ++ ++# CHECK: ldx.hu $r22, $r23, $r27 ++# CHECK: encoding: [0xf6,0x6e,0x24,0x38] ++ldx.hu $r22, $r23, $r27 ++ ++# CHECK: ldx.wu $r8, $r24, $r28 ++# CHECK: encoding: [0x08,0x73,0x28,0x38] ++ldx.wu $r8, $r24, $r28 ++ ++# CHECK: fldx.s $f1, $r15, $r19 ++# CHECK: encoding: [0xe1,0x4d,0x30,0x38] ++fldx.s $f1, $r15, $r19 ++ ++# CHECK: fldx.d $f27, $r13, $r31 ++# CHECK: encoding: [0xbb,0x7d,0x34,0x38] ++fldx.d $f27, $r13, $r31 ++ ++# CHECK: fstx.s $f26, $sp, $r22 ++# CHECK: encoding: [0x7a,0x58,0x38,0x38] ++fstx.s $f26, $sp, $r22 ++ ++# CHECK: fstx.d $f6, $r15, $r17 ++# CHECK: encoding: [0xe6,0x45,0x3c,0x38] ++fstx.d $f6, $r15, $r17 ++ ++# CHECK: amswap_db.w $r6, $r12, $r24, 0 ++# CHECK: encoding: [0x06,0x33,0x69,0x38] ++amswap_db.w $r6, $r12, $r24, 0 ++ ++# CHECK: amswap_db.d $tp, $r14, $r22, 0 ++# CHECK: encoding: [0xc2,0xba,0x69,0x38] ++amswap_db.d $tp, $r14, $r22, 0 ++ ++# CHECK: amadd_db.w $r8, $r12, $r21, 0 ++# CHECK: encoding: [0xa8,0x32,0x6a,0x38] ++amadd_db.w $r8, $r12, $r21, 0 ++ ++# CHECK: amadd_db.d $r5, $r17, $r29, 0 ++# CHECK: encoding: [0xa5,0xc7,0x6a,0x38] ++amadd_db.d $r5, $r17, $r29, 0 ++ ++# CHECK: amand_db.w $r4, $r19, $r22, 0 ++# CHECK: encoding: [0xc4,0x4e,0x6b,0x38] ++amand_db.w $r4, $r19, $r22, 0 ++ ++# CHECK: amand_db.d $r10, $r18, $r29, 0 ++# CHECK: encoding: [0xaa,0xcb,0x6b,0x38] ++amand_db.d $r10, $r18, $r29, 0 ++ ++# CHECK: amor_db.w $r6, $r16, $r23, 0 ++# CHECK: encoding: [0xe6,0x42,0x6c,0x38] ++amor_db.w $r6, $r16, $r23, 0 ++ ++# CHECK: amor_db.d $sp, $r16, $r24, 0 ++# CHECK: encoding: [0x03,0xc3,0x6c,0x38] ++amor_db.d $sp, $r16, $r24, 0 ++ ++# CHECK: amxor_db.w $tp, $r15, $r23, 0 ++# CHECK: encoding: [0xe2,0x3e,0x6d,0x38] ++amxor_db.w $tp, $r15, $r23, 0 ++ ++# CHECK: amxor_db.d $r8, $r20, $r28, 0 ++# CHECK: encoding: [0x88,0xd3,0x6d,0x38] ++amxor_db.d $r8, $r20, $r28, 0 ++ ++# CHECK: ammax_db.w $ra, $r11, $r23, 0 ++# CHECK: encoding: [0xe1,0x2e,0x6e,0x38] ++ammax_db.w $ra, $r11, $r23, 0 ++ ++# CHECK: ammax_db.d $r9, $r20, $r27, 0 ++# CHECK: encoding: 
[0x69,0xd3,0x6e,0x38] ++ammax_db.d $r9, $r20, $r27, 0 ++ ++# CHECK: ammin_db.w $r9, $r14, $r23, 0 ++# CHECK: encoding: [0xe9,0x3a,0x6f,0x38] ++ammin_db.w $r9, $r14, $r23, 0 ++ ++# CHECK: ammin_db.d $r9, $r13, $r22, 0 ++# CHECK: encoding: [0xc9,0xb6,0x6f,0x38] ++ammin_db.d $r9, $r13, $r22, 0 ++ ++# CHECK: ammax_db.wu $r9, $r11, $r22, 0 ++# CHECK: encoding: [0xc9,0x2e,0x70,0x38] ++ammax_db.wu $r9, $r11, $r22, 0 ++ ++# CHECK: ammax_db.du $r6, $r16, $r25, 0 ++# CHECK: encoding: [0x26,0xc3,0x70,0x38] ++ammax_db.du $r6, $r16, $r25, 0 ++ ++# CHECK: ammin_db.wu $r8, $r18, $r30, 0 ++# CHECK: encoding: [0xc8,0x4b,0x71,0x38] ++ammin_db.wu $r8, $r18, $r30, 0 ++ ++# CHECK: ammin_db.du $r7, $r16, $r25, 0 ++# CHECK: encoding: [0x27,0xc3,0x71,0x38] ++ammin_db.du $r7, $r16, $r25, 0 ++ ++# CHECK: amswap.w $r6, $r12, $r24, 0 ++# CHECK: encoding: [0x06,0x33,0x60,0x38] ++amswap.w $r6, $r12, $r24, 0 ++ ++# CHECK: amswap.d $tp, $r14, $r22, 0 ++# CHECK: encoding: [0xc2,0xba,0x60,0x38] ++amswap.d $tp, $r14, $r22, 0 ++ ++# CHECK: amadd.w $r8, $r12, $r21, 0 ++# CHECK: encoding: [0xa8,0x32,0x61,0x38] ++amadd.w $r8, $r12, $r21, 0 ++ ++# CHECK: amadd.d $r5, $r17, $r29, 0 ++# CHECK: encoding: [0xa5,0xc7,0x61,0x38] ++amadd.d $r5, $r17, $r29, 0 ++ ++# CHECK: amand.w $r4, $r19, $r22, 0 ++# CHECK: encoding: [0xc4,0x4e,0x62,0x38] ++amand.w $r4, $r19, $r22, 0 ++ ++# CHECK: amand.d $r10, $r18, $r29, 0 ++# CHECK: encoding: [0xaa,0xcb,0x62,0x38] ++amand.d $r10, $r18, $r29, 0 ++ ++# CHECK: amor.w $r6, $r16, $r23, 0 ++# CHECK: encoding: [0xe6,0x42,0x63,0x38] ++amor.w $r6, $r16, $r23, 0 ++ ++# CHECK: amor.d $sp, $r16, $r24, 0 ++# CHECK: encoding: [0x03,0xc3,0x63,0x38] ++amor.d $sp, $r16, $r24, 0 ++ ++# CHECK: amxor.w $tp, $r15, $r23, 0 ++# CHECK: encoding: [0xe2,0x3e,0x64,0x38] ++amxor.w $tp, $r15, $r23, 0 ++ ++# CHECK: amxor.d $r8, $r20, $r28, 0 ++# CHECK: encoding: [0x88,0xd3,0x64,0x38] ++amxor.d $r8, $r20, $r28, 0 ++ ++# CHECK: ammax.w $ra, $r11, $r23, 0 ++# CHECK: encoding: [0xe1,0x2e,0x65,0x38] ++ammax.w $ra, $r11, $r23, 0 ++ ++# CHECK: ammax.d $r9, $r20, $r27, 0 ++# CHECK: encoding: [0x69,0xd3,0x65,0x38] ++ammax.d $r9, $r20, $r27, 0 ++ ++# CHECK: ammin.w $r9, $r14, $r23, 0 ++# CHECK: encoding: [0xe9,0x3a,0x66,0x38] ++ammin.w $r9, $r14, $r23, 0 ++ ++# CHECK: ammin.d $r9, $r13, $r22, 0 ++# CHECK: encoding: [0xc9,0xb6,0x66,0x38] ++ammin.d $r9, $r13, $r22, 0 ++ ++# CHECK: ammax.wu $r9, $r11, $r22, 0 ++# CHECK: encoding: [0xc9,0x2e,0x67,0x38] ++ammax.wu $r9, $r11, $r22, 0 ++ ++# CHECK: ammax.du $r6, $r16, $r25, 0 ++# CHECK: encoding: [0x26,0xc3,0x67,0x38] ++ammax.du $r6, $r16, $r25, 0 ++ ++# CHECK: ammin.wu $r8, $r18, $r30, 0 ++# CHECK: encoding: [0xc8,0x4b,0x68,0x38] ++ammin.wu $r8, $r18, $r30, 0 ++ ++# CHECK: ammin.du $r7, $r16, $r25, 0 ++# CHECK: encoding: [0x27,0xc3,0x68,0x38] ++ammin.du $r7, $r16, $r25, 0 ++ ++# CHECK: fldgt.s $f3, $r27, $r13 ++# CHECK: encoding: [0x63,0x37,0x74,0x38] ++fldgt.s $f3, $r27, $r13 ++ ++# CHECK: fldgt.d $f26, $r5, $r31 ++# CHECK: encoding: [0xba,0xfc,0x74,0x38] ++fldgt.d $f26, $r5, $r31 ++ ++# CHECK: fldle.s $f24, $r29, $r17 ++# CHECK: encoding: [0xb8,0x47,0x75,0x38] ++fldle.s $f24, $r29, $r17 ++ ++# CHECK: fldle.d $f3, $r15, $r22 ++# CHECK: encoding: [0xe3,0xd9,0x75,0x38] ++fldle.d $f3, $r15, $r22 ++ ++# CHECK: fstgt.s $f31, $r13, $r30 ++# CHECK: encoding: [0xbf,0x79,0x76,0x38] ++fstgt.s $f31, $r13, $r30 ++ ++# CHECK: fstgt.d $f13, $r11, $r26 ++# CHECK: encoding: [0x6d,0xe9,0x76,0x38] ++fstgt.d $f13, $r11, $r26 ++ ++# CHECK: fstle.s $f13, $r13, $r7 ++# CHECK: encoding: [0xad,0x1d,0x77,0x38] 
++fstle.s $f13, $r13, $r7 ++ ++# CHECK: fstle.d $f18, $r9, $r13 ++# CHECK: encoding: [0x32,0xb5,0x77,0x38] ++fstle.d $f18, $r9, $r13 ++ ++# CHECK: preld 10, $zero, 23 ++# CHECK: encoding: [0x0a,0x5c,0xc0,0x2a] ++preld 10, $zero, 23 ++ ++# CHECK: ldgt.b $r6, $r6, $r29 ++# CHECK: encoding: [0xc6,0x74,0x78,0x38] ++ldgt.b $r6, $r6, $r29 ++ ++# CHECK: ldgt.h $r5, $r31, $ra ++# CHECK: encoding: [0xe5,0x87,0x78,0x38] ++ldgt.h $r5, $r31, $ra ++ ++# CHECK: ldgt.w $r15, $r26, $r8 ++# CHECK: encoding: [0x4f,0x23,0x79,0x38] ++ldgt.w $r15, $r26, $r8 ++ ++# CHECK: ldgt.d $r23, $r25, $r31 ++# CHECK: encoding: [0x37,0xff,0x79,0x38] ++ldgt.d $r23, $r25, $r31 ++ ++# CHECK: ldle.b $r9, $r12, $r15 ++# CHECK: encoding: [0x89,0x3d,0x7a,0x38] ++ldle.b $r9, $r12, $r15 ++ ++# CHECK: ldle.h $r11, $r11, $r23 ++# CHECK: encoding: [0x6b,0xdd,0x7a,0x38] ++ldle.h $r11, $r11, $r23 ++ ++# CHECK: ldle.w $r24, $tp, $tp ++# CHECK: encoding: [0x58,0x08,0x7b,0x38] ++ldle.w $r24, $tp, $tp ++ ++# CHECK: ldle.d $r20, $r15, $r16 ++# CHECK: encoding: [0xf4,0xc1,0x7b,0x38] ++ldle.d $r20, $r15, $r16 ++ ++# CHECK: stgt.b $r27, $r19, $r20 ++# CHECK: encoding: [0x7b,0x52,0x7c,0x38] ++stgt.b $r27, $r19, $r20 ++ ++# CHECK: stgt.h $r16, $r4, $r6 ++# CHECK: encoding: [0x90,0x98,0x7c,0x38] ++stgt.h $r16, $r4, $r6 ++ ++# CHECK: stgt.w $r31, $r28, $r14 ++# CHECK: encoding: [0x9f,0x3b,0x7d,0x38] ++stgt.w $r31, $r28, $r14 ++ ++# CHECK: stgt.d $r30, $r21, $r24 ++# CHECK: encoding: [0xbe,0xe2,0x7d,0x38] ++stgt.d $r30, $r21, $r24 ++ ++# CHECK: stle.b $r10, $r4, $r16 ++# CHECK: encoding: [0x8a,0x40,0x7e,0x38] ++stle.b $r10, $r4, $r16 ++ ++# CHECK: stle.h $r17, $r17, $r21 ++# CHECK: encoding: [0x31,0xd6,0x7e,0x38] ++stle.h $r17, $r17, $r21 ++ ++# CHECK: stle.w $r23, $r28, $r29 ++# CHECK: encoding: [0x97,0x77,0x7f,0x38] ++stle.w $r23, $r28, $r29 ++ ++# CHECK: stle.d $r25, $r24, $r29 ++# CHECK: encoding: [0x19,0xf7,0x7f,0x38] ++stle.d $r25, $r24, $r29 ++ +diff --git a/llvm/test/MC/LoongArch/valid_priv.s b/llvm/test/MC/LoongArch/valid_priv.s +new file mode 100644 +index 000000000..57a252a8d +--- /dev/null ++++ b/llvm/test/MC/LoongArch/valid_priv.s +@@ -0,0 +1,125 @@ ++# RUN: llvm-mc %s -triple=loongarch64-unknown-linux-gnu -show-encoding | FileCheck %s ++# CHECK: csrrd $r26, 30 ++# CHECK: encoding: [0x1a,0x78,0x00,0x04] ++csrrd $r26, 30 ++ ++# CHECK: csrwr $r24, 194 ++# CHECK: encoding: [0x38,0x08,0x03,0x04] ++csrwr $r24, 194 ++ ++# CHECK: csrxchg $r6, $r27, 214 ++# CHECK: encoding: [0x66,0x5b,0x03,0x04] ++csrxchg $r6, $r27, 214 ++ ++# CHECK: cacop 0, $r10, 27 ++# CHECK: encoding: [0x40,0x6d,0x00,0x06] ++cacop 0, $r10, 27 ++ ++# CHECK: lddir $r12, $r30, 92 ++# CHECK: encoding: [0xcc,0x73,0x41,0x06] ++lddir $r12, $r30, 92 ++ ++# CHECK: ldpte $r18, 200 ++# CHECK: encoding: [0x40,0x22,0x47,0x06] ++ldpte $r18, 200 ++ ++# CHECK: iocsrrd.b $r26, $r24 ++# CHECK: encoding: [0x1a,0x03,0x48,0x06] ++iocsrrd.b $r26, $r24 ++ ++# CHECK: iocsrrd.h $r5, $r27 ++# CHECK: encoding: [0x65,0x07,0x48,0x06] ++iocsrrd.h $r5, $r27 ++ ++# CHECK: iocsrrd.w $r10, $r20 ++# CHECK: encoding: [0x8a,0x0a,0x48,0x06] ++iocsrrd.w $r10, $r20 ++ ++# CHECK: iocsrrd.d $r17, $r25 ++# CHECK: encoding: [0x31,0x0f,0x48,0x06] ++iocsrrd.d $r17, $r25 ++ ++# CHECK: iocsrwr.b $r4, $r23 ++# CHECK: encoding: [0xe4,0x12,0x48,0x06] ++iocsrwr.b $r4, $r23 ++ ++# CHECK: iocsrwr.h $r11, $zero ++# CHECK: encoding: [0x0b,0x14,0x48,0x06] ++iocsrwr.h $r11, $zero ++ ++# CHECK: iocsrwr.w $r20, $r26 ++# CHECK: encoding: [0x54,0x1b,0x48,0x06] ++iocsrwr.w $r20, $r26 ++ ++# CHECK: iocsrwr.d $r20, $r7 ++# CHECK: 
encoding: [0xf4,0x1c,0x48,0x06] ++iocsrwr.d $r20, $r7 ++ ++# CHECK: tlbclr ++# CHECK: encoding: [0x00,0x20,0x48,0x06] ++tlbclr ++ ++# CHECK: tlbflush ++# CHECK: encoding: [0x00,0x24,0x48,0x06] ++tlbflush ++ ++# CHECK: tlbsrch ++# CHECK: encoding: [0x00,0x28,0x48,0x06] ++tlbsrch ++ ++# CHECK: tlbrd ++# CHECK: encoding: [0x00,0x2c,0x48,0x06] ++tlbrd ++ ++# CHECK: tlbwr ++# CHECK: encoding: [0x00,0x30,0x48,0x06] ++tlbwr ++ ++# CHECK: tlbfill ++# CHECK: encoding: [0x00,0x34,0x48,0x06] ++tlbfill ++ ++# CHECK: ertn ++# CHECK: encoding: [0x00,0x38,0x48,0x06] ++ertn ++ ++# CHECK: idle 204 ++# CHECK: encoding: [0xcc,0x80,0x48,0x06] ++idle 204 ++ ++# CHECK: invtlb 16, $r29, $r25 ++# CHECK: encoding: [0xb0,0xe7,0x49,0x06] ++invtlb 16, $r29, $r25 ++ ++# CHECK: rdtimel.w $r30, $r19 ++# CHECK: encoding: [0x7e,0x62,0x00,0x00] ++rdtimel.w $r30, $r19 ++ ++# CHECK: rdtimeh.w $r19, $r14 ++# CHECK: encoding: [0xd3,0x65,0x00,0x00] ++rdtimeh.w $r19, $r14 ++ ++# CHECK: rdtime.d $tp, $r15 ++# CHECK: encoding: [0xe2,0x69,0x00,0x00] ++rdtime.d $tp, $r15 ++ ++# CHECK: asrtle.d $r12, $r17 ++# CHECK: encoding: [0x80,0x45,0x01,0x00] ++asrtle.d $r12, $r17 ++ ++# CHECK: asrtgt.d $r20, $r20 ++# CHECK: encoding: [0x80,0xd2,0x01,0x00] ++asrtgt.d $r20, $r20 ++ ++# CHECK: break 199 ++# CHECK: encoding: [0xc7,0x00,0x2a,0x00] ++break 199 ++ ++# CHECK: dbcl 201 ++# CHECK: encoding: [0xc9,0x80,0x2a,0x00] ++dbcl 201 ++ ++# CHECK: syscall 100 ++# CHECK: encoding: [0x64,0x00,0x2b,0x00] ++syscall 100 ++ +diff --git a/llvm/test/MC/LoongArch/valid_simd.s b/llvm/test/MC/LoongArch/valid_simd.s +new file mode 100644 +index 000000000..7db00e8bb +--- /dev/null ++++ b/llvm/test/MC/LoongArch/valid_simd.s +@@ -0,0 +1,5437 @@ ++# RUN: llvm-mc %s -triple=loongarch64-unknown-linux-gnu -show-encoding | FileCheck %s ++# CHECK: vfmadd.s $vr15, $vr22, $vr10, $vr18 ++# CHECK: encoding: [0xcf,0x2a,0x19,0x09] ++vfmadd.s $vr15, $vr22, $vr10, $vr18 ++ ++# CHECK: vfmadd.d $vr1, $vr0, $vr12, $vr10 ++# CHECK: encoding: [0x01,0x30,0x25,0x09] ++vfmadd.d $vr1, $vr0, $vr12, $vr10 ++ ++# CHECK: vfmsub.s $vr16, $vr18, $vr13, $vr8 ++# CHECK: encoding: [0x50,0x36,0x54,0x09] ++vfmsub.s $vr16, $vr18, $vr13, $vr8 ++ ++# CHECK: vfmsub.d $vr25, $vr13, $vr1, $vr20 ++# CHECK: encoding: [0xb9,0x05,0x6a,0x09] ++vfmsub.d $vr25, $vr13, $vr1, $vr20 ++ ++# CHECK: vfnmadd.s $vr22, $vr2, $vr17, $vr22 ++# CHECK: encoding: [0x56,0x44,0x9b,0x09] ++vfnmadd.s $vr22, $vr2, $vr17, $vr22 ++ ++# CHECK: vfnmadd.d $vr28, $vr29, $vr2, $vr14 ++# CHECK: encoding: [0xbc,0x0b,0xa7,0x09] ++vfnmadd.d $vr28, $vr29, $vr2, $vr14 ++ ++# CHECK: vfnmsub.s $vr19, $vr4, $vr17, $vr24 ++# CHECK: encoding: [0x93,0x44,0xdc,0x09] ++vfnmsub.s $vr19, $vr4, $vr17, $vr24 ++ ++# CHECK: vfnmsub.d $vr24, $vr22, $vr28, $vr30 ++# CHECK: encoding: [0xd8,0x72,0xef,0x09] ++vfnmsub.d $vr24, $vr22, $vr28, $vr30 ++ ++# CHECK: xvfmadd.s $xr15, $xr28, $xr9, $xr15 ++# CHECK: encoding: [0x8f,0xa7,0x17,0x0a] ++xvfmadd.s $xr15, $xr28, $xr9, $xr15 ++ ++# CHECK: xvfmadd.d $xr5, $xr24, $xr12, $xr10 ++# CHECK: encoding: [0x05,0x33,0x25,0x0a] ++xvfmadd.d $xr5, $xr24, $xr12, $xr10 ++ ++# CHECK: xvfmsub.s $xr20, $xr0, $xr27, $xr26 ++# CHECK: encoding: [0x14,0x6c,0x5d,0x0a] ++xvfmsub.s $xr20, $xr0, $xr27, $xr26 ++ ++# CHECK: xvfmsub.d $xr13, $xr8, $xr25, $xr26 ++# CHECK: encoding: [0x0d,0x65,0x6d,0x0a] ++xvfmsub.d $xr13, $xr8, $xr25, $xr26 ++ ++# CHECK: xvfnmadd.s $xr14, $xr14, $xr22, $xr8 ++# CHECK: encoding: [0xce,0x59,0x94,0x0a] ++xvfnmadd.s $xr14, $xr14, $xr22, $xr8 ++ ++# CHECK: xvfnmadd.d $xr25, $xr17, $xr0, $xr4 ++# CHECK: encoding: 
[0x39,0x02,0xa2,0x0a] ++xvfnmadd.d $xr25, $xr17, $xr0, $xr4 ++ ++# CHECK: xvfnmsub.s $xr11, $xr3, $xr0, $xr11 ++# CHECK: encoding: [0x6b,0x80,0xd5,0x0a] ++xvfnmsub.s $xr11, $xr3, $xr0, $xr11 ++ ++# CHECK: xvfnmsub.d $xr2, $xr3, $xr24, $xr22 ++# CHECK: encoding: [0x62,0x60,0xeb,0x0a] ++xvfnmsub.d $xr2, $xr3, $xr24, $xr22 ++ ++# CHECK: vfcmp.ceq.s $vr26, $vr15, $vr27 ++# CHECK: encoding: [0xfa,0x6d,0x52,0x0c] ++vfcmp.ceq.s $vr26, $vr15, $vr27 ++ ++# CHECK: vfcmp.ceq.d $vr21, $vr21, $vr1 ++# CHECK: encoding: [0xb5,0x06,0x62,0x0c] ++vfcmp.ceq.d $vr21, $vr21, $vr1 ++ ++# CHECK: xvfcmp.ceq.s $xr8, $xr9, $xr19 ++# CHECK: encoding: [0x28,0x4d,0x92,0x0c] ++xvfcmp.ceq.s $xr8, $xr9, $xr19 ++ ++# CHECK: xvfcmp.ceq.d $xr25, $xr16, $xr28 ++# CHECK: encoding: [0x19,0x72,0xa2,0x0c] ++xvfcmp.ceq.d $xr25, $xr16, $xr28 ++ ++# CHECK: vbitsel.v $vr20, $vr23, $vr29, $vr9 ++# CHECK: encoding: [0xf4,0xf6,0x14,0x0d] ++vbitsel.v $vr20, $vr23, $vr29, $vr9 ++ ++# CHECK: xvbitsel.v $xr7, $xr26, $xr28, $xr23 ++# CHECK: encoding: [0x47,0xf3,0x2b,0x0d] ++xvbitsel.v $xr7, $xr26, $xr28, $xr23 ++ ++# CHECK: vshuf.b $vr11, $vr4, $vr7, $vr9 ++# CHECK: encoding: [0x8b,0x9c,0x54,0x0d] ++vshuf.b $vr11, $vr4, $vr7, $vr9 ++ ++# CHECK: xvshuf.b $xr16, $xr21, $xr10, $xr12 ++# CHECK: encoding: [0xb0,0x2a,0x66,0x0d] ++xvshuf.b $xr16, $xr21, $xr10, $xr12 ++ ++# CHECK: vld $vr28, $r25, -510 ++# CHECK: encoding: [0x3c,0x0b,0x38,0x2c] ++vld $vr28, $r25, -510 ++ ++# CHECK: vst $vr28, $r14, 527 ++# CHECK: encoding: [0xdc,0x3d,0x48,0x2c] ++vst $vr28, $r14, 527 ++ ++# CHECK: xvld $xr11, $r6, 512 ++# CHECK: encoding: [0xcb,0x00,0x88,0x2c] ++xvld $xr11, $r6, 512 ++ ++# CHECK: xvst $xr13, $r7, 1215 ++# CHECK: encoding: [0xed,0xfc,0xd2,0x2c] ++xvst $xr13, $r7, 1215 ++ ++# CHECK: vldrepl.d $vr8, $r9, -1544 ++# CHECK: encoding: [0x28,0xfd,0x14,0x30] ++vldrepl.d $vr8, $r9, -1544 ++ ++# CHECK: vldrepl.w $vr2, $r9, -296 ++# CHECK: encoding: [0x22,0xd9,0x2e,0x30] ++vldrepl.w $vr2, $r9, -296 ++ ++# CHECK: vldrepl.h $vr28, $r23, 252 ++# CHECK: encoding: [0xfc,0xfa,0x41,0x30] ++vldrepl.h $vr28, $r23, 252 ++ ++# CHECK: vldrepl.b $vr5, $r9, -725 ++# CHECK: encoding: [0x25,0xad,0xb4,0x30] ++vldrepl.b $vr5, $r9, -725 ++ ++# CHECK: vstelm.d $vr23, $r26, 680, 1 ++# CHECK: encoding: [0x57,0x57,0x15,0x31] ++vstelm.d $vr23, $r26, 680, 1 ++ ++# CHECK: vstelm.w $vr30, $r23, -372, 1 ++# CHECK: encoding: [0xfe,0x8e,0x26,0x31] ++vstelm.w $vr30, $r23, -372, 1 ++ ++# CHECK: vstelm.h $vr11, $r6, 30, 7 ++# CHECK: encoding: [0xcb,0x3c,0x5c,0x31] ++vstelm.h $vr11, $r6, 30, 7 ++ ++# CHECK: vstelm.b $vr3, $r15, 44, 14 ++# CHECK: encoding: [0xe3,0xb1,0xb8,0x31] ++vstelm.b $vr3, $r15, 44, 14 ++ ++# CHECK: xvldrepl.d $xr24, $r8, 840 ++# CHECK: encoding: [0x18,0xa5,0x11,0x32] ++xvldrepl.d $xr24, $r8, 840 ++ ++# CHECK: xvldrepl.w $xr14, $r24, 492 ++# CHECK: encoding: [0x0e,0xef,0x21,0x32] ++xvldrepl.w $xr14, $r24, 492 ++ ++# CHECK: xvldrepl.h $xr18, $r9, 804 ++# CHECK: encoding: [0x32,0x49,0x46,0x32] ++xvldrepl.h $xr18, $r9, 804 ++ ++# CHECK: xvldrepl.b $xr6, $r29, 811 ++# CHECK: encoding: [0xa6,0xaf,0x8c,0x32] ++xvldrepl.b $xr6, $r29, 811 ++ ++# CHECK: xvstelm.d $xr21, $sp, -216, 0 ++# CHECK: encoding: [0x75,0x94,0x13,0x33] ++xvstelm.d $xr21, $sp, -216, 0 ++ ++# CHECK: xvstelm.w $xr31, $r29, 424, 0 ++# CHECK: encoding: [0xbf,0xab,0x21,0x33] ++xvstelm.w $xr31, $r29, 424, 0 ++ ++# CHECK: xvstelm.h $xr14, $r7, 90, 4 ++# CHECK: encoding: [0xee,0xb4,0x50,0x33] ++xvstelm.h $xr14, $r7, 90, 4 ++ ++# CHECK: xvstelm.b $xr21, $r24, -5, 8 ++# CHECK: encoding: [0x15,0xef,0xa3,0x33] ++xvstelm.b 
$xr21, $r24, -5, 8 ++ ++# CHECK: vldx $vr29, $r4, $r30 ++# CHECK: encoding: [0x9d,0x78,0x40,0x38] ++vldx $vr29, $r4, $r30 ++ ++# CHECK: vstx $vr31, $r28, $r29 ++# CHECK: encoding: [0x9f,0x77,0x44,0x38] ++vstx $vr31, $r28, $r29 ++ ++# CHECK: xvldx $xr8, $r30, $r24 ++# CHECK: encoding: [0xc8,0x63,0x48,0x38] ++xvldx $xr8, $r30, $r24 ++ ++# CHECK: xvstx $xr2, $r9, $r29 ++# CHECK: encoding: [0x22,0x75,0x4c,0x38] ++xvstx $xr2, $r9, $r29 ++ ++# CHECK: vseq.b $vr28, $vr26, $vr23 ++# CHECK: encoding: [0x5c,0x5f,0x00,0x70] ++vseq.b $vr28, $vr26, $vr23 ++ ++# CHECK: vseq.h $vr10, $vr1, $vr5 ++# CHECK: encoding: [0x2a,0x94,0x00,0x70] ++vseq.h $vr10, $vr1, $vr5 ++ ++# CHECK: vseq.w $vr3, $vr27, $vr17 ++# CHECK: encoding: [0x63,0x47,0x01,0x70] ++vseq.w $vr3, $vr27, $vr17 ++ ++# CHECK: vseq.d $vr5, $vr3, $vr3 ++# CHECK: encoding: [0x65,0x8c,0x01,0x70] ++vseq.d $vr5, $vr3, $vr3 ++ ++# CHECK: vsle.b $vr29, $vr9, $vr7 ++# CHECK: encoding: [0x3d,0x1d,0x02,0x70] ++vsle.b $vr29, $vr9, $vr7 ++ ++# CHECK: vsle.h $vr5, $vr24, $vr9 ++# CHECK: encoding: [0x05,0xa7,0x02,0x70] ++vsle.h $vr5, $vr24, $vr9 ++ ++# CHECK: vsle.w $vr17, $vr30, $vr20 ++# CHECK: encoding: [0xd1,0x53,0x03,0x70] ++vsle.w $vr17, $vr30, $vr20 ++ ++# CHECK: vsle.d $vr27, $vr6, $vr13 ++# CHECK: encoding: [0xdb,0xb4,0x03,0x70] ++vsle.d $vr27, $vr6, $vr13 ++ ++# CHECK: vsle.bu $vr30, $vr11, $vr10 ++# CHECK: encoding: [0x7e,0x29,0x04,0x70] ++vsle.bu $vr30, $vr11, $vr10 ++ ++# CHECK: vsle.hu $vr19, $vr29, $vr31 ++# CHECK: encoding: [0xb3,0xff,0x04,0x70] ++vsle.hu $vr19, $vr29, $vr31 ++ ++# CHECK: vsle.wu $vr16, $vr18, $vr20 ++# CHECK: encoding: [0x50,0x52,0x05,0x70] ++vsle.wu $vr16, $vr18, $vr20 ++ ++# CHECK: vsle.du $vr31, $vr17, $vr8 ++# CHECK: encoding: [0x3f,0xa2,0x05,0x70] ++vsle.du $vr31, $vr17, $vr8 ++ ++# CHECK: vslt.b $vr26, $vr7, $vr5 ++# CHECK: encoding: [0xfa,0x14,0x06,0x70] ++vslt.b $vr26, $vr7, $vr5 ++ ++# CHECK: vslt.h $vr14, $vr2, $vr20 ++# CHECK: encoding: [0x4e,0xd0,0x06,0x70] ++vslt.h $vr14, $vr2, $vr20 ++ ++# CHECK: vslt.w $vr14, $vr5, $vr25 ++# CHECK: encoding: [0xae,0x64,0x07,0x70] ++vslt.w $vr14, $vr5, $vr25 ++ ++# CHECK: vslt.d $vr26, $vr9, $vr25 ++# CHECK: encoding: [0x3a,0xe5,0x07,0x70] ++vslt.d $vr26, $vr9, $vr25 ++ ++# CHECK: vslt.bu $vr31, $vr18, $vr14 ++# CHECK: encoding: [0x5f,0x3a,0x08,0x70] ++vslt.bu $vr31, $vr18, $vr14 ++ ++# CHECK: vslt.hu $vr5, $vr15, $vr5 ++# CHECK: encoding: [0xe5,0x95,0x08,0x70] ++vslt.hu $vr5, $vr15, $vr5 ++ ++# CHECK: vslt.wu $vr31, $vr28, $vr13 ++# CHECK: encoding: [0x9f,0x37,0x09,0x70] ++vslt.wu $vr31, $vr28, $vr13 ++ ++# CHECK: vslt.du $vr11, $vr19, $vr22 ++# CHECK: encoding: [0x6b,0xda,0x09,0x70] ++vslt.du $vr11, $vr19, $vr22 ++ ++# CHECK: vadd.b $vr26, $vr20, $vr31 ++# CHECK: encoding: [0x9a,0x7e,0x0a,0x70] ++vadd.b $vr26, $vr20, $vr31 ++ ++# CHECK: vadd.h $vr11, $vr25, $vr29 ++# CHECK: encoding: [0x2b,0xf7,0x0a,0x70] ++vadd.h $vr11, $vr25, $vr29 ++ ++# CHECK: vadd.w $vr7, $vr25, $vr13 ++# CHECK: encoding: [0x27,0x37,0x0b,0x70] ++vadd.w $vr7, $vr25, $vr13 ++ ++# CHECK: vadd.d $vr16, $vr13, $vr16 ++# CHECK: encoding: [0xb0,0xc1,0x0b,0x70] ++vadd.d $vr16, $vr13, $vr16 ++ ++# CHECK: vsub.b $vr12, $vr3, $vr21 ++# CHECK: encoding: [0x6c,0x54,0x0c,0x70] ++vsub.b $vr12, $vr3, $vr21 ++ ++# CHECK: vsub.h $vr15, $vr13, $vr25 ++# CHECK: encoding: [0xaf,0xe5,0x0c,0x70] ++vsub.h $vr15, $vr13, $vr25 ++ ++# CHECK: vsub.w $vr20, $vr16, $vr25 ++# CHECK: encoding: [0x14,0x66,0x0d,0x70] ++vsub.w $vr20, $vr16, $vr25 ++ ++# CHECK: vsub.d $vr19, $vr3, $vr7 ++# CHECK: encoding: [0x73,0x9c,0x0d,0x70] ++vsub.d 
$vr19, $vr3, $vr7 ++ ++# CHECK: vsadd.b $vr14, $vr30, $vr5 ++# CHECK: encoding: [0xce,0x17,0x46,0x70] ++vsadd.b $vr14, $vr30, $vr5 ++ ++# CHECK: vsadd.h $vr10, $vr1, $vr15 ++# CHECK: encoding: [0x2a,0xbc,0x46,0x70] ++vsadd.h $vr10, $vr1, $vr15 ++ ++# CHECK: vsadd.w $vr19, $vr31, $vr10 ++# CHECK: encoding: [0xf3,0x2b,0x47,0x70] ++vsadd.w $vr19, $vr31, $vr10 ++ ++# CHECK: vsadd.d $vr26, $vr19, $vr28 ++# CHECK: encoding: [0x7a,0xf2,0x47,0x70] ++vsadd.d $vr26, $vr19, $vr28 ++ ++# CHECK: vssub.b $vr24, $vr3, $vr7 ++# CHECK: encoding: [0x78,0x1c,0x48,0x70] ++vssub.b $vr24, $vr3, $vr7 ++ ++# CHECK: vssub.h $vr31, $vr4, $vr24 ++# CHECK: encoding: [0x9f,0xe0,0x48,0x70] ++vssub.h $vr31, $vr4, $vr24 ++ ++# CHECK: vssub.w $vr29, $vr27, $vr12 ++# CHECK: encoding: [0x7d,0x33,0x49,0x70] ++vssub.w $vr29, $vr27, $vr12 ++ ++# CHECK: vssub.d $vr23, $vr16, $vr9 ++# CHECK: encoding: [0x17,0xa6,0x49,0x70] ++vssub.d $vr23, $vr16, $vr9 ++ ++# CHECK: vsadd.bu $vr26, $vr29, $vr4 ++# CHECK: encoding: [0xba,0x13,0x4a,0x70] ++vsadd.bu $vr26, $vr29, $vr4 ++ ++# CHECK: vsadd.hu $vr15, $vr7, $vr9 ++# CHECK: encoding: [0xef,0xa4,0x4a,0x70] ++vsadd.hu $vr15, $vr7, $vr9 ++ ++# CHECK: vsadd.wu $vr13, $vr18, $vr16 ++# CHECK: encoding: [0x4d,0x42,0x4b,0x70] ++vsadd.wu $vr13, $vr18, $vr16 ++ ++# CHECK: vsadd.du $vr4, $vr5, $vr0 ++# CHECK: encoding: [0xa4,0x80,0x4b,0x70] ++vsadd.du $vr4, $vr5, $vr0 ++ ++# CHECK: vssub.bu $vr27, $vr17, $vr13 ++# CHECK: encoding: [0x3b,0x36,0x4c,0x70] ++vssub.bu $vr27, $vr17, $vr13 ++ ++# CHECK: vssub.hu $vr5, $vr8, $vr1 ++# CHECK: encoding: [0x05,0x85,0x4c,0x70] ++vssub.hu $vr5, $vr8, $vr1 ++ ++# CHECK: vssub.wu $vr14, $vr8, $vr22 ++# CHECK: encoding: [0x0e,0x59,0x4d,0x70] ++vssub.wu $vr14, $vr8, $vr22 ++ ++# CHECK: vssub.du $vr17, $vr9, $vr8 ++# CHECK: encoding: [0x31,0xa1,0x4d,0x70] ++vssub.du $vr17, $vr9, $vr8 ++ ++# CHECK: vhaddw.h.b $vr23, $vr19, $vr2 ++# CHECK: encoding: [0x77,0x0a,0x54,0x70] ++vhaddw.h.b $vr23, $vr19, $vr2 ++ ++# CHECK: vhaddw.w.h $vr26, $vr16, $vr26 ++# CHECK: encoding: [0x1a,0xea,0x54,0x70] ++vhaddw.w.h $vr26, $vr16, $vr26 ++ ++# CHECK: vhaddw.d.w $vr0, $vr31, $vr27 ++# CHECK: encoding: [0xe0,0x6f,0x55,0x70] ++vhaddw.d.w $vr0, $vr31, $vr27 ++ ++# CHECK: vhaddw.q.d $vr25, $vr13, $vr25 ++# CHECK: encoding: [0xb9,0xe5,0x55,0x70] ++vhaddw.q.d $vr25, $vr13, $vr25 ++ ++# CHECK: vhsubw.h.b $vr9, $vr23, $vr5 ++# CHECK: encoding: [0xe9,0x16,0x56,0x70] ++vhsubw.h.b $vr9, $vr23, $vr5 ++ ++# CHECK: vhsubw.w.h $vr15, $vr29, $vr26 ++# CHECK: encoding: [0xaf,0xeb,0x56,0x70] ++vhsubw.w.h $vr15, $vr29, $vr26 ++ ++# CHECK: vhsubw.d.w $vr0, $vr28, $vr18 ++# CHECK: encoding: [0x80,0x4b,0x57,0x70] ++vhsubw.d.w $vr0, $vr28, $vr18 ++ ++# CHECK: vhsubw.q.d $vr14, $vr25, $vr8 ++# CHECK: encoding: [0x2e,0xa3,0x57,0x70] ++vhsubw.q.d $vr14, $vr25, $vr8 ++ ++# CHECK: vhaddw.hu.bu $vr1, $vr16, $vr21 ++# CHECK: encoding: [0x01,0x56,0x58,0x70] ++vhaddw.hu.bu $vr1, $vr16, $vr21 ++ ++# CHECK: vhaddw.wu.hu $vr28, $vr21, $vr29 ++# CHECK: encoding: [0xbc,0xf6,0x58,0x70] ++vhaddw.wu.hu $vr28, $vr21, $vr29 ++ ++# CHECK: vhaddw.du.wu $vr29, $vr20, $vr16 ++# CHECK: encoding: [0x9d,0x42,0x59,0x70] ++vhaddw.du.wu $vr29, $vr20, $vr16 ++ ++# CHECK: vhaddw.qu.du $vr2, $vr10, $vr28 ++# CHECK: encoding: [0x42,0xf1,0x59,0x70] ++vhaddw.qu.du $vr2, $vr10, $vr28 ++ ++# CHECK: vhsubw.hu.bu $vr31, $vr3, $vr30 ++# CHECK: encoding: [0x7f,0x78,0x5a,0x70] ++vhsubw.hu.bu $vr31, $vr3, $vr30 ++ ++# CHECK: vhsubw.wu.hu $vr5, $vr9, $vr11 ++# CHECK: encoding: [0x25,0xad,0x5a,0x70] ++vhsubw.wu.hu $vr5, $vr9, $vr11 ++ ++# CHECK: 
vhsubw.du.wu $vr23, $vr31, $vr22 ++# CHECK: encoding: [0xf7,0x5b,0x5b,0x70] ++vhsubw.du.wu $vr23, $vr31, $vr22 ++ ++# CHECK: vhsubw.qu.du $vr4, $vr28, $vr18 ++# CHECK: encoding: [0x84,0xcb,0x5b,0x70] ++vhsubw.qu.du $vr4, $vr28, $vr18 ++ ++# CHECK: vadda.b $vr18, $vr13, $vr11 ++# CHECK: encoding: [0xb2,0x2d,0x5c,0x70] ++vadda.b $vr18, $vr13, $vr11 ++ ++# CHECK: vadda.h $vr17, $vr14, $vr12 ++# CHECK: encoding: [0xd1,0xb1,0x5c,0x70] ++vadda.h $vr17, $vr14, $vr12 ++ ++# CHECK: vadda.w $vr22, $vr11, $vr3 ++# CHECK: encoding: [0x76,0x0d,0x5d,0x70] ++vadda.w $vr22, $vr11, $vr3 ++ ++# CHECK: vadda.d $vr24, $vr24, $vr15 ++# CHECK: encoding: [0x18,0xbf,0x5d,0x70] ++vadda.d $vr24, $vr24, $vr15 ++ ++# CHECK: vabsd.b $vr23, $vr19, $vr17 ++# CHECK: encoding: [0x77,0x46,0x60,0x70] ++vabsd.b $vr23, $vr19, $vr17 ++ ++# CHECK: vabsd.h $vr14, $vr31, $vr13 ++# CHECK: encoding: [0xee,0xb7,0x60,0x70] ++vabsd.h $vr14, $vr31, $vr13 ++ ++# CHECK: vabsd.w $vr24, $vr1, $vr9 ++# CHECK: encoding: [0x38,0x24,0x61,0x70] ++vabsd.w $vr24, $vr1, $vr9 ++ ++# CHECK: vabsd.d $vr31, $vr20, $vr0 ++# CHECK: encoding: [0x9f,0x82,0x61,0x70] ++vabsd.d $vr31, $vr20, $vr0 ++ ++# CHECK: vabsd.bu $vr23, $vr12, $vr29 ++# CHECK: encoding: [0x97,0x75,0x62,0x70] ++vabsd.bu $vr23, $vr12, $vr29 ++ ++# CHECK: vabsd.hu $vr18, $vr19, $vr1 ++# CHECK: encoding: [0x72,0x86,0x62,0x70] ++vabsd.hu $vr18, $vr19, $vr1 ++ ++# CHECK: vabsd.wu $vr13, $vr21, $vr28 ++# CHECK: encoding: [0xad,0x72,0x63,0x70] ++vabsd.wu $vr13, $vr21, $vr28 ++ ++# CHECK: vabsd.du $vr16, $vr26, $vr11 ++# CHECK: encoding: [0x50,0xaf,0x63,0x70] ++vabsd.du $vr16, $vr26, $vr11 ++ ++# CHECK: vavg.b $vr1, $vr21, $vr27 ++# CHECK: encoding: [0xa1,0x6e,0x64,0x70] ++vavg.b $vr1, $vr21, $vr27 ++ ++# CHECK: vavg.h $vr20, $vr26, $vr15 ++# CHECK: encoding: [0x54,0xbf,0x64,0x70] ++vavg.h $vr20, $vr26, $vr15 ++ ++# CHECK: vavg.w $vr29, $vr18, $vr3 ++# CHECK: encoding: [0x5d,0x0e,0x65,0x70] ++vavg.w $vr29, $vr18, $vr3 ++ ++# CHECK: vavg.d $vr19, $vr15, $vr31 ++# CHECK: encoding: [0xf3,0xfd,0x65,0x70] ++vavg.d $vr19, $vr15, $vr31 ++ ++# CHECK: vavg.bu $vr11, $vr11, $vr17 ++# CHECK: encoding: [0x6b,0x45,0x66,0x70] ++vavg.bu $vr11, $vr11, $vr17 ++ ++# CHECK: vavg.hu $vr30, $vr28, $vr13 ++# CHECK: encoding: [0x9e,0xb7,0x66,0x70] ++vavg.hu $vr30, $vr28, $vr13 ++ ++# CHECK: vavg.wu $vr7, $vr7, $vr10 ++# CHECK: encoding: [0xe7,0x28,0x67,0x70] ++vavg.wu $vr7, $vr7, $vr10 ++ ++# CHECK: vavg.du $vr25, $vr7, $vr12 ++# CHECK: encoding: [0xf9,0xb0,0x67,0x70] ++vavg.du $vr25, $vr7, $vr12 ++ ++# CHECK: vavgr.b $vr29, $vr13, $vr7 ++# CHECK: encoding: [0xbd,0x1d,0x68,0x70] ++vavgr.b $vr29, $vr13, $vr7 ++ ++# CHECK: vavgr.h $vr5, $vr28, $vr19 ++# CHECK: encoding: [0x85,0xcf,0x68,0x70] ++vavgr.h $vr5, $vr28, $vr19 ++ ++# CHECK: vavgr.w $vr19, $vr15, $vr14 ++# CHECK: encoding: [0xf3,0x39,0x69,0x70] ++vavgr.w $vr19, $vr15, $vr14 ++ ++# CHECK: vavgr.d $vr3, $vr0, $vr2 ++# CHECK: encoding: [0x03,0x88,0x69,0x70] ++vavgr.d $vr3, $vr0, $vr2 ++ ++# CHECK: vavgr.bu $vr23, $vr11, $vr31 ++# CHECK: encoding: [0x77,0x7d,0x6a,0x70] ++vavgr.bu $vr23, $vr11, $vr31 ++ ++# CHECK: vavgr.hu $vr25, $vr19, $vr8 ++# CHECK: encoding: [0x79,0xa2,0x6a,0x70] ++vavgr.hu $vr25, $vr19, $vr8 ++ ++# CHECK: vavgr.wu $vr30, $vr25, $vr12 ++# CHECK: encoding: [0x3e,0x33,0x6b,0x70] ++vavgr.wu $vr30, $vr25, $vr12 ++ ++# CHECK: vavgr.du $vr25, $vr20, $vr25 ++# CHECK: encoding: [0x99,0xe6,0x6b,0x70] ++vavgr.du $vr25, $vr20, $vr25 ++ ++# CHECK: vmax.b $vr28, $vr26, $vr26 ++# CHECK: encoding: [0x5c,0x6b,0x70,0x70] ++vmax.b $vr28, $vr26, $vr26 ++ ++# 
CHECK: vmax.h $vr8, $vr13, $vr11 ++# CHECK: encoding: [0xa8,0xad,0x70,0x70] ++vmax.h $vr8, $vr13, $vr11 ++ ++# CHECK: vmax.w $vr21, $vr28, $vr31 ++# CHECK: encoding: [0x95,0x7f,0x71,0x70] ++vmax.w $vr21, $vr28, $vr31 ++ ++# CHECK: vmax.d $vr1, $vr30, $vr26 ++# CHECK: encoding: [0xc1,0xeb,0x71,0x70] ++vmax.d $vr1, $vr30, $vr26 ++ ++# CHECK: vmin.b $vr10, $vr14, $vr9 ++# CHECK: encoding: [0xca,0x25,0x72,0x70] ++vmin.b $vr10, $vr14, $vr9 ++ ++# CHECK: vmin.h $vr10, $vr11, $vr21 ++# CHECK: encoding: [0x6a,0xd5,0x72,0x70] ++vmin.h $vr10, $vr11, $vr21 ++ ++# CHECK: vmin.w $vr26, $vr0, $vr12 ++# CHECK: encoding: [0x1a,0x30,0x73,0x70] ++vmin.w $vr26, $vr0, $vr12 ++ ++# CHECK: vmin.d $vr19, $vr18, $vr0 ++# CHECK: encoding: [0x53,0x82,0x73,0x70] ++vmin.d $vr19, $vr18, $vr0 ++ ++# CHECK: vmax.bu $vr2, $vr25, $vr28 ++# CHECK: encoding: [0x22,0x73,0x74,0x70] ++vmax.bu $vr2, $vr25, $vr28 ++ ++# CHECK: vmax.hu $vr9, $vr22, $vr30 ++# CHECK: encoding: [0xc9,0xfa,0x74,0x70] ++vmax.hu $vr9, $vr22, $vr30 ++ ++# CHECK: vmax.wu $vr21, $vr25, $vr27 ++# CHECK: encoding: [0x35,0x6f,0x75,0x70] ++vmax.wu $vr21, $vr25, $vr27 ++ ++# CHECK: vmax.du $vr3, $vr14, $vr25 ++# CHECK: encoding: [0xc3,0xe5,0x75,0x70] ++vmax.du $vr3, $vr14, $vr25 ++ ++# CHECK: vmin.bu $vr24, $vr7, $vr27 ++# CHECK: encoding: [0xf8,0x6c,0x76,0x70] ++vmin.bu $vr24, $vr7, $vr27 ++ ++# CHECK: vmin.hu $vr18, $vr28, $vr29 ++# CHECK: encoding: [0x92,0xf7,0x76,0x70] ++vmin.hu $vr18, $vr28, $vr29 ++ ++# CHECK: vmin.wu $vr26, $vr4, $vr2 ++# CHECK: encoding: [0x9a,0x08,0x77,0x70] ++vmin.wu $vr26, $vr4, $vr2 ++ ++# CHECK: vmin.du $vr13, $vr0, $vr4 ++# CHECK: encoding: [0x0d,0x90,0x77,0x70] ++vmin.du $vr13, $vr0, $vr4 ++ ++# CHECK: vmul.b $vr1, $vr21, $vr23 ++# CHECK: encoding: [0xa1,0x5e,0x84,0x70] ++vmul.b $vr1, $vr21, $vr23 ++ ++# CHECK: vmul.h $vr9, $vr21, $vr25 ++# CHECK: encoding: [0xa9,0xe6,0x84,0x70] ++vmul.h $vr9, $vr21, $vr25 ++ ++# CHECK: vmul.w $vr16, $vr8, $vr28 ++# CHECK: encoding: [0x10,0x71,0x85,0x70] ++vmul.w $vr16, $vr8, $vr28 ++ ++# CHECK: vmul.d $vr4, $vr17, $vr11 ++# CHECK: encoding: [0x24,0xae,0x85,0x70] ++vmul.d $vr4, $vr17, $vr11 ++ ++# CHECK: vmuh.b $vr12, $vr24, $vr8 ++# CHECK: encoding: [0x0c,0x23,0x86,0x70] ++vmuh.b $vr12, $vr24, $vr8 ++ ++# CHECK: vmuh.h $vr6, $vr21, $vr24 ++# CHECK: encoding: [0xa6,0xe2,0x86,0x70] ++vmuh.h $vr6, $vr21, $vr24 ++ ++# CHECK: vmuh.w $vr11, $vr29, $vr30 ++# CHECK: encoding: [0xab,0x7b,0x87,0x70] ++vmuh.w $vr11, $vr29, $vr30 ++ ++# CHECK: vmuh.d $vr1, $vr17, $vr25 ++# CHECK: encoding: [0x21,0xe6,0x87,0x70] ++vmuh.d $vr1, $vr17, $vr25 ++ ++# CHECK: vmuh.bu $vr29, $vr29, $vr10 ++# CHECK: encoding: [0xbd,0x2b,0x88,0x70] ++vmuh.bu $vr29, $vr29, $vr10 ++ ++# CHECK: vmuh.hu $vr24, $vr9, $vr21 ++# CHECK: encoding: [0x38,0xd5,0x88,0x70] ++vmuh.hu $vr24, $vr9, $vr21 ++ ++# CHECK: vmuh.wu $vr15, $vr20, $vr19 ++# CHECK: encoding: [0x8f,0x4e,0x89,0x70] ++vmuh.wu $vr15, $vr20, $vr19 ++ ++# CHECK: vmuh.du $vr0, $vr28, $vr1 ++# CHECK: encoding: [0x80,0x87,0x89,0x70] ++vmuh.du $vr0, $vr28, $vr1 ++ ++# CHECK: vmadd.b $vr27, $vr0, $vr4 ++# CHECK: encoding: [0x1b,0x10,0xa8,0x70] ++vmadd.b $vr27, $vr0, $vr4 ++ ++# CHECK: vmadd.h $vr19, $vr20, $vr28 ++# CHECK: encoding: [0x93,0xf2,0xa8,0x70] ++vmadd.h $vr19, $vr20, $vr28 ++ ++# CHECK: vmadd.w $vr15, $vr7, $vr3 ++# CHECK: encoding: [0xef,0x0c,0xa9,0x70] ++vmadd.w $vr15, $vr7, $vr3 ++ ++# CHECK: vmadd.d $vr25, $vr25, $vr30 ++# CHECK: encoding: [0x39,0xfb,0xa9,0x70] ++vmadd.d $vr25, $vr25, $vr30 ++ ++# CHECK: vmsub.b $vr24, $vr25, $vr26 ++# CHECK: encoding: 
[0x38,0x6b,0xaa,0x70] ++vmsub.b $vr24, $vr25, $vr26 ++ ++# CHECK: vmsub.h $vr12, $vr0, $vr13 ++# CHECK: encoding: [0x0c,0xb4,0xaa,0x70] ++vmsub.h $vr12, $vr0, $vr13 ++ ++# CHECK: vmsub.w $vr26, $vr16, $vr24 ++# CHECK: encoding: [0x1a,0x62,0xab,0x70] ++vmsub.w $vr26, $vr16, $vr24 ++ ++# CHECK: vmsub.d $vr13, $vr10, $vr8 ++# CHECK: encoding: [0x4d,0xa1,0xab,0x70] ++vmsub.d $vr13, $vr10, $vr8 ++ ++# CHECK: vdiv.b $vr18, $vr28, $vr21 ++# CHECK: encoding: [0x92,0x57,0xe0,0x70] ++vdiv.b $vr18, $vr28, $vr21 ++ ++# CHECK: vdiv.h $vr17, $vr24, $vr1 ++# CHECK: encoding: [0x11,0x87,0xe0,0x70] ++vdiv.h $vr17, $vr24, $vr1 ++ ++# CHECK: vdiv.w $vr3, $vr10, $vr22 ++# CHECK: encoding: [0x43,0x59,0xe1,0x70] ++vdiv.w $vr3, $vr10, $vr22 ++ ++# CHECK: vdiv.d $vr15, $vr13, $vr8 ++# CHECK: encoding: [0xaf,0xa1,0xe1,0x70] ++vdiv.d $vr15, $vr13, $vr8 ++ ++# CHECK: vmod.b $vr19, $vr25, $vr20 ++# CHECK: encoding: [0x33,0x53,0xe2,0x70] ++vmod.b $vr19, $vr25, $vr20 ++ ++# CHECK: vmod.h $vr2, $vr24, $vr22 ++# CHECK: encoding: [0x02,0xdb,0xe2,0x70] ++vmod.h $vr2, $vr24, $vr22 ++ ++# CHECK: vmod.w $vr31, $vr18, $vr0 ++# CHECK: encoding: [0x5f,0x02,0xe3,0x70] ++vmod.w $vr31, $vr18, $vr0 ++ ++# CHECK: vmod.d $vr31, $vr0, $vr2 ++# CHECK: encoding: [0x1f,0x88,0xe3,0x70] ++vmod.d $vr31, $vr0, $vr2 ++ ++# CHECK: vdiv.bu $vr15, $vr4, $vr3 ++# CHECK: encoding: [0x8f,0x0c,0xe4,0x70] ++vdiv.bu $vr15, $vr4, $vr3 ++ ++# CHECK: vdiv.hu $vr17, $vr7, $vr29 ++# CHECK: encoding: [0xf1,0xf4,0xe4,0x70] ++vdiv.hu $vr17, $vr7, $vr29 ++ ++# CHECK: vdiv.wu $vr27, $vr10, $vr3 ++# CHECK: encoding: [0x5b,0x0d,0xe5,0x70] ++vdiv.wu $vr27, $vr10, $vr3 ++ ++# CHECK: vdiv.du $vr8, $vr24, $vr26 ++# CHECK: encoding: [0x08,0xeb,0xe5,0x70] ++vdiv.du $vr8, $vr24, $vr26 ++ ++# CHECK: vmod.bu $vr10, $vr22, $vr24 ++# CHECK: encoding: [0xca,0x62,0xe6,0x70] ++vmod.bu $vr10, $vr22, $vr24 ++ ++# CHECK: vmod.hu $vr19, $vr31, $vr24 ++# CHECK: encoding: [0xf3,0xe3,0xe6,0x70] ++vmod.hu $vr19, $vr31, $vr24 ++ ++# CHECK: vmod.wu $vr26, $vr24, $vr13 ++# CHECK: encoding: [0x1a,0x37,0xe7,0x70] ++vmod.wu $vr26, $vr24, $vr13 ++ ++# CHECK: vmod.du $vr20, $vr19, $vr10 ++# CHECK: encoding: [0x74,0xaa,0xe7,0x70] ++vmod.du $vr20, $vr19, $vr10 ++ ++# CHECK: vsll.b $vr28, $vr18, $vr30 ++# CHECK: encoding: [0x5c,0x7a,0xe8,0x70] ++vsll.b $vr28, $vr18, $vr30 ++ ++# CHECK: vsll.h $vr22, $vr4, $vr30 ++# CHECK: encoding: [0x96,0xf8,0xe8,0x70] ++vsll.h $vr22, $vr4, $vr30 ++ ++# CHECK: vsll.w $vr1, $vr25, $vr8 ++# CHECK: encoding: [0x21,0x23,0xe9,0x70] ++vsll.w $vr1, $vr25, $vr8 ++ ++# CHECK: vsll.d $vr31, $vr18, $vr15 ++# CHECK: encoding: [0x5f,0xbe,0xe9,0x70] ++vsll.d $vr31, $vr18, $vr15 ++ ++# CHECK: vsrl.b $vr5, $vr12, $vr16 ++# CHECK: encoding: [0x85,0x41,0xea,0x70] ++vsrl.b $vr5, $vr12, $vr16 ++ ++# CHECK: vsrl.h $vr9, $vr5, $vr28 ++# CHECK: encoding: [0xa9,0xf0,0xea,0x70] ++vsrl.h $vr9, $vr5, $vr28 ++ ++# CHECK: vsrl.w $vr30, $vr16, $vr1 ++# CHECK: encoding: [0x1e,0x06,0xeb,0x70] ++vsrl.w $vr30, $vr16, $vr1 ++ ++# CHECK: vsrl.d $vr28, $vr23, $vr27 ++# CHECK: encoding: [0xfc,0xee,0xeb,0x70] ++vsrl.d $vr28, $vr23, $vr27 ++ ++# CHECK: vsra.b $vr15, $vr17, $vr25 ++# CHECK: encoding: [0x2f,0x66,0xec,0x70] ++vsra.b $vr15, $vr17, $vr25 ++ ++# CHECK: vsra.h $vr0, $vr8, $vr5 ++# CHECK: encoding: [0x00,0x95,0xec,0x70] ++vsra.h $vr0, $vr8, $vr5 ++ ++# CHECK: vsra.w $vr29, $vr9, $vr7 ++# CHECK: encoding: [0x3d,0x1d,0xed,0x70] ++vsra.w $vr29, $vr9, $vr7 ++ ++# CHECK: vsra.d $vr22, $vr3, $vr19 ++# CHECK: encoding: [0x76,0xcc,0xed,0x70] ++vsra.d $vr22, $vr3, $vr19 ++ ++# CHECK: vrotr.b $vr8, 
$vr16, $vr8 ++# CHECK: encoding: [0x08,0x22,0xee,0x70] ++vrotr.b $vr8, $vr16, $vr8 ++ ++# CHECK: vrotr.h $vr14, $vr5, $vr11 ++# CHECK: encoding: [0xae,0xac,0xee,0x70] ++vrotr.h $vr14, $vr5, $vr11 ++ ++# CHECK: vrotr.w $vr17, $vr28, $vr25 ++# CHECK: encoding: [0x91,0x67,0xef,0x70] ++vrotr.w $vr17, $vr28, $vr25 ++ ++# CHECK: vrotr.d $vr18, $vr28, $vr19 ++# CHECK: encoding: [0x92,0xcf,0xef,0x70] ++vrotr.d $vr18, $vr28, $vr19 ++ ++# CHECK: vsrlr.b $vr1, $vr27, $vr17 ++# CHECK: encoding: [0x61,0x47,0xf0,0x70] ++vsrlr.b $vr1, $vr27, $vr17 ++ ++# CHECK: vsrlr.h $vr26, $vr14, $vr10 ++# CHECK: encoding: [0xda,0xa9,0xf0,0x70] ++vsrlr.h $vr26, $vr14, $vr10 ++ ++# CHECK: vsrlr.w $vr3, $vr29, $vr24 ++# CHECK: encoding: [0xa3,0x63,0xf1,0x70] ++vsrlr.w $vr3, $vr29, $vr24 ++ ++# CHECK: vsrlr.d $vr23, $vr4, $vr10 ++# CHECK: encoding: [0x97,0xa8,0xf1,0x70] ++vsrlr.d $vr23, $vr4, $vr10 ++ ++# CHECK: vsrar.b $vr25, $vr2, $vr21 ++# CHECK: encoding: [0x59,0x54,0xf2,0x70] ++vsrar.b $vr25, $vr2, $vr21 ++ ++# CHECK: vsrar.h $vr4, $vr11, $vr20 ++# CHECK: encoding: [0x64,0xd1,0xf2,0x70] ++vsrar.h $vr4, $vr11, $vr20 ++ ++# CHECK: vsrar.w $vr11, $vr21, $vr29 ++# CHECK: encoding: [0xab,0x76,0xf3,0x70] ++vsrar.w $vr11, $vr21, $vr29 ++ ++# CHECK: vsrar.d $vr29, $vr5, $vr2 ++# CHECK: encoding: [0xbd,0x88,0xf3,0x70] ++vsrar.d $vr29, $vr5, $vr2 ++ ++# CHECK: vsrln.b.h $vr24, $vr14, $vr29 ++# CHECK: encoding: [0xd8,0xf5,0xf4,0x70] ++vsrln.b.h $vr24, $vr14, $vr29 ++ ++# CHECK: vsrln.h.w $vr26, $vr22, $vr16 ++# CHECK: encoding: [0xda,0x42,0xf5,0x70] ++vsrln.h.w $vr26, $vr22, $vr16 ++ ++# CHECK: vsrln.w.d $vr17, $vr31, $vr2 ++# CHECK: encoding: [0xf1,0x8b,0xf5,0x70] ++vsrln.w.d $vr17, $vr31, $vr2 ++ ++# CHECK: vsran.b.h $vr31, $vr0, $vr23 ++# CHECK: encoding: [0x1f,0xdc,0xf6,0x70] ++vsran.b.h $vr31, $vr0, $vr23 ++ ++# CHECK: vsran.h.w $vr20, $vr12, $vr29 ++# CHECK: encoding: [0x94,0x75,0xf7,0x70] ++vsran.h.w $vr20, $vr12, $vr29 ++ ++# CHECK: vsran.w.d $vr2, $vr1, $vr2 ++# CHECK: encoding: [0x22,0x88,0xf7,0x70] ++vsran.w.d $vr2, $vr1, $vr2 ++ ++# CHECK: vsrlrn.b.h $vr19, $vr28, $vr0 ++# CHECK: encoding: [0x93,0x83,0xf8,0x70] ++vsrlrn.b.h $vr19, $vr28, $vr0 ++ ++# CHECK: vsrlrn.h.w $vr23, $vr29, $vr14 ++# CHECK: encoding: [0xb7,0x3b,0xf9,0x70] ++vsrlrn.h.w $vr23, $vr29, $vr14 ++ ++# CHECK: vsrlrn.w.d $vr5, $vr26, $vr5 ++# CHECK: encoding: [0x45,0x97,0xf9,0x70] ++vsrlrn.w.d $vr5, $vr26, $vr5 ++ ++# CHECK: vsrarn.b.h $vr17, $vr15, $vr7 ++# CHECK: encoding: [0xf1,0x9d,0xfa,0x70] ++vsrarn.b.h $vr17, $vr15, $vr7 ++ ++# CHECK: vsrarn.h.w $vr12, $vr10, $vr29 ++# CHECK: encoding: [0x4c,0x75,0xfb,0x70] ++vsrarn.h.w $vr12, $vr10, $vr29 ++ ++# CHECK: vsrarn.w.d $vr24, $vr26, $vr27 ++# CHECK: encoding: [0x58,0xef,0xfb,0x70] ++vsrarn.w.d $vr24, $vr26, $vr27 ++ ++# CHECK: vssrln.b.h $vr1, $vr12, $vr14 ++# CHECK: encoding: [0x81,0xb9,0xfc,0x70] ++vssrln.b.h $vr1, $vr12, $vr14 ++ ++# CHECK: vssrln.h.w $vr11, $vr8, $vr18 ++# CHECK: encoding: [0x0b,0x49,0xfd,0x70] ++vssrln.h.w $vr11, $vr8, $vr18 ++ ++# CHECK: vssrln.w.d $vr31, $vr15, $vr6 ++# CHECK: encoding: [0xff,0x99,0xfd,0x70] ++vssrln.w.d $vr31, $vr15, $vr6 ++ ++# CHECK: vssran.b.h $vr13, $vr5, $vr24 ++# CHECK: encoding: [0xad,0xe0,0xfe,0x70] ++vssran.b.h $vr13, $vr5, $vr24 ++ ++# CHECK: vssran.h.w $vr4, $vr26, $vr7 ++# CHECK: encoding: [0x44,0x1f,0xff,0x70] ++vssran.h.w $vr4, $vr26, $vr7 ++ ++# CHECK: vssran.w.d $vr25, $vr10, $vr6 ++# CHECK: encoding: [0x59,0x99,0xff,0x70] ++vssran.w.d $vr25, $vr10, $vr6 ++ ++# CHECK: vssrlrn.b.h $vr28, $vr28, $vr6 ++# CHECK: encoding: [0x9c,0x9b,0x00,0x71] 
++vssrlrn.b.h $vr28, $vr28, $vr6 ++ ++# CHECK: vssrlrn.h.w $vr15, $vr23, $vr17 ++# CHECK: encoding: [0xef,0x46,0x01,0x71] ++vssrlrn.h.w $vr15, $vr23, $vr17 ++ ++# CHECK: vssrlrn.w.d $vr12, $vr9, $vr2 ++# CHECK: encoding: [0x2c,0x89,0x01,0x71] ++vssrlrn.w.d $vr12, $vr9, $vr2 ++ ++# CHECK: vssrarn.b.h $vr1, $vr25, $vr17 ++# CHECK: encoding: [0x21,0xc7,0x02,0x71] ++vssrarn.b.h $vr1, $vr25, $vr17 ++ ++# CHECK: vssrarn.h.w $vr3, $vr9, $vr23 ++# CHECK: encoding: [0x23,0x5d,0x03,0x71] ++vssrarn.h.w $vr3, $vr9, $vr23 ++ ++# CHECK: vssrarn.w.d $vr14, $vr9, $vr27 ++# CHECK: encoding: [0x2e,0xed,0x03,0x71] ++vssrarn.w.d $vr14, $vr9, $vr27 ++ ++# CHECK: vssrln.bu.h $vr16, $vr24, $vr15 ++# CHECK: encoding: [0x10,0xbf,0x04,0x71] ++vssrln.bu.h $vr16, $vr24, $vr15 ++ ++# CHECK: vssrln.hu.w $vr21, $vr23, $vr30 ++# CHECK: encoding: [0xf5,0x7a,0x05,0x71] ++vssrln.hu.w $vr21, $vr23, $vr30 ++ ++# CHECK: vssrln.wu.d $vr12, $vr8, $vr30 ++# CHECK: encoding: [0x0c,0xf9,0x05,0x71] ++vssrln.wu.d $vr12, $vr8, $vr30 ++ ++# CHECK: vssran.bu.h $vr5, $vr18, $vr12 ++# CHECK: encoding: [0x45,0xb2,0x06,0x71] ++vssran.bu.h $vr5, $vr18, $vr12 ++ ++# CHECK: vssran.hu.w $vr0, $vr7, $vr28 ++# CHECK: encoding: [0xe0,0x70,0x07,0x71] ++vssran.hu.w $vr0, $vr7, $vr28 ++ ++# CHECK: vssran.wu.d $vr5, $vr11, $vr8 ++# CHECK: encoding: [0x65,0xa1,0x07,0x71] ++vssran.wu.d $vr5, $vr11, $vr8 ++ ++# CHECK: vssrlrn.bu.h $vr18, $vr25, $vr3 ++# CHECK: encoding: [0x32,0x8f,0x08,0x71] ++vssrlrn.bu.h $vr18, $vr25, $vr3 ++ ++# CHECK: vssrlrn.hu.w $vr19, $vr1, $vr20 ++# CHECK: encoding: [0x33,0x50,0x09,0x71] ++vssrlrn.hu.w $vr19, $vr1, $vr20 ++ ++# CHECK: vssrlrn.wu.d $vr6, $vr30, $vr18 ++# CHECK: encoding: [0xc6,0xcb,0x09,0x71] ++vssrlrn.wu.d $vr6, $vr30, $vr18 ++ ++# CHECK: vssrarn.bu.h $vr12, $vr13, $vr3 ++# CHECK: encoding: [0xac,0x8d,0x0a,0x71] ++vssrarn.bu.h $vr12, $vr13, $vr3 ++ ++# CHECK: vssrarn.hu.w $vr18, $vr5, $vr20 ++# CHECK: encoding: [0xb2,0x50,0x0b,0x71] ++vssrarn.hu.w $vr18, $vr5, $vr20 ++ ++# CHECK: vssrarn.wu.d $vr23, $vr8, $vr21 ++# CHECK: encoding: [0x17,0xd5,0x0b,0x71] ++vssrarn.wu.d $vr23, $vr8, $vr21 ++ ++# CHECK: vbitclr.b $vr14, $vr2, $vr31 ++# CHECK: encoding: [0x4e,0x7c,0x0c,0x71] ++vbitclr.b $vr14, $vr2, $vr31 ++ ++# CHECK: vbitclr.h $vr17, $vr25, $vr8 ++# CHECK: encoding: [0x31,0xa3,0x0c,0x71] ++vbitclr.h $vr17, $vr25, $vr8 ++ ++# CHECK: vbitclr.w $vr18, $vr11, $vr3 ++# CHECK: encoding: [0x72,0x0d,0x0d,0x71] ++vbitclr.w $vr18, $vr11, $vr3 ++ ++# CHECK: vbitclr.d $vr31, $vr15, $vr29 ++# CHECK: encoding: [0xff,0xf5,0x0d,0x71] ++vbitclr.d $vr31, $vr15, $vr29 ++ ++# CHECK: vbitset.b $vr8, $vr29, $vr16 ++# CHECK: encoding: [0xa8,0x43,0x0e,0x71] ++vbitset.b $vr8, $vr29, $vr16 ++ ++# CHECK: vbitset.h $vr5, $vr17, $vr17 ++# CHECK: encoding: [0x25,0xc6,0x0e,0x71] ++vbitset.h $vr5, $vr17, $vr17 ++ ++# CHECK: vbitset.w $vr5, $vr19, $vr5 ++# CHECK: encoding: [0x65,0x16,0x0f,0x71] ++vbitset.w $vr5, $vr19, $vr5 ++ ++# CHECK: vbitset.d $vr5, $vr27, $vr10 ++# CHECK: encoding: [0x65,0xab,0x0f,0x71] ++vbitset.d $vr5, $vr27, $vr10 ++ ++# CHECK: vbitrev.b $vr16, $vr5, $vr8 ++# CHECK: encoding: [0xb0,0x20,0x10,0x71] ++vbitrev.b $vr16, $vr5, $vr8 ++ ++# CHECK: vbitrev.h $vr12, $vr29, $vr12 ++# CHECK: encoding: [0xac,0xb3,0x10,0x71] ++vbitrev.h $vr12, $vr29, $vr12 ++ ++# CHECK: vbitrev.w $vr3, $vr14, $vr14 ++# CHECK: encoding: [0xc3,0x39,0x11,0x71] ++vbitrev.w $vr3, $vr14, $vr14 ++ ++# CHECK: vbitrev.d $vr31, $vr27, $vr14 ++# CHECK: encoding: [0x7f,0xbb,0x11,0x71] ++vbitrev.d $vr31, $vr27, $vr14 ++ ++# CHECK: vpackev.b $vr22, $vr24, $vr19 ++# 
CHECK: encoding: [0x16,0x4f,0x16,0x71] ++vpackev.b $vr22, $vr24, $vr19 ++ ++# CHECK: vpackev.h $vr28, $vr2, $vr18 ++# CHECK: encoding: [0x5c,0xc8,0x16,0x71] ++vpackev.h $vr28, $vr2, $vr18 ++ ++# CHECK: vpackev.w $vr21, $vr3, $vr4 ++# CHECK: encoding: [0x75,0x10,0x17,0x71] ++vpackev.w $vr21, $vr3, $vr4 ++ ++# CHECK: vpackev.d $vr24, $vr21, $vr11 ++# CHECK: encoding: [0xb8,0xae,0x17,0x71] ++vpackev.d $vr24, $vr21, $vr11 ++ ++# CHECK: vpackod.b $vr12, $vr31, $vr26 ++# CHECK: encoding: [0xec,0x6b,0x18,0x71] ++vpackod.b $vr12, $vr31, $vr26 ++ ++# CHECK: vpackod.h $vr25, $vr3, $vr16 ++# CHECK: encoding: [0x79,0xc0,0x18,0x71] ++vpackod.h $vr25, $vr3, $vr16 ++ ++# CHECK: vpackod.w $vr21, $vr18, $vr15 ++# CHECK: encoding: [0x55,0x3e,0x19,0x71] ++vpackod.w $vr21, $vr18, $vr15 ++ ++# CHECK: vpackod.d $vr2, $vr3, $vr0 ++# CHECK: encoding: [0x62,0x80,0x19,0x71] ++vpackod.d $vr2, $vr3, $vr0 ++ ++# CHECK: vilvl.b $vr8, $vr8, $vr28 ++# CHECK: encoding: [0x08,0x71,0x1a,0x71] ++vilvl.b $vr8, $vr8, $vr28 ++ ++# CHECK: vilvl.h $vr20, $vr0, $vr31 ++# CHECK: encoding: [0x14,0xfc,0x1a,0x71] ++vilvl.h $vr20, $vr0, $vr31 ++ ++# CHECK: vilvl.w $vr11, $vr10, $vr17 ++# CHECK: encoding: [0x4b,0x45,0x1b,0x71] ++vilvl.w $vr11, $vr10, $vr17 ++ ++# CHECK: vilvl.d $vr7, $vr7, $vr1 ++# CHECK: encoding: [0xe7,0x84,0x1b,0x71] ++vilvl.d $vr7, $vr7, $vr1 ++ ++# CHECK: vilvh.b $vr11, $vr11, $vr1 ++# CHECK: encoding: [0x6b,0x05,0x1c,0x71] ++vilvh.b $vr11, $vr11, $vr1 ++ ++# CHECK: vilvh.h $vr0, $vr31, $vr13 ++# CHECK: encoding: [0xe0,0xb7,0x1c,0x71] ++vilvh.h $vr0, $vr31, $vr13 ++ ++# CHECK: vilvh.w $vr28, $vr21, $vr7 ++# CHECK: encoding: [0xbc,0x1e,0x1d,0x71] ++vilvh.w $vr28, $vr21, $vr7 ++ ++# CHECK: vilvh.d $vr23, $vr3, $vr19 ++# CHECK: encoding: [0x77,0xcc,0x1d,0x71] ++vilvh.d $vr23, $vr3, $vr19 ++ ++# CHECK: vpickev.b $vr1, $vr21, $vr8 ++# CHECK: encoding: [0xa1,0x22,0x1e,0x71] ++vpickev.b $vr1, $vr21, $vr8 ++ ++# CHECK: vpickev.h $vr16, $vr1, $vr9 ++# CHECK: encoding: [0x30,0xa4,0x1e,0x71] ++vpickev.h $vr16, $vr1, $vr9 ++ ++# CHECK: vpickev.w $vr13, $vr13, $vr4 ++# CHECK: encoding: [0xad,0x11,0x1f,0x71] ++vpickev.w $vr13, $vr13, $vr4 ++ ++# CHECK: vpickev.d $vr11, $vr30, $vr30 ++# CHECK: encoding: [0xcb,0xfb,0x1f,0x71] ++vpickev.d $vr11, $vr30, $vr30 ++ ++# CHECK: vpickod.b $vr7, $vr11, $vr13 ++# CHECK: encoding: [0x67,0x35,0x20,0x71] ++vpickod.b $vr7, $vr11, $vr13 ++ ++# CHECK: vpickod.h $vr18, $vr3, $vr1 ++# CHECK: encoding: [0x72,0x84,0x20,0x71] ++vpickod.h $vr18, $vr3, $vr1 ++ ++# CHECK: vpickod.w $vr3, $vr16, $vr19 ++# CHECK: encoding: [0x03,0x4e,0x21,0x71] ++vpickod.w $vr3, $vr16, $vr19 ++ ++# CHECK: vpickod.d $vr12, $vr13, $vr21 ++# CHECK: encoding: [0xac,0xd5,0x21,0x71] ++vpickod.d $vr12, $vr13, $vr21 ++ ++# CHECK: vreplve.b $vr15, $vr17, $r19 ++# CHECK: encoding: [0x2f,0x4e,0x22,0x71] ++vreplve.b $vr15, $vr17, $r19 ++ ++# CHECK: vreplve.h $vr14, $vr23, $r4 ++# CHECK: encoding: [0xee,0x92,0x22,0x71] ++vreplve.h $vr14, $vr23, $r4 ++ ++# CHECK: vreplve.w $vr29, $vr19, $r27 ++# CHECK: encoding: [0x7d,0x6e,0x23,0x71] ++vreplve.w $vr29, $vr19, $r27 ++ ++# CHECK: vreplve.d $vr13, $vr20, $r20 ++# CHECK: encoding: [0x8d,0xd2,0x23,0x71] ++vreplve.d $vr13, $vr20, $r20 ++ ++# CHECK: vand.v $vr25, $vr2, $vr21 ++# CHECK: encoding: [0x59,0x54,0x26,0x71] ++vand.v $vr25, $vr2, $vr21 ++ ++# CHECK: vor.v $vr4, $vr27, $vr16 ++# CHECK: encoding: [0x64,0xc3,0x26,0x71] ++vor.v $vr4, $vr27, $vr16 ++ ++# CHECK: vxor.v $vr30, $vr25, $vr4 ++# CHECK: encoding: [0x3e,0x13,0x27,0x71] ++vxor.v $vr30, $vr25, $vr4 ++ ++# CHECK: vnor.v $vr9, $vr2, 
$vr22 ++# CHECK: encoding: [0x49,0xd8,0x27,0x71] ++vnor.v $vr9, $vr2, $vr22 ++ ++# CHECK: vandn.v $vr20, $vr26, $vr4 ++# CHECK: encoding: [0x54,0x13,0x28,0x71] ++vandn.v $vr20, $vr26, $vr4 ++ ++# CHECK: vorn.v $vr6, $vr21, $vr30 ++# CHECK: encoding: [0xa6,0xfa,0x28,0x71] ++vorn.v $vr6, $vr21, $vr30 ++ ++# CHECK: vfrstp.b $vr11, $vr9, $vr13 ++# CHECK: encoding: [0x2b,0x35,0x2b,0x71] ++vfrstp.b $vr11, $vr9, $vr13 ++ ++# CHECK: vfrstp.h $vr21, $vr26, $vr22 ++# CHECK: encoding: [0x55,0xdb,0x2b,0x71] ++vfrstp.h $vr21, $vr26, $vr22 ++ ++# CHECK: vadd.q $vr9, $vr7, $vr16 ++# CHECK: encoding: [0xe9,0x40,0x2d,0x71] ++vadd.q $vr9, $vr7, $vr16 ++ ++# CHECK: vsub.q $vr2, $vr1, $vr16 ++# CHECK: encoding: [0x22,0xc0,0x2d,0x71] ++vsub.q $vr2, $vr1, $vr16 ++ ++# CHECK: vsigncov.b $vr2, $vr2, $vr14 ++# CHECK: encoding: [0x42,0x38,0x2e,0x71] ++vsigncov.b $vr2, $vr2, $vr14 ++ ++# CHECK: vsigncov.h $vr21, $vr21, $vr13 ++# CHECK: encoding: [0xb5,0xb6,0x2e,0x71] ++vsigncov.h $vr21, $vr21, $vr13 ++ ++# CHECK: vsigncov.w $vr21, $vr7, $vr5 ++# CHECK: encoding: [0xf5,0x14,0x2f,0x71] ++vsigncov.w $vr21, $vr7, $vr5 ++ ++# CHECK: vsigncov.d $vr10, $vr10, $vr3 ++# CHECK: encoding: [0x4a,0x8d,0x2f,0x71] ++vsigncov.d $vr10, $vr10, $vr3 ++ ++# CHECK: vfadd.s $vr10, $vr4, $vr1 ++# CHECK: encoding: [0x8a,0x84,0x30,0x71] ++vfadd.s $vr10, $vr4, $vr1 ++ ++# CHECK: vfadd.d $vr15, $vr27, $vr2 ++# CHECK: encoding: [0x6f,0x0b,0x31,0x71] ++vfadd.d $vr15, $vr27, $vr2 ++ ++# CHECK: vfsub.s $vr14, $vr16, $vr9 ++# CHECK: encoding: [0x0e,0xa6,0x32,0x71] ++vfsub.s $vr14, $vr16, $vr9 ++ ++# CHECK: vfsub.d $vr4, $vr1, $vr8 ++# CHECK: encoding: [0x24,0x20,0x33,0x71] ++vfsub.d $vr4, $vr1, $vr8 ++ ++# CHECK: vfmul.s $vr0, $vr18, $vr6 ++# CHECK: encoding: [0x40,0x9a,0x38,0x71] ++vfmul.s $vr0, $vr18, $vr6 ++ ++# CHECK: vfmul.d $vr27, $vr31, $vr30 ++# CHECK: encoding: [0xfb,0x7b,0x39,0x71] ++vfmul.d $vr27, $vr31, $vr30 ++ ++# CHECK: vfdiv.s $vr3, $vr7, $vr6 ++# CHECK: encoding: [0xe3,0x98,0x3a,0x71] ++vfdiv.s $vr3, $vr7, $vr6 ++ ++# CHECK: vfdiv.d $vr16, $vr6, $vr30 ++# CHECK: encoding: [0xd0,0x78,0x3b,0x71] ++vfdiv.d $vr16, $vr6, $vr30 ++ ++# CHECK: vfmax.s $vr18, $vr30, $vr8 ++# CHECK: encoding: [0xd2,0xa3,0x3c,0x71] ++vfmax.s $vr18, $vr30, $vr8 ++ ++# CHECK: vfmax.d $vr19, $vr8, $vr24 ++# CHECK: encoding: [0x13,0x61,0x3d,0x71] ++vfmax.d $vr19, $vr8, $vr24 ++ ++# CHECK: vfmin.s $vr24, $vr26, $vr6 ++# CHECK: encoding: [0x58,0x9b,0x3e,0x71] ++vfmin.s $vr24, $vr26, $vr6 ++ ++# CHECK: vfmin.d $vr16, $vr25, $vr1 ++# CHECK: encoding: [0x30,0x07,0x3f,0x71] ++vfmin.d $vr16, $vr25, $vr1 ++ ++# CHECK: vfmaxa.s $vr8, $vr7, $vr14 ++# CHECK: encoding: [0xe8,0xb8,0x40,0x71] ++vfmaxa.s $vr8, $vr7, $vr14 ++ ++# CHECK: vfmaxa.d $vr10, $vr8, $vr4 ++# CHECK: encoding: [0x0a,0x11,0x41,0x71] ++vfmaxa.d $vr10, $vr8, $vr4 ++ ++# CHECK: vfmina.s $vr16, $vr6, $vr18 ++# CHECK: encoding: [0xd0,0xc8,0x42,0x71] ++vfmina.s $vr16, $vr6, $vr18 ++ ++# CHECK: vfmina.d $vr26, $vr7, $vr14 ++# CHECK: encoding: [0xfa,0x38,0x43,0x71] ++vfmina.d $vr26, $vr7, $vr14 ++ ++# CHECK: vfcvt.h.s $vr30, $vr4, $vr24 ++# CHECK: encoding: [0x9e,0x60,0x46,0x71] ++vfcvt.h.s $vr30, $vr4, $vr24 ++ ++# CHECK: vfcvt.s.d $vr16, $vr17, $vr4 ++# CHECK: encoding: [0x30,0x92,0x46,0x71] ++vfcvt.s.d $vr16, $vr17, $vr4 ++ ++# CHECK: vffint.s.l $vr25, $vr23, $vr10 ++# CHECK: encoding: [0xf9,0x2a,0x48,0x71] ++vffint.s.l $vr25, $vr23, $vr10 ++ ++# CHECK: vftint.w.d $vr9, $vr22, $vr27 ++# CHECK: encoding: [0xc9,0xee,0x49,0x71] ++vftint.w.d $vr9, $vr22, $vr27 ++ ++# CHECK: vftintrm.w.d $vr31, $vr10, $vr29 ++# 
CHECK: encoding: [0x5f,0x75,0x4a,0x71] ++vftintrm.w.d $vr31, $vr10, $vr29 ++ ++# CHECK: vftintrp.w.d $vr23, $vr13, $vr15 ++# CHECK: encoding: [0xb7,0xbd,0x4a,0x71] ++vftintrp.w.d $vr23, $vr13, $vr15 ++ ++# CHECK: vftintrz.w.d $vr18, $vr9, $vr6 ++# CHECK: encoding: [0x32,0x19,0x4b,0x71] ++vftintrz.w.d $vr18, $vr9, $vr6 ++ ++# CHECK: vftintrne.w.d $vr21, $vr12, $vr30 ++# CHECK: encoding: [0x95,0xf9,0x4b,0x71] ++vftintrne.w.d $vr21, $vr12, $vr30 ++ ++# CHECK: vshuf.h $vr3, $vr11, $vr2 ++# CHECK: encoding: [0x63,0x89,0x7a,0x71] ++vshuf.h $vr3, $vr11, $vr2 ++ ++# CHECK: vshuf.w $vr21, $vr4, $vr29 ++# CHECK: encoding: [0x95,0x74,0x7b,0x71] ++vshuf.w $vr21, $vr4, $vr29 ++ ++# CHECK: vshuf.d $vr11, $vr23, $vr18 ++# CHECK: encoding: [0xeb,0xca,0x7b,0x71] ++vshuf.d $vr11, $vr23, $vr18 ++ ++# CHECK: vseqi.b $vr27, $vr14, 7 ++# CHECK: encoding: [0xdb,0x1d,0x80,0x72] ++vseqi.b $vr27, $vr14, 7 ++ ++# CHECK: vseqi.h $vr23, $vr27, -6 ++# CHECK: encoding: [0x77,0xeb,0x80,0x72] ++vseqi.h $vr23, $vr27, -6 ++ ++# CHECK: vseqi.w $vr8, $vr8, -16 ++# CHECK: encoding: [0x08,0x41,0x81,0x72] ++vseqi.w $vr8, $vr8, -16 ++ ++# CHECK: vseqi.d $vr11, $vr5, 5 ++# CHECK: encoding: [0xab,0x94,0x81,0x72] ++vseqi.d $vr11, $vr5, 5 ++ ++# CHECK: vslei.b $vr8, $vr27, 7 ++# CHECK: encoding: [0x68,0x1f,0x82,0x72] ++vslei.b $vr8, $vr27, 7 ++ ++# CHECK: vslei.h $vr27, $vr29, -5 ++# CHECK: encoding: [0xbb,0xef,0x82,0x72] ++vslei.h $vr27, $vr29, -5 ++ ++# CHECK: vslei.w $vr23, $vr13, -3 ++# CHECK: encoding: [0xb7,0x75,0x83,0x72] ++vslei.w $vr23, $vr13, -3 ++ ++# CHECK: vslei.d $vr5, $vr15, -8 ++# CHECK: encoding: [0xe5,0xe1,0x83,0x72] ++vslei.d $vr5, $vr15, -8 ++ ++# CHECK: vslei.bu $vr29, $vr10, 9 ++# CHECK: encoding: [0x5d,0x25,0x84,0x72] ++vslei.bu $vr29, $vr10, 9 ++ ++# CHECK: vslei.hu $vr29, $vr18, 11 ++# CHECK: encoding: [0x5d,0xae,0x84,0x72] ++vslei.hu $vr29, $vr18, 11 ++ ++# CHECK: vslei.wu $vr8, $vr1, 2 ++# CHECK: encoding: [0x28,0x08,0x85,0x72] ++vslei.wu $vr8, $vr1, 2 ++ ++# CHECK: vslei.du $vr16, $vr5, 10 ++# CHECK: encoding: [0xb0,0xa8,0x85,0x72] ++vslei.du $vr16, $vr5, 10 ++ ++# CHECK: vslti.b $vr8, $vr4, -2 ++# CHECK: encoding: [0x88,0x78,0x86,0x72] ++vslti.b $vr8, $vr4, -2 ++ ++# CHECK: vslti.h $vr26, $vr7, -14 ++# CHECK: encoding: [0xfa,0xc8,0x86,0x72] ++vslti.h $vr26, $vr7, -14 ++ ++# CHECK: vslti.w $vr28, $vr8, 12 ++# CHECK: encoding: [0x1c,0x31,0x87,0x72] ++vslti.w $vr28, $vr8, 12 ++ ++# CHECK: vslti.d $vr4, $vr27, 9 ++# CHECK: encoding: [0x64,0xa7,0x87,0x72] ++vslti.d $vr4, $vr27, 9 ++ ++# CHECK: vslti.bu $vr10, $vr14, 18 ++# CHECK: encoding: [0xca,0x49,0x88,0x72] ++vslti.bu $vr10, $vr14, 18 ++ ++# CHECK: vslti.hu $vr28, $vr28, 30 ++# CHECK: encoding: [0x9c,0xfb,0x88,0x72] ++vslti.hu $vr28, $vr28, 30 ++ ++# CHECK: vslti.wu $vr15, $vr27, 27 ++# CHECK: encoding: [0x6f,0x6f,0x89,0x72] ++vslti.wu $vr15, $vr27, 27 ++ ++# CHECK: vslti.du $vr30, $vr17, 19 ++# CHECK: encoding: [0x3e,0xce,0x89,0x72] ++vslti.du $vr30, $vr17, 19 ++ ++# CHECK: vaddi.bu $vr6, $vr1, 18 ++# CHECK: encoding: [0x26,0x48,0x8a,0x72] ++vaddi.bu $vr6, $vr1, 18 ++ ++# CHECK: vaddi.hu $vr12, $vr14, 5 ++# CHECK: encoding: [0xcc,0x95,0x8a,0x72] ++vaddi.hu $vr12, $vr14, 5 ++ ++# CHECK: vaddi.wu $vr28, $vr0, 26 ++# CHECK: encoding: [0x1c,0x68,0x8b,0x72] ++vaddi.wu $vr28, $vr0, 26 ++ ++# CHECK: vaddi.du $vr10, $vr5, 2 ++# CHECK: encoding: [0xaa,0x88,0x8b,0x72] ++vaddi.du $vr10, $vr5, 2 ++ ++# CHECK: vsubi.bu $vr22, $vr28, 2 ++# CHECK: encoding: [0x96,0x0b,0x8c,0x72] ++vsubi.bu $vr22, $vr28, 2 ++ ++# CHECK: vsubi.hu $vr0, $vr22, 31 ++# CHECK: encoding: 
[0xc0,0xfe,0x8c,0x72] ++vsubi.hu $vr0, $vr22, 31 ++ ++# CHECK: vsubi.wu $vr20, $vr6, 5 ++# CHECK: encoding: [0xd4,0x14,0x8d,0x72] ++vsubi.wu $vr20, $vr6, 5 ++ ++# CHECK: vsubi.du $vr18, $vr11, 1 ++# CHECK: encoding: [0x72,0x85,0x8d,0x72] ++vsubi.du $vr18, $vr11, 1 ++ ++# CHECK: vbsll.v $vr4, $vr26, 4 ++# CHECK: encoding: [0x44,0x13,0x8e,0x72] ++vbsll.v $vr4, $vr26, 4 ++ ++# CHECK: vbsrl.v $vr7, $vr31, 15 ++# CHECK: encoding: [0xe7,0xbf,0x8e,0x72] ++vbsrl.v $vr7, $vr31, 15 ++ ++# CHECK: vmaxi.b $vr19, $vr15, 14 ++# CHECK: encoding: [0xf3,0x39,0x90,0x72] ++vmaxi.b $vr19, $vr15, 14 ++ ++# CHECK: vmaxi.h $vr25, $vr3, -12 ++# CHECK: encoding: [0x79,0xd0,0x90,0x72] ++vmaxi.h $vr25, $vr3, -12 ++ ++# CHECK: vmaxi.w $vr20, $vr25, 5 ++# CHECK: encoding: [0x34,0x17,0x91,0x72] ++vmaxi.w $vr20, $vr25, 5 ++ ++# CHECK: vmaxi.d $vr9, $vr10, 12 ++# CHECK: encoding: [0x49,0xb1,0x91,0x72] ++vmaxi.d $vr9, $vr10, 12 ++ ++# CHECK: vmini.b $vr30, $vr21, -4 ++# CHECK: encoding: [0xbe,0x72,0x92,0x72] ++vmini.b $vr30, $vr21, -4 ++ ++# CHECK: vmini.h $vr11, $vr28, -3 ++# CHECK: encoding: [0x8b,0xf7,0x92,0x72] ++vmini.h $vr11, $vr28, -3 ++ ++# CHECK: vmini.w $vr6, $vr25, -9 ++# CHECK: encoding: [0x26,0x5f,0x93,0x72] ++vmini.w $vr6, $vr25, -9 ++ ++# CHECK: vmini.d $vr28, $vr8, 2 ++# CHECK: encoding: [0x1c,0x89,0x93,0x72] ++vmini.d $vr28, $vr8, 2 ++ ++# CHECK: vmaxi.bu $vr13, $vr24, 19 ++# CHECK: encoding: [0x0d,0x4f,0x94,0x72] ++vmaxi.bu $vr13, $vr24, 19 ++ ++# CHECK: vmaxi.hu $vr3, $vr1, 22 ++# CHECK: encoding: [0x23,0xd8,0x94,0x72] ++vmaxi.hu $vr3, $vr1, 22 ++ ++# CHECK: vmaxi.wu $vr1, $vr3, 23 ++# CHECK: encoding: [0x61,0x5c,0x95,0x72] ++vmaxi.wu $vr1, $vr3, 23 ++ ++# CHECK: vmaxi.du $vr6, $vr18, 21 ++# CHECK: encoding: [0x46,0xd6,0x95,0x72] ++vmaxi.du $vr6, $vr18, 21 ++ ++# CHECK: vmini.bu $vr10, $vr2, 20 ++# CHECK: encoding: [0x4a,0x50,0x96,0x72] ++vmini.bu $vr10, $vr2, 20 ++ ++# CHECK: vmini.hu $vr17, $vr17, 15 ++# CHECK: encoding: [0x31,0xbe,0x96,0x72] ++vmini.hu $vr17, $vr17, 15 ++ ++# CHECK: vmini.wu $vr26, $vr27, 23 ++# CHECK: encoding: [0x7a,0x5f,0x97,0x72] ++vmini.wu $vr26, $vr27, 23 ++ ++# CHECK: vmini.du $vr12, $vr27, 8 ++# CHECK: encoding: [0x6c,0xa3,0x97,0x72] ++vmini.du $vr12, $vr27, 8 ++ ++# CHECK: vfrstpi.b $vr26, $vr8, 9 ++# CHECK: encoding: [0x1a,0x25,0x9a,0x72] ++vfrstpi.b $vr26, $vr8, 9 ++ ++# CHECK: vfrstpi.h $vr16, $vr2, 20 ++# CHECK: encoding: [0x50,0xd0,0x9a,0x72] ++vfrstpi.h $vr16, $vr2, 20 ++ ++# CHECK: vclo.b $vr5, $vr17 ++# CHECK: encoding: [0x25,0x02,0x9c,0x72] ++vclo.b $vr5, $vr17 ++ ++# CHECK: vclo.h $vr8, $vr4 ++# CHECK: encoding: [0x88,0x04,0x9c,0x72] ++vclo.h $vr8, $vr4 ++ ++# CHECK: vclo.w $vr1, $vr13 ++# CHECK: encoding: [0xa1,0x09,0x9c,0x72] ++vclo.w $vr1, $vr13 ++ ++# CHECK: vclo.d $vr0, $vr23 ++# CHECK: encoding: [0xe0,0x0e,0x9c,0x72] ++vclo.d $vr0, $vr23 ++ ++# CHECK: vclz.b $vr4, $vr25 ++# CHECK: encoding: [0x24,0x13,0x9c,0x72] ++vclz.b $vr4, $vr25 ++ ++# CHECK: vclz.h $vr1, $vr25 ++# CHECK: encoding: [0x21,0x17,0x9c,0x72] ++vclz.h $vr1, $vr25 ++ ++# CHECK: vclz.w $vr1, $vr5 ++# CHECK: encoding: [0xa1,0x18,0x9c,0x72] ++vclz.w $vr1, $vr5 ++ ++# CHECK: vclz.d $vr16, $vr17 ++# CHECK: encoding: [0x30,0x1e,0x9c,0x72] ++vclz.d $vr16, $vr17 ++ ++# CHECK: vpcnt.b $vr4, $vr3 ++# CHECK: encoding: [0x64,0x20,0x9c,0x72] ++vpcnt.b $vr4, $vr3 ++ ++# CHECK: vpcnt.h $vr15, $vr17 ++# CHECK: encoding: [0x2f,0x26,0x9c,0x72] ++vpcnt.h $vr15, $vr17 ++ ++# CHECK: vpcnt.w $vr13, $vr8 ++# CHECK: encoding: [0x0d,0x29,0x9c,0x72] ++vpcnt.w $vr13, $vr8 ++ ++# CHECK: vpcnt.d $vr0, $vr8 ++# CHECK: 
encoding: [0x00,0x2d,0x9c,0x72] ++vpcnt.d $vr0, $vr8 ++ ++# CHECK: vneg.b $vr14, $vr24 ++# CHECK: encoding: [0x0e,0x33,0x9c,0x72] ++vneg.b $vr14, $vr24 ++ ++# CHECK: vneg.h $vr24, $vr7 ++# CHECK: encoding: [0xf8,0x34,0x9c,0x72] ++vneg.h $vr24, $vr7 ++ ++# CHECK: vneg.w $vr19, $vr5 ++# CHECK: encoding: [0xb3,0x38,0x9c,0x72] ++vneg.w $vr19, $vr5 ++ ++# CHECK: vneg.d $vr3, $vr28 ++# CHECK: encoding: [0x83,0x3f,0x9c,0x72] ++vneg.d $vr3, $vr28 ++ ++# CHECK: vmskltz.b $vr31, $vr25 ++# CHECK: encoding: [0x3f,0x43,0x9c,0x72] ++vmskltz.b $vr31, $vr25 ++ ++# CHECK: vmskltz.h $vr9, $vr20 ++# CHECK: encoding: [0x89,0x46,0x9c,0x72] ++vmskltz.h $vr9, $vr20 ++ ++# CHECK: vmskltz.w $vr22, $vr26 ++# CHECK: encoding: [0x56,0x4b,0x9c,0x72] ++vmskltz.w $vr22, $vr26 ++ ++# CHECK: vmskltz.d $vr28, $vr10 ++# CHECK: encoding: [0x5c,0x4d,0x9c,0x72] ++vmskltz.d $vr28, $vr10 ++ ++# CHECK: vmskgez.b $vr7, $vr5 ++# CHECK: encoding: [0xa7,0x50,0x9c,0x72] ++vmskgez.b $vr7, $vr5 ++ ++# CHECK: vmsknz.b $vr20, $vr12 ++# CHECK: encoding: [0x94,0x61,0x9c,0x72] ++vmsknz.b $vr20, $vr12 ++ ++# CHECK: vseteqz.v $fcc5, $vr14 ++# CHECK: encoding: [0xc5,0x99,0x9c,0x72] ++vseteqz.v $fcc5, $vr14 ++ ++# CHECK: vsetnez.v $fcc2, $vr8 ++# CHECK: encoding: [0x02,0x9d,0x9c,0x72] ++vsetnez.v $fcc2, $vr8 ++ ++# CHECK: vsetanyeqz.b $fcc0, $vr20 ++# CHECK: encoding: [0x80,0xa2,0x9c,0x72] ++vsetanyeqz.b $fcc0, $vr20 ++ ++# CHECK: vsetanyeqz.h $fcc4, $vr16 ++# CHECK: encoding: [0x04,0xa6,0x9c,0x72] ++vsetanyeqz.h $fcc4, $vr16 ++ ++# CHECK: vsetanyeqz.w $fcc7, $vr2 ++# CHECK: encoding: [0x47,0xa8,0x9c,0x72] ++vsetanyeqz.w $fcc7, $vr2 ++ ++# CHECK: vsetanyeqz.d $fcc4, $vr12 ++# CHECK: encoding: [0x84,0xad,0x9c,0x72] ++vsetanyeqz.d $fcc4, $vr12 ++ ++# CHECK: vsetallnez.b $fcc7, $vr0 ++# CHECK: encoding: [0x07,0xb0,0x9c,0x72] ++vsetallnez.b $fcc7, $vr0 ++ ++# CHECK: vsetallnez.h $fcc2, $vr11 ++# CHECK: encoding: [0x62,0xb5,0x9c,0x72] ++vsetallnez.h $fcc2, $vr11 ++ ++# CHECK: vsetallnez.w $fcc6, $vr25 ++# CHECK: encoding: [0x26,0xbb,0x9c,0x72] ++vsetallnez.w $fcc6, $vr25 ++ ++# CHECK: vsetallnez.d $fcc7, $vr31 ++# CHECK: encoding: [0xe7,0xbf,0x9c,0x72] ++vsetallnez.d $fcc7, $vr31 ++ ++# CHECK: vflogb.s $vr14, $vr28 ++# CHECK: encoding: [0x8e,0xc7,0x9c,0x72] ++vflogb.s $vr14, $vr28 ++ ++# CHECK: vflogb.d $vr29, $vr9 ++# CHECK: encoding: [0x3d,0xc9,0x9c,0x72] ++vflogb.d $vr29, $vr9 ++ ++# CHECK: vfclass.s $vr3, $vr13 ++# CHECK: encoding: [0xa3,0xd5,0x9c,0x72] ++vfclass.s $vr3, $vr13 ++ ++# CHECK: vfclass.d $vr5, $vr15 ++# CHECK: encoding: [0xe5,0xd9,0x9c,0x72] ++vfclass.d $vr5, $vr15 ++ ++# CHECK: vfsqrt.s $vr19, $vr27 ++# CHECK: encoding: [0x73,0xe7,0x9c,0x72] ++vfsqrt.s $vr19, $vr27 ++ ++# CHECK: vfsqrt.d $vr31, $vr3 ++# CHECK: encoding: [0x7f,0xe8,0x9c,0x72] ++vfsqrt.d $vr31, $vr3 ++ ++# CHECK: vfrecip.s $vr24, $vr16 ++# CHECK: encoding: [0x18,0xf6,0x9c,0x72] ++vfrecip.s $vr24, $vr16 ++ ++# CHECK: vfrecip.d $vr23, $vr19 ++# CHECK: encoding: [0x77,0xfa,0x9c,0x72] ++vfrecip.d $vr23, $vr19 ++ ++# CHECK: vfrsqrt.s $vr18, $vr15 ++# CHECK: encoding: [0xf2,0x05,0x9d,0x72] ++vfrsqrt.s $vr18, $vr15 ++ ++# CHECK: vfrsqrt.d $vr18, $vr31 ++# CHECK: encoding: [0xf2,0x0b,0x9d,0x72] ++vfrsqrt.d $vr18, $vr31 ++ ++# CHECK: vfrint.s $vr26, $vr11 ++# CHECK: encoding: [0x7a,0x35,0x9d,0x72] ++vfrint.s $vr26, $vr11 ++ ++# CHECK: vfrint.d $vr24, $vr18 ++# CHECK: encoding: [0x58,0x3a,0x9d,0x72] ++vfrint.d $vr24, $vr18 ++ ++# CHECK: vfrintrm.s $vr5, $vr3 ++# CHECK: encoding: [0x65,0x44,0x9d,0x72] ++vfrintrm.s $vr5, $vr3 ++ ++# CHECK: vfrintrm.d $vr23, $vr10 ++# CHECK: 
encoding: [0x57,0x49,0x9d,0x72] ++vfrintrm.d $vr23, $vr10 ++ ++# CHECK: vfrintrp.s $vr20, $vr2 ++# CHECK: encoding: [0x54,0x54,0x9d,0x72] ++vfrintrp.s $vr20, $vr2 ++ ++# CHECK: vfrintrp.d $vr30, $vr17 ++# CHECK: encoding: [0x3e,0x5a,0x9d,0x72] ++vfrintrp.d $vr30, $vr17 ++ ++# CHECK: vfrintrz.s $vr19, $vr6 ++# CHECK: encoding: [0xd3,0x64,0x9d,0x72] ++vfrintrz.s $vr19, $vr6 ++ ++# CHECK: vfrintrz.d $vr16, $vr18 ++# CHECK: encoding: [0x50,0x6a,0x9d,0x72] ++vfrintrz.d $vr16, $vr18 ++ ++# CHECK: vfrintrne.s $vr8, $vr24 ++# CHECK: encoding: [0x08,0x77,0x9d,0x72] ++vfrintrne.s $vr8, $vr24 ++ ++# CHECK: vfrintrne.d $vr6, $vr5 ++# CHECK: encoding: [0xa6,0x78,0x9d,0x72] ++vfrintrne.d $vr6, $vr5 ++ ++# CHECK: vfcvtl.s.h $vr4, $vr6 ++# CHECK: encoding: [0xc4,0xe8,0x9d,0x72] ++vfcvtl.s.h $vr4, $vr6 ++ ++# CHECK: vfcvth.s.h $vr16, $vr7 ++# CHECK: encoding: [0xf0,0xec,0x9d,0x72] ++vfcvth.s.h $vr16, $vr7 ++ ++# CHECK: vfcvtl.d.s $vr16, $vr10 ++# CHECK: encoding: [0x50,0xf1,0x9d,0x72] ++vfcvtl.d.s $vr16, $vr10 ++ ++# CHECK: vfcvth.d.s $vr28, $vr25 ++# CHECK: encoding: [0x3c,0xf7,0x9d,0x72] ++vfcvth.d.s $vr28, $vr25 ++ ++# CHECK: vffint.s.w $vr28, $vr16 ++# CHECK: encoding: [0x1c,0x02,0x9e,0x72] ++vffint.s.w $vr28, $vr16 ++ ++# CHECK: vffint.s.wu $vr4, $vr31 ++# CHECK: encoding: [0xe4,0x07,0x9e,0x72] ++vffint.s.wu $vr4, $vr31 ++ ++# CHECK: vffint.d.l $vr18, $vr25 ++# CHECK: encoding: [0x32,0x0b,0x9e,0x72] ++vffint.d.l $vr18, $vr25 ++ ++# CHECK: vffint.d.lu $vr24, $vr17 ++# CHECK: encoding: [0x38,0x0e,0x9e,0x72] ++vffint.d.lu $vr24, $vr17 ++ ++# CHECK: vffintl.d.w $vr2, $vr27 ++# CHECK: encoding: [0x62,0x13,0x9e,0x72] ++vffintl.d.w $vr2, $vr27 ++ ++# CHECK: vffinth.d.w $vr4, $vr16 ++# CHECK: encoding: [0x04,0x16,0x9e,0x72] ++vffinth.d.w $vr4, $vr16 ++ ++# CHECK: vftint.w.s $vr17, $vr0 ++# CHECK: encoding: [0x11,0x30,0x9e,0x72] ++vftint.w.s $vr17, $vr0 ++ ++# CHECK: vftint.l.d $vr23, $vr18 ++# CHECK: encoding: [0x57,0x36,0x9e,0x72] ++vftint.l.d $vr23, $vr18 ++ ++# CHECK: vftintrm.w.s $vr23, $vr4 ++# CHECK: encoding: [0x97,0x38,0x9e,0x72] ++vftintrm.w.s $vr23, $vr4 ++ ++# CHECK: vftintrm.l.d $vr30, $vr14 ++# CHECK: encoding: [0xde,0x3d,0x9e,0x72] ++vftintrm.l.d $vr30, $vr14 ++ ++# CHECK: vftintrp.w.s $vr7, $vr0 ++# CHECK: encoding: [0x07,0x40,0x9e,0x72] ++vftintrp.w.s $vr7, $vr0 ++ ++# CHECK: vftintrp.l.d $vr28, $vr20 ++# CHECK: encoding: [0x9c,0x46,0x9e,0x72] ++vftintrp.l.d $vr28, $vr20 ++ ++# CHECK: vftintrz.w.s $vr28, $vr31 ++# CHECK: encoding: [0xfc,0x4b,0x9e,0x72] ++vftintrz.w.s $vr28, $vr31 ++ ++# CHECK: vftintrz.l.d $vr18, $vr0 ++# CHECK: encoding: [0x12,0x4c,0x9e,0x72] ++vftintrz.l.d $vr18, $vr0 ++ ++# CHECK: vftintrne.w.s $vr14, $vr17 ++# CHECK: encoding: [0x2e,0x52,0x9e,0x72] ++vftintrne.w.s $vr14, $vr17 ++ ++# CHECK: vftintrne.l.d $vr22, $vr18 ++# CHECK: encoding: [0x56,0x56,0x9e,0x72] ++vftintrne.l.d $vr22, $vr18 ++ ++# CHECK: vftint.wu.s $vr26, $vr25 ++# CHECK: encoding: [0x3a,0x5b,0x9e,0x72] ++vftint.wu.s $vr26, $vr25 ++ ++# CHECK: vftint.lu.d $vr9, $vr27 ++# CHECK: encoding: [0x69,0x5f,0x9e,0x72] ++vftint.lu.d $vr9, $vr27 ++ ++# CHECK: vftintrz.wu.s $vr26, $vr22 ++# CHECK: encoding: [0xda,0x72,0x9e,0x72] ++vftintrz.wu.s $vr26, $vr22 ++ ++# CHECK: vftintrz.lu.d $vr29, $vr20 ++# CHECK: encoding: [0x9d,0x76,0x9e,0x72] ++vftintrz.lu.d $vr29, $vr20 ++ ++# CHECK: vftintl.l.s $vr22, $vr1 ++# CHECK: encoding: [0x36,0x80,0x9e,0x72] ++vftintl.l.s $vr22, $vr1 ++ ++# CHECK: vftinth.l.s $vr13, $vr24 ++# CHECK: encoding: [0x0d,0x87,0x9e,0x72] ++vftinth.l.s $vr13, $vr24 ++ ++# CHECK: vftintrml.l.s $vr8, $vr27 
++# CHECK: encoding: [0x68,0x8b,0x9e,0x72] ++vftintrml.l.s $vr8, $vr27 ++ ++# CHECK: vftintrmh.l.s $vr18, $vr28 ++# CHECK: encoding: [0x92,0x8f,0x9e,0x72] ++vftintrmh.l.s $vr18, $vr28 ++ ++# CHECK: vftintrpl.l.s $vr27, $vr28 ++# CHECK: encoding: [0x9b,0x93,0x9e,0x72] ++vftintrpl.l.s $vr27, $vr28 ++ ++# CHECK: vftintrph.l.s $vr20, $vr7 ++# CHECK: encoding: [0xf4,0x94,0x9e,0x72] ++vftintrph.l.s $vr20, $vr7 ++ ++# CHECK: vftintrzl.l.s $vr6, $vr2 ++# CHECK: encoding: [0x46,0x98,0x9e,0x72] ++vftintrzl.l.s $vr6, $vr2 ++ ++# CHECK: vftintrzh.l.s $vr21, $vr6 ++# CHECK: encoding: [0xd5,0x9c,0x9e,0x72] ++vftintrzh.l.s $vr21, $vr6 ++ ++# CHECK: vftintrnel.l.s $vr25, $vr3 ++# CHECK: encoding: [0x79,0xa0,0x9e,0x72] ++vftintrnel.l.s $vr25, $vr3 ++ ++# CHECK: vftintrneh.l.s $vr7, $vr5 ++# CHECK: encoding: [0xa7,0xa4,0x9e,0x72] ++vftintrneh.l.s $vr7, $vr5 ++ ++# CHECK: vexth.h.b $vr9, $vr2 ++# CHECK: encoding: [0x49,0xe0,0x9e,0x72] ++vexth.h.b $vr9, $vr2 ++ ++# CHECK: vexth.w.h $vr4, $vr27 ++# CHECK: encoding: [0x64,0xe7,0x9e,0x72] ++vexth.w.h $vr4, $vr27 ++ ++# CHECK: vexth.d.w $vr23, $vr1 ++# CHECK: encoding: [0x37,0xe8,0x9e,0x72] ++vexth.d.w $vr23, $vr1 ++ ++# CHECK: vexth.q.d $vr15, $vr6 ++# CHECK: encoding: [0xcf,0xec,0x9e,0x72] ++vexth.q.d $vr15, $vr6 ++ ++# CHECK: vexth.hu.bu $vr3, $vr2 ++# CHECK: encoding: [0x43,0xf0,0x9e,0x72] ++vexth.hu.bu $vr3, $vr2 ++ ++# CHECK: vexth.wu.hu $vr31, $vr26 ++# CHECK: encoding: [0x5f,0xf7,0x9e,0x72] ++vexth.wu.hu $vr31, $vr26 ++ ++# CHECK: vexth.du.wu $vr10, $vr31 ++# CHECK: encoding: [0xea,0xfb,0x9e,0x72] ++vexth.du.wu $vr10, $vr31 ++ ++# CHECK: vexth.qu.du $vr28, $vr8 ++# CHECK: encoding: [0x1c,0xfd,0x9e,0x72] ++vexth.qu.du $vr28, $vr8 ++ ++# CHECK: vreplgr2vr.b $vr15, $sp ++# CHECK: encoding: [0x6f,0x00,0x9f,0x72] ++vreplgr2vr.b $vr15, $sp ++ ++# CHECK: vreplgr2vr.h $vr10, $r23 ++# CHECK: encoding: [0xea,0x06,0x9f,0x72] ++vreplgr2vr.h $vr10, $r23 ++ ++# CHECK: vreplgr2vr.w $vr25, $r16 ++# CHECK: encoding: [0x19,0x0a,0x9f,0x72] ++vreplgr2vr.w $vr25, $r16 ++ ++# CHECK: vreplgr2vr.d $vr27, $r7 ++# CHECK: encoding: [0xfb,0x0c,0x9f,0x72] ++vreplgr2vr.d $vr27, $r7 ++ ++# CHECK: vrotri.b $vr24, $vr24, 7 ++# CHECK: encoding: [0x18,0x3f,0xa0,0x72] ++vrotri.b $vr24, $vr24, 7 ++ ++# CHECK: vrotri.h $vr1, $vr5, 0 ++# CHECK: encoding: [0xa1,0x40,0xa0,0x72] ++vrotri.h $vr1, $vr5, 0 ++ ++# CHECK: vrotri.w $vr10, $vr8, 12 ++# CHECK: encoding: [0x0a,0xb1,0xa0,0x72] ++vrotri.w $vr10, $vr8, 12 ++ ++# CHECK: vrotri.d $vr30, $vr29, 42 ++# CHECK: encoding: [0xbe,0xab,0xa1,0x72] ++vrotri.d $vr30, $vr29, 42 ++ ++# CHECK: vsrlri.b $vr1, $vr16, 3 ++# CHECK: encoding: [0x01,0x2e,0xa4,0x72] ++vsrlri.b $vr1, $vr16, 3 ++ ++# CHECK: vsrlri.h $vr28, $vr21, 1 ++# CHECK: encoding: [0xbc,0x46,0xa4,0x72] ++vsrlri.h $vr28, $vr21, 1 ++ ++# CHECK: vsrlri.w $vr18, $vr4, 15 ++# CHECK: encoding: [0x92,0xbc,0xa4,0x72] ++vsrlri.w $vr18, $vr4, 15 ++ ++# CHECK: vsrlri.d $vr30, $vr3, 19 ++# CHECK: encoding: [0x7e,0x4c,0xa5,0x72] ++vsrlri.d $vr30, $vr3, 19 ++ ++# CHECK: vsrari.b $vr13, $vr0, 7 ++# CHECK: encoding: [0x0d,0x3c,0xa8,0x72] ++vsrari.b $vr13, $vr0, 7 ++ ++# CHECK: vsrari.h $vr17, $vr9, 6 ++# CHECK: encoding: [0x31,0x59,0xa8,0x72] ++vsrari.h $vr17, $vr9, 6 ++ ++# CHECK: vsrari.w $vr3, $vr28, 6 ++# CHECK: encoding: [0x83,0x9b,0xa8,0x72] ++vsrari.w $vr3, $vr28, 6 ++ ++# CHECK: vsrari.d $vr4, $vr2, 34 ++# CHECK: encoding: [0x44,0x88,0xa9,0x72] ++vsrari.d $vr4, $vr2, 34 ++ ++# CHECK: vinsgr2vr.b $vr8, $r8, 4 ++# CHECK: encoding: [0x08,0x91,0xeb,0x72] ++vinsgr2vr.b $vr8, $r8, 4 ++ ++# CHECK: vinsgr2vr.h 
$vr13, $r7, 1 ++# CHECK: encoding: [0xed,0xc4,0xeb,0x72] ++vinsgr2vr.h $vr13, $r7, 1 ++ ++# CHECK: vinsgr2vr.w $vr4, $r6, 3 ++# CHECK: encoding: [0xc4,0xec,0xeb,0x72] ++vinsgr2vr.w $vr4, $r6, 3 ++ ++# CHECK: vinsgr2vr.d $vr23, $r31, 0 ++# CHECK: encoding: [0xf7,0xf3,0xeb,0x72] ++vinsgr2vr.d $vr23, $r31, 0 ++ ++# CHECK: vpickve2gr.b $r24, $vr16, 10 ++# CHECK: encoding: [0x18,0xaa,0xef,0x72] ++vpickve2gr.b $r24, $vr16, 10 ++ ++# CHECK: vpickve2gr.h $r17, $vr25, 3 ++# CHECK: encoding: [0x31,0xcf,0xef,0x72] ++vpickve2gr.h $r17, $vr25, 3 ++ ++# CHECK: vpickve2gr.w $r30, $vr28, 2 ++# CHECK: encoding: [0x9e,0xeb,0xef,0x72] ++vpickve2gr.w $r30, $vr28, 2 ++ ++# CHECK: vpickve2gr.d $r25, $vr9, 1 ++# CHECK: encoding: [0x39,0xf5,0xef,0x72] ++vpickve2gr.d $r25, $vr9, 1 ++ ++# CHECK: vpickve2gr.bu $r31, $vr14, 2 ++# CHECK: encoding: [0xdf,0x89,0xf3,0x72] ++vpickve2gr.bu $r31, $vr14, 2 ++ ++# CHECK: vpickve2gr.hu $r12, $vr1, 6 ++# CHECK: encoding: [0x2c,0xd8,0xf3,0x72] ++vpickve2gr.hu $r12, $vr1, 6 ++ ++# CHECK: vpickve2gr.wu $r10, $vr17, 1 ++# CHECK: encoding: [0x2a,0xe6,0xf3,0x72] ++vpickve2gr.wu $r10, $vr17, 1 ++ ++# CHECK: vpickve2gr.du $r26, $vr8, 1 ++# CHECK: encoding: [0x1a,0xf5,0xf3,0x72] ++vpickve2gr.du $r26, $vr8, 1 ++ ++# CHECK: vreplvei.b $vr3, $vr6, 12 ++# CHECK: encoding: [0xc3,0xb0,0xf7,0x72] ++vreplvei.b $vr3, $vr6, 12 ++ ++# CHECK: vreplvei.h $vr22, $vr29, 7 ++# CHECK: encoding: [0xb6,0xdf,0xf7,0x72] ++vreplvei.h $vr22, $vr29, 7 ++ ++# CHECK: vreplvei.w $vr17, $vr26, 1 ++# CHECK: encoding: [0x51,0xe7,0xf7,0x72] ++vreplvei.w $vr17, $vr26, 1 ++ ++# CHECK: vreplvei.d $vr0, $vr17, 1 ++# CHECK: encoding: [0x20,0xf6,0xf7,0x72] ++vreplvei.d $vr0, $vr17, 1 ++ ++# CHECK: vsllwil.h.b $vr25, $vr14, 2 ++# CHECK: encoding: [0xd9,0x29,0x08,0x73] ++vsllwil.h.b $vr25, $vr14, 2 ++ ++# CHECK: vsllwil.w.h $vr24, $vr5, 1 ++# CHECK: encoding: [0xb8,0x44,0x08,0x73] ++vsllwil.w.h $vr24, $vr5, 1 ++ ++# CHECK: vsllwil.d.w $vr25, $vr14, 8 ++# CHECK: encoding: [0xd9,0xa1,0x08,0x73] ++vsllwil.d.w $vr25, $vr14, 8 ++ ++# CHECK: vextl.q.d $vr3, $vr22 ++# CHECK: encoding: [0xc3,0x02,0x09,0x73] ++vextl.q.d $vr3, $vr22 ++ ++# CHECK: vsllwil.hu.bu $vr11, $vr25, 3 ++# CHECK: encoding: [0x2b,0x2f,0x0c,0x73] ++vsllwil.hu.bu $vr11, $vr25, 3 ++ ++# CHECK: vsllwil.wu.hu $vr2, $vr26, 10 ++# CHECK: encoding: [0x42,0x6b,0x0c,0x73] ++vsllwil.wu.hu $vr2, $vr26, 10 ++ ++# CHECK: vsllwil.du.wu $vr18, $vr9, 28 ++# CHECK: encoding: [0x32,0xf1,0x0c,0x73] ++vsllwil.du.wu $vr18, $vr9, 28 ++ ++# CHECK: vextl.qu.du $vr13, $vr25 ++# CHECK: encoding: [0x2d,0x03,0x0d,0x73] ++vextl.qu.du $vr13, $vr25 ++ ++# CHECK: vbitclri.b $vr29, $vr24, 6 ++# CHECK: encoding: [0x1d,0x3b,0x10,0x73] ++vbitclri.b $vr29, $vr24, 6 ++ ++# CHECK: vbitclri.h $vr27, $vr15, 5 ++# CHECK: encoding: [0xfb,0x55,0x10,0x73] ++vbitclri.h $vr27, $vr15, 5 ++ ++# CHECK: vbitclri.w $vr11, $vr10, 8 ++# CHECK: encoding: [0x4b,0xa1,0x10,0x73] ++vbitclri.w $vr11, $vr10, 8 ++ ++# CHECK: vbitclri.d $vr4, $vr7, 15 ++# CHECK: encoding: [0xe4,0x3c,0x11,0x73] ++vbitclri.d $vr4, $vr7, 15 ++ ++# CHECK: vbitseti.b $vr24, $vr20, 3 ++# CHECK: encoding: [0x98,0x2e,0x14,0x73] ++vbitseti.b $vr24, $vr20, 3 ++ ++# CHECK: vbitseti.h $vr6, $vr8, 8 ++# CHECK: encoding: [0x06,0x61,0x14,0x73] ++vbitseti.h $vr6, $vr8, 8 ++ ++# CHECK: vbitseti.w $vr21, $vr9, 24 ++# CHECK: encoding: [0x35,0xe1,0x14,0x73] ++vbitseti.w $vr21, $vr9, 24 ++ ++# CHECK: vbitseti.d $vr28, $vr18, 30 ++# CHECK: encoding: [0x5c,0x7a,0x15,0x73] ++vbitseti.d $vr28, $vr18, 30 ++ ++# CHECK: vbitrevi.b $vr19, $vr31, 0 ++# CHECK: encoding: 
[0xf3,0x23,0x18,0x73] ++vbitrevi.b $vr19, $vr31, 0 ++ ++# CHECK: vbitrevi.h $vr18, $vr1, 0 ++# CHECK: encoding: [0x32,0x40,0x18,0x73] ++vbitrevi.h $vr18, $vr1, 0 ++ ++# CHECK: vbitrevi.w $vr25, $vr6, 18 ++# CHECK: encoding: [0xd9,0xc8,0x18,0x73] ++vbitrevi.w $vr25, $vr6, 18 ++ ++# CHECK: vbitrevi.d $vr8, $vr27, 22 ++# CHECK: encoding: [0x68,0x5b,0x19,0x73] ++vbitrevi.d $vr8, $vr27, 22 ++ ++# CHECK: vsat.b $vr21, $vr28, 2 ++# CHECK: encoding: [0x95,0x2b,0x24,0x73] ++vsat.b $vr21, $vr28, 2 ++ ++# CHECK: vsat.h $vr6, $vr5, 12 ++# CHECK: encoding: [0xa6,0x70,0x24,0x73] ++vsat.h $vr6, $vr5, 12 ++ ++# CHECK: vsat.w $vr3, $vr30, 16 ++# CHECK: encoding: [0xc3,0xc3,0x24,0x73] ++vsat.w $vr3, $vr30, 16 ++ ++# CHECK: vsat.d $vr0, $vr31, 24 ++# CHECK: encoding: [0xe0,0x63,0x25,0x73] ++vsat.d $vr0, $vr31, 24 ++ ++# CHECK: vsat.bu $vr20, $vr20, 2 ++# CHECK: encoding: [0x94,0x2a,0x28,0x73] ++vsat.bu $vr20, $vr20, 2 ++ ++# CHECK: vsat.hu $vr8, $vr6, 12 ++# CHECK: encoding: [0xc8,0x70,0x28,0x73] ++vsat.hu $vr8, $vr6, 12 ++ ++# CHECK: vsat.wu $vr18, $vr20, 26 ++# CHECK: encoding: [0x92,0xea,0x28,0x73] ++vsat.wu $vr18, $vr20, 26 ++ ++# CHECK: vsat.du $vr10, $vr6, 33 ++# CHECK: encoding: [0xca,0x84,0x29,0x73] ++vsat.du $vr10, $vr6, 33 ++ ++# CHECK: vslli.b $vr4, $vr19, 3 ++# CHECK: encoding: [0x64,0x2e,0x2c,0x73] ++vslli.b $vr4, $vr19, 3 ++ ++# CHECK: vslli.h $vr3, $vr23, 14 ++# CHECK: encoding: [0xe3,0x7a,0x2c,0x73] ++vslli.h $vr3, $vr23, 14 ++ ++# CHECK: vslli.w $vr22, $vr21, 6 ++# CHECK: encoding: [0xb6,0x9a,0x2c,0x73] ++vslli.w $vr22, $vr21, 6 ++ ++# CHECK: vslli.d $vr23, $vr15, 36 ++# CHECK: encoding: [0xf7,0x91,0x2d,0x73] ++vslli.d $vr23, $vr15, 36 ++ ++# CHECK: vsrli.b $vr5, $vr25, 4 ++# CHECK: encoding: [0x25,0x33,0x30,0x73] ++vsrli.b $vr5, $vr25, 4 ++ ++# CHECK: vsrli.h $vr9, $vr14, 9 ++# CHECK: encoding: [0xc9,0x65,0x30,0x73] ++vsrli.h $vr9, $vr14, 9 ++ ++# CHECK: vsrli.w $vr7, $vr24, 12 ++# CHECK: encoding: [0x07,0xb3,0x30,0x73] ++vsrli.w $vr7, $vr24, 12 ++ ++# CHECK: vsrli.d $vr15, $vr18, 63 ++# CHECK: encoding: [0x4f,0xfe,0x31,0x73] ++vsrli.d $vr15, $vr18, 63 ++ ++# CHECK: vsrai.b $vr6, $vr1, 3 ++# CHECK: encoding: [0x26,0x2c,0x34,0x73] ++vsrai.b $vr6, $vr1, 3 ++ ++# CHECK: vsrai.h $vr7, $vr29, 3 ++# CHECK: encoding: [0xa7,0x4f,0x34,0x73] ++vsrai.h $vr7, $vr29, 3 ++ ++# CHECK: vsrai.w $vr31, $vr27, 29 ++# CHECK: encoding: [0x7f,0xf7,0x34,0x73] ++vsrai.w $vr31, $vr27, 29 ++ ++# CHECK: vsrai.d $vr28, $vr30, 56 ++# CHECK: encoding: [0xdc,0xe3,0x35,0x73] ++vsrai.d $vr28, $vr30, 56 ++ ++# CHECK: vsrlni.b.h $vr2, $vr26, 2 ++# CHECK: encoding: [0x42,0x4b,0x40,0x73] ++vsrlni.b.h $vr2, $vr26, 2 ++ ++# CHECK: vsrlni.h.w $vr31, $vr14, 3 ++# CHECK: encoding: [0xdf,0x8d,0x40,0x73] ++vsrlni.h.w $vr31, $vr14, 3 ++ ++# CHECK: vsrlni.w.d $vr19, $vr4, 33 ++# CHECK: encoding: [0x93,0x84,0x41,0x73] ++vsrlni.w.d $vr19, $vr4, 33 ++ ++# CHECK: vsrlni.d.q $vr31, $vr3, 63 ++# CHECK: encoding: [0x7f,0xfc,0x42,0x73] ++vsrlni.d.q $vr31, $vr3, 63 ++ ++# CHECK: vsrlrni.b.h $vr26, $vr18, 0 ++# CHECK: encoding: [0x5a,0x42,0x44,0x73] ++vsrlrni.b.h $vr26, $vr18, 0 ++ ++# CHECK: vsrlrni.h.w $vr18, $vr22, 5 ++# CHECK: encoding: [0xd2,0x96,0x44,0x73] ++vsrlrni.h.w $vr18, $vr22, 5 ++ ++# CHECK: vsrlrni.w.d $vr24, $vr11, 21 ++# CHECK: encoding: [0x78,0x55,0x45,0x73] ++vsrlrni.w.d $vr24, $vr11, 21 ++ ++# CHECK: vsrlrni.d.q $vr6, $vr11, 37 ++# CHECK: encoding: [0x66,0x95,0x46,0x73] ++vsrlrni.d.q $vr6, $vr11, 37 ++ ++# CHECK: vssrlni.b.h $vr3, $vr21, 5 ++# CHECK: encoding: [0xa3,0x56,0x48,0x73] ++vssrlni.b.h $vr3, $vr21, 5 ++ ++# CHECK: 
vssrlni.h.w $vr6, $vr1, 16 ++# CHECK: encoding: [0x26,0xc0,0x48,0x73] ++vssrlni.h.w $vr6, $vr1, 16 ++ ++# CHECK: vssrlni.w.d $vr4, $vr21, 27 ++# CHECK: encoding: [0xa4,0x6e,0x49,0x73] ++vssrlni.w.d $vr4, $vr21, 27 ++ ++# CHECK: vssrlni.d.q $vr8, $vr18, 94 ++# CHECK: encoding: [0x48,0x7a,0x4b,0x73] ++vssrlni.d.q $vr8, $vr18, 94 ++ ++# CHECK: vssrlni.bu.h $vr6, $vr2, 5 ++# CHECK: encoding: [0x46,0x54,0x4c,0x73] ++vssrlni.bu.h $vr6, $vr2, 5 ++ ++# CHECK: vssrlni.hu.w $vr29, $vr29, 2 ++# CHECK: encoding: [0xbd,0x8b,0x4c,0x73] ++vssrlni.hu.w $vr29, $vr29, 2 ++ ++# CHECK: vssrlni.wu.d $vr28, $vr20, 47 ++# CHECK: encoding: [0x9c,0xbe,0x4d,0x73] ++vssrlni.wu.d $vr28, $vr20, 47 ++ ++# CHECK: vssrlni.du.q $vr22, $vr10, 82 ++# CHECK: encoding: [0x56,0x49,0x4f,0x73] ++vssrlni.du.q $vr22, $vr10, 82 ++ ++# CHECK: vssrlrni.b.h $vr17, $vr25, 10 ++# CHECK: encoding: [0x31,0x6b,0x50,0x73] ++vssrlrni.b.h $vr17, $vr25, 10 ++ ++# CHECK: vssrlrni.h.w $vr21, $vr29, 0 ++# CHECK: encoding: [0xb5,0x83,0x50,0x73] ++vssrlrni.h.w $vr21, $vr29, 0 ++ ++# CHECK: vssrlrni.w.d $vr9, $vr15, 63 ++# CHECK: encoding: [0xe9,0xfd,0x51,0x73] ++vssrlrni.w.d $vr9, $vr15, 63 ++ ++# CHECK: vssrlrni.d.q $vr4, $vr1, 117 ++# CHECK: encoding: [0x24,0xd4,0x53,0x73] ++vssrlrni.d.q $vr4, $vr1, 117 ++ ++# CHECK: vssrlrni.bu.h $vr25, $vr13, 3 ++# CHECK: encoding: [0xb9,0x4d,0x54,0x73] ++vssrlrni.bu.h $vr25, $vr13, 3 ++ ++# CHECK: vssrlrni.hu.w $vr30, $vr28, 7 ++# CHECK: encoding: [0x9e,0x9f,0x54,0x73] ++vssrlrni.hu.w $vr30, $vr28, 7 ++ ++# CHECK: vssrlrni.wu.d $vr16, $vr27, 11 ++# CHECK: encoding: [0x70,0x2f,0x55,0x73] ++vssrlrni.wu.d $vr16, $vr27, 11 ++ ++# CHECK: vssrlrni.du.q $vr20, $vr13, 63 ++# CHECK: encoding: [0xb4,0xfd,0x56,0x73] ++vssrlrni.du.q $vr20, $vr13, 63 ++ ++# CHECK: vsrani.b.h $vr3, $vr25, 4 ++# CHECK: encoding: [0x23,0x53,0x58,0x73] ++vsrani.b.h $vr3, $vr25, 4 ++ ++# CHECK: vsrani.h.w $vr12, $vr13, 17 ++# CHECK: encoding: [0xac,0xc5,0x58,0x73] ++vsrani.h.w $vr12, $vr13, 17 ++ ++# CHECK: vsrani.w.d $vr2, $vr6, 25 ++# CHECK: encoding: [0xc2,0x64,0x59,0x73] ++vsrani.w.d $vr2, $vr6, 25 ++ ++# CHECK: vsrani.d.q $vr12, $vr8, 105 ++# CHECK: encoding: [0x0c,0xa5,0x5b,0x73] ++vsrani.d.q $vr12, $vr8, 105 ++ ++# CHECK: vsrarni.b.h $vr27, $vr21, 2 ++# CHECK: encoding: [0xbb,0x4a,0x5c,0x73] ++vsrarni.b.h $vr27, $vr21, 2 ++ ++# CHECK: vsrarni.h.w $vr13, $vr3, 0 ++# CHECK: encoding: [0x6d,0x80,0x5c,0x73] ++vsrarni.h.w $vr13, $vr3, 0 ++ ++# CHECK: vsrarni.w.d $vr9, $vr31, 42 ++# CHECK: encoding: [0xe9,0xab,0x5d,0x73] ++vsrarni.w.d $vr9, $vr31, 42 ++ ++# CHECK: vsrarni.d.q $vr25, $vr5, 59 ++# CHECK: encoding: [0xb9,0xec,0x5e,0x73] ++vsrarni.d.q $vr25, $vr5, 59 ++ ++# CHECK: vssrani.b.h $vr8, $vr7, 12 ++# CHECK: encoding: [0xe8,0x70,0x60,0x73] ++vssrani.b.h $vr8, $vr7, 12 ++ ++# CHECK: vssrani.h.w $vr21, $vr18, 30 ++# CHECK: encoding: [0x55,0xfa,0x60,0x73] ++vssrani.h.w $vr21, $vr18, 30 ++ ++# CHECK: vssrani.w.d $vr23, $vr7, 51 ++# CHECK: encoding: [0xf7,0xcc,0x61,0x73] ++vssrani.w.d $vr23, $vr7, 51 ++ ++# CHECK: vssrani.d.q $vr12, $vr14, 8 ++# CHECK: encoding: [0xcc,0x21,0x62,0x73] ++vssrani.d.q $vr12, $vr14, 8 ++ ++# CHECK: vssrani.bu.h $vr19, $vr5, 12 ++# CHECK: encoding: [0xb3,0x70,0x64,0x73] ++vssrani.bu.h $vr19, $vr5, 12 ++ ++# CHECK: vssrani.hu.w $vr27, $vr25, 15 ++# CHECK: encoding: [0x3b,0xbf,0x64,0x73] ++vssrani.hu.w $vr27, $vr25, 15 ++ ++# CHECK: vssrani.wu.d $vr24, $vr28, 42 ++# CHECK: encoding: [0x98,0xab,0x65,0x73] ++vssrani.wu.d $vr24, $vr28, 42 ++ ++# CHECK: vssrani.du.q $vr4, $vr23, 63 ++# CHECK: encoding: 
[0xe4,0xfe,0x66,0x73] ++vssrani.du.q $vr4, $vr23, 63 ++ ++# CHECK: vssrarni.b.h $vr26, $vr8, 0 ++# CHECK: encoding: [0x1a,0x41,0x68,0x73] ++vssrarni.b.h $vr26, $vr8, 0 ++ ++# CHECK: vssrarni.h.w $vr4, $vr3, 25 ++# CHECK: encoding: [0x64,0xe4,0x68,0x73] ++vssrarni.h.w $vr4, $vr3, 25 ++ ++# CHECK: vssrarni.w.d $vr0, $vr25, 19 ++# CHECK: encoding: [0x20,0x4f,0x69,0x73] ++vssrarni.w.d $vr0, $vr25, 19 ++ ++# CHECK: vssrarni.d.q $vr20, $vr11, 106 ++# CHECK: encoding: [0x74,0xa9,0x6b,0x73] ++vssrarni.d.q $vr20, $vr11, 106 ++ ++# CHECK: vssrarni.bu.h $vr25, $vr28, 9 ++# CHECK: encoding: [0x99,0x67,0x6c,0x73] ++vssrarni.bu.h $vr25, $vr28, 9 ++ ++# CHECK: vssrarni.hu.w $vr20, $vr23, 12 ++# CHECK: encoding: [0xf4,0xb2,0x6c,0x73] ++vssrarni.hu.w $vr20, $vr23, 12 ++ ++# CHECK: vssrarni.wu.d $vr28, $vr23, 58 ++# CHECK: encoding: [0xfc,0xea,0x6d,0x73] ++vssrarni.wu.d $vr28, $vr23, 58 ++ ++# CHECK: vssrarni.du.q $vr1, $vr14, 93 ++# CHECK: encoding: [0xc1,0x75,0x6f,0x73] ++vssrarni.du.q $vr1, $vr14, 93 ++ ++# CHECK: vextrins.d $vr15, $vr27, 7 ++# CHECK: encoding: [0x6f,0x1f,0x80,0x73] ++vextrins.d $vr15, $vr27, 7 ++ ++# CHECK: vextrins.w $vr19, $vr0, 147 ++# CHECK: encoding: [0x13,0x4c,0x86,0x73] ++vextrins.w $vr19, $vr0, 147 ++ ++# CHECK: vextrins.h $vr29, $vr9, 69 ++# CHECK: encoding: [0x3d,0x15,0x89,0x73] ++vextrins.h $vr29, $vr9, 69 ++ ++# CHECK: vextrins.b $vr0, $vr21, 23 ++# CHECK: encoding: [0xa0,0x5e,0x8c,0x73] ++vextrins.b $vr0, $vr21, 23 ++ ++# CHECK: vshuf4i.b $vr19, $vr10, 188 ++# CHECK: encoding: [0x53,0xf1,0x92,0x73] ++vshuf4i.b $vr19, $vr10, 188 ++ ++# CHECK: vshuf4i.h $vr15, $vr1, 139 ++# CHECK: encoding: [0x2f,0x2c,0x96,0x73] ++vshuf4i.h $vr15, $vr1, 139 ++ ++# CHECK: vshuf4i.w $vr3, $vr5, 130 ++# CHECK: encoding: [0xa3,0x08,0x9a,0x73] ++vshuf4i.w $vr3, $vr5, 130 ++ ++# CHECK: vshuf4i.d $vr8, $vr29, 131 ++# CHECK: encoding: [0xa8,0x0f,0x9e,0x73] ++vshuf4i.d $vr8, $vr29, 131 ++ ++# CHECK: vbitseli.b $vr16, $vr25, 168 ++# CHECK: encoding: [0x30,0xa3,0xc6,0x73] ++vbitseli.b $vr16, $vr25, 168 ++ ++# CHECK: vandi.b $vr4, $vr23, 121 ++# CHECK: encoding: [0xe4,0xe6,0xd1,0x73] ++vandi.b $vr4, $vr23, 121 ++ ++# CHECK: vori.b $vr7, $vr10, 188 ++# CHECK: encoding: [0x47,0xf1,0xd6,0x73] ++vori.b $vr7, $vr10, 188 ++ ++# CHECK: vxori.b $vr9, $vr26, 216 ++# CHECK: encoding: [0x49,0x63,0xdb,0x73] ++vxori.b $vr9, $vr26, 216 ++ ++# CHECK: vnori.b $vr4, $vr28, 219 ++# CHECK: encoding: [0x84,0x6f,0xdf,0x73] ++vnori.b $vr4, $vr28, 219 ++ ++# CHECK: vldi $vr22, -3742 ++# CHECK: encoding: [0x56,0x2c,0xe2,0x73] ++vldi $vr22, -3742 ++ ++# CHECK: vpermi.w $vr14, $vr29, 16 ++# CHECK: encoding: [0xae,0x43,0xe4,0x73] ++vpermi.w $vr14, $vr29, 16 ++ ++# CHECK: xvseq.b $xr11, $xr23, $xr21 ++# CHECK: encoding: [0xeb,0x56,0x00,0x74] ++xvseq.b $xr11, $xr23, $xr21 ++ ++# CHECK: xvseq.h $xr6, $xr10, $xr27 ++# CHECK: encoding: [0x46,0xed,0x00,0x74] ++xvseq.h $xr6, $xr10, $xr27 ++ ++# CHECK: xvseq.w $xr19, $xr27, $xr21 ++# CHECK: encoding: [0x73,0x57,0x01,0x74] ++xvseq.w $xr19, $xr27, $xr21 ++ ++# CHECK: xvseq.d $xr18, $xr4, $xr2 ++# CHECK: encoding: [0x92,0x88,0x01,0x74] ++xvseq.d $xr18, $xr4, $xr2 ++ ++# CHECK: xvsle.b $xr19, $xr10, $xr5 ++# CHECK: encoding: [0x53,0x15,0x02,0x74] ++xvsle.b $xr19, $xr10, $xr5 ++ ++# CHECK: xvsle.h $xr10, $xr25, $xr14 ++# CHECK: encoding: [0x2a,0xbb,0x02,0x74] ++xvsle.h $xr10, $xr25, $xr14 ++ ++# CHECK: xvsle.w $xr17, $xr23, $xr18 ++# CHECK: encoding: [0xf1,0x4a,0x03,0x74] ++xvsle.w $xr17, $xr23, $xr18 ++ ++# CHECK: xvsle.d $xr15, $xr7, $xr9 ++# CHECK: encoding: [0xef,0xa4,0x03,0x74] ++xvsle.d 
$xr15, $xr7, $xr9 ++ ++# CHECK: xvsle.bu $xr5, $xr14, $xr15 ++# CHECK: encoding: [0xc5,0x3d,0x04,0x74] ++xvsle.bu $xr5, $xr14, $xr15 ++ ++# CHECK: xvsle.hu $xr9, $xr25, $xr25 ++# CHECK: encoding: [0x29,0xe7,0x04,0x74] ++xvsle.hu $xr9, $xr25, $xr25 ++ ++# CHECK: xvsle.wu $xr28, $xr31, $xr16 ++# CHECK: encoding: [0xfc,0x43,0x05,0x74] ++xvsle.wu $xr28, $xr31, $xr16 ++ ++# CHECK: xvsle.du $xr17, $xr24, $xr24 ++# CHECK: encoding: [0x11,0xe3,0x05,0x74] ++xvsle.du $xr17, $xr24, $xr24 ++ ++# CHECK: xvslt.b $xr18, $xr28, $xr25 ++# CHECK: encoding: [0x92,0x67,0x06,0x74] ++xvslt.b $xr18, $xr28, $xr25 ++ ++# CHECK: xvslt.h $xr29, $xr6, $xr2 ++# CHECK: encoding: [0xdd,0x88,0x06,0x74] ++xvslt.h $xr29, $xr6, $xr2 ++ ++# CHECK: xvslt.w $xr14, $xr10, $xr5 ++# CHECK: encoding: [0x4e,0x15,0x07,0x74] ++xvslt.w $xr14, $xr10, $xr5 ++ ++# CHECK: xvslt.d $xr19, $xr30, $xr15 ++# CHECK: encoding: [0xd3,0xbf,0x07,0x74] ++xvslt.d $xr19, $xr30, $xr15 ++ ++# CHECK: xvslt.bu $xr14, $xr6, $xr27 ++# CHECK: encoding: [0xce,0x6c,0x08,0x74] ++xvslt.bu $xr14, $xr6, $xr27 ++ ++# CHECK: xvslt.hu $xr27, $xr26, $xr5 ++# CHECK: encoding: [0x5b,0x97,0x08,0x74] ++xvslt.hu $xr27, $xr26, $xr5 ++ ++# CHECK: xvslt.wu $xr6, $xr9, $xr10 ++# CHECK: encoding: [0x26,0x29,0x09,0x74] ++xvslt.wu $xr6, $xr9, $xr10 ++ ++# CHECK: xvslt.du $xr13, $xr12, $xr28 ++# CHECK: encoding: [0x8d,0xf1,0x09,0x74] ++xvslt.du $xr13, $xr12, $xr28 ++ ++# CHECK: xvadd.b $xr0, $xr6, $xr3 ++# CHECK: encoding: [0xc0,0x0c,0x0a,0x74] ++xvadd.b $xr0, $xr6, $xr3 ++ ++# CHECK: xvadd.h $xr8, $xr11, $xr10 ++# CHECK: encoding: [0x68,0xa9,0x0a,0x74] ++xvadd.h $xr8, $xr11, $xr10 ++ ++# CHECK: xvadd.w $xr5, $xr6, $xr21 ++# CHECK: encoding: [0xc5,0x54,0x0b,0x74] ++xvadd.w $xr5, $xr6, $xr21 ++ ++# CHECK: xvadd.d $xr4, $xr21, $xr10 ++# CHECK: encoding: [0xa4,0xaa,0x0b,0x74] ++xvadd.d $xr4, $xr21, $xr10 ++ ++# CHECK: xvsub.b $xr16, $xr0, $xr30 ++# CHECK: encoding: [0x10,0x78,0x0c,0x74] ++xvsub.b $xr16, $xr0, $xr30 ++ ++# CHECK: xvsub.h $xr28, $xr11, $xr18 ++# CHECK: encoding: [0x7c,0xc9,0x0c,0x74] ++xvsub.h $xr28, $xr11, $xr18 ++ ++# CHECK: xvsub.w $xr13, $xr2, $xr13 ++# CHECK: encoding: [0x4d,0x34,0x0d,0x74] ++xvsub.w $xr13, $xr2, $xr13 ++ ++# CHECK: xvsub.d $xr0, $xr25, $xr21 ++# CHECK: encoding: [0x20,0xd7,0x0d,0x74] ++xvsub.d $xr0, $xr25, $xr21 ++ ++# CHECK: xvaddwev.h.b $xr8, $xr30, $xr11 ++# CHECK: encoding: [0xc8,0x2f,0x1e,0x74] ++xvaddwev.h.b $xr8, $xr30, $xr11 ++ ++# CHECK: xvaddwev.w.h $xr10, $xr30, $xr5 ++# CHECK: encoding: [0xca,0x97,0x1e,0x74] ++xvaddwev.w.h $xr10, $xr30, $xr5 ++ ++# CHECK: xvaddwev.d.w $xr20, $xr25, $xr1 ++# CHECK: encoding: [0x34,0x07,0x1f,0x74] ++xvaddwev.d.w $xr20, $xr25, $xr1 ++ ++# CHECK: xvaddwev.q.d $xr22, $xr24, $xr24 ++# CHECK: encoding: [0x16,0xe3,0x1f,0x74] ++xvaddwev.q.d $xr22, $xr24, $xr24 ++ ++# CHECK: xvsubwev.h.b $xr1, $xr25, $xr1 ++# CHECK: encoding: [0x21,0x07,0x20,0x74] ++xvsubwev.h.b $xr1, $xr25, $xr1 ++ ++# CHECK: xvsubwev.w.h $xr4, $xr30, $xr11 ++# CHECK: encoding: [0xc4,0xaf,0x20,0x74] ++xvsubwev.w.h $xr4, $xr30, $xr11 ++ ++# CHECK: xvsubwev.d.w $xr6, $xr2, $xr18 ++# CHECK: encoding: [0x46,0x48,0x21,0x74] ++xvsubwev.d.w $xr6, $xr2, $xr18 ++ ++# CHECK: xvsubwev.q.d $xr0, $xr11, $xr31 ++# CHECK: encoding: [0x60,0xfd,0x21,0x74] ++xvsubwev.q.d $xr0, $xr11, $xr31 ++ ++# CHECK: xvaddwod.h.b $xr4, $xr4, $xr25 ++# CHECK: encoding: [0x84,0x64,0x22,0x74] ++xvaddwod.h.b $xr4, $xr4, $xr25 ++ ++# CHECK: xvaddwod.w.h $xr12, $xr25, $xr29 ++# CHECK: encoding: [0x2c,0xf7,0x22,0x74] ++xvaddwod.w.h $xr12, $xr25, $xr29 ++ ++# CHECK: xvaddwod.d.w 
$xr16, $xr22, $xr19 ++# CHECK: encoding: [0xd0,0x4e,0x23,0x74] ++xvaddwod.d.w $xr16, $xr22, $xr19 ++ ++# CHECK: xvaddwod.q.d $xr23, $xr25, $xr14 ++# CHECK: encoding: [0x37,0xbb,0x23,0x74] ++xvaddwod.q.d $xr23, $xr25, $xr14 ++ ++# CHECK: xvsubwod.h.b $xr1, $xr16, $xr8 ++# CHECK: encoding: [0x01,0x22,0x24,0x74] ++xvsubwod.h.b $xr1, $xr16, $xr8 ++ ++# CHECK: xvsubwod.w.h $xr5, $xr11, $xr8 ++# CHECK: encoding: [0x65,0xa1,0x24,0x74] ++xvsubwod.w.h $xr5, $xr11, $xr8 ++ ++# CHECK: xvsubwod.d.w $xr20, $xr7, $xr0 ++# CHECK: encoding: [0xf4,0x00,0x25,0x74] ++xvsubwod.d.w $xr20, $xr7, $xr0 ++ ++# CHECK: xvsubwod.q.d $xr17, $xr23, $xr20 ++# CHECK: encoding: [0xf1,0xd2,0x25,0x74] ++xvsubwod.q.d $xr17, $xr23, $xr20 ++ ++# CHECK: xvaddwev.h.bu $xr15, $xr10, $xr31 ++# CHECK: encoding: [0x4f,0x7d,0x2e,0x74] ++xvaddwev.h.bu $xr15, $xr10, $xr31 ++ ++# CHECK: xvaddwev.w.hu $xr21, $xr24, $xr28 ++# CHECK: encoding: [0x15,0xf3,0x2e,0x74] ++xvaddwev.w.hu $xr21, $xr24, $xr28 ++ ++# CHECK: xvaddwev.d.wu $xr9, $xr31, $xr14 ++# CHECK: encoding: [0xe9,0x3b,0x2f,0x74] ++xvaddwev.d.wu $xr9, $xr31, $xr14 ++ ++# CHECK: xvaddwev.q.du $xr25, $xr1, $xr8 ++# CHECK: encoding: [0x39,0xa0,0x2f,0x74] ++xvaddwev.q.du $xr25, $xr1, $xr8 ++ ++# CHECK: xvsubwev.h.bu $xr30, $xr31, $xr13 ++# CHECK: encoding: [0xfe,0x37,0x30,0x74] ++xvsubwev.h.bu $xr30, $xr31, $xr13 ++ ++# CHECK: xvsubwev.w.hu $xr1, $xr28, $xr1 ++# CHECK: encoding: [0x81,0x87,0x30,0x74] ++xvsubwev.w.hu $xr1, $xr28, $xr1 ++ ++# CHECK: xvsubwev.d.wu $xr29, $xr23, $xr29 ++# CHECK: encoding: [0xfd,0x76,0x31,0x74] ++xvsubwev.d.wu $xr29, $xr23, $xr29 ++ ++# CHECK: xvsubwev.q.du $xr13, $xr16, $xr27 ++# CHECK: encoding: [0x0d,0xee,0x31,0x74] ++xvsubwev.q.du $xr13, $xr16, $xr27 ++ ++# CHECK: xvaddwod.h.bu $xr13, $xr29, $xr2 ++# CHECK: encoding: [0xad,0x0b,0x32,0x74] ++xvaddwod.h.bu $xr13, $xr29, $xr2 ++ ++# CHECK: xvaddwod.w.hu $xr14, $xr10, $xr13 ++# CHECK: encoding: [0x4e,0xb5,0x32,0x74] ++xvaddwod.w.hu $xr14, $xr10, $xr13 ++ ++# CHECK: xvaddwod.d.wu $xr30, $xr26, $xr10 ++# CHECK: encoding: [0x5e,0x2b,0x33,0x74] ++xvaddwod.d.wu $xr30, $xr26, $xr10 ++ ++# CHECK: xvaddwod.q.du $xr2, $xr13, $xr0 ++# CHECK: encoding: [0xa2,0x81,0x33,0x74] ++xvaddwod.q.du $xr2, $xr13, $xr0 ++ ++# CHECK: xvsubwod.h.bu $xr6, $xr22, $xr5 ++# CHECK: encoding: [0xc6,0x16,0x34,0x74] ++xvsubwod.h.bu $xr6, $xr22, $xr5 ++ ++# CHECK: xvsubwod.w.hu $xr19, $xr21, $xr8 ++# CHECK: encoding: [0xb3,0xa2,0x34,0x74] ++xvsubwod.w.hu $xr19, $xr21, $xr8 ++ ++# CHECK: xvsubwod.d.wu $xr16, $xr11, $xr30 ++# CHECK: encoding: [0x70,0x79,0x35,0x74] ++xvsubwod.d.wu $xr16, $xr11, $xr30 ++ ++# CHECK: xvsubwod.q.du $xr1, $xr26, $xr9 ++# CHECK: encoding: [0x41,0xa7,0x35,0x74] ++xvsubwod.q.du $xr1, $xr26, $xr9 ++ ++# CHECK: xvaddwev.h.bu.b $xr5, $xr13, $xr2 ++# CHECK: encoding: [0xa5,0x09,0x3e,0x74] ++xvaddwev.h.bu.b $xr5, $xr13, $xr2 ++ ++# CHECK: xvaddwev.w.hu.h $xr17, $xr21, $xr20 ++# CHECK: encoding: [0xb1,0xd2,0x3e,0x74] ++xvaddwev.w.hu.h $xr17, $xr21, $xr20 ++ ++# CHECK: xvaddwev.d.wu.w $xr11, $xr27, $xr19 ++# CHECK: encoding: [0x6b,0x4f,0x3f,0x74] ++xvaddwev.d.wu.w $xr11, $xr27, $xr19 ++ ++# CHECK: xvaddwev.q.du.d $xr20, $xr21, $xr29 ++# CHECK: encoding: [0xb4,0xf6,0x3f,0x74] ++xvaddwev.q.du.d $xr20, $xr21, $xr29 ++ ++# CHECK: xvaddwod.h.bu.b $xr1, $xr6, $xr14 ++# CHECK: encoding: [0xc1,0x38,0x40,0x74] ++xvaddwod.h.bu.b $xr1, $xr6, $xr14 ++ ++# CHECK: xvaddwod.w.hu.h $xr7, $xr29, $xr11 ++# CHECK: encoding: [0xa7,0xaf,0x40,0x74] ++xvaddwod.w.hu.h $xr7, $xr29, $xr11 ++ ++# CHECK: xvaddwod.d.wu.w $xr16, $xr10, $xr14 ++# CHECK: 
encoding: [0x50,0x39,0x41,0x74] ++xvaddwod.d.wu.w $xr16, $xr10, $xr14 ++ ++# CHECK: xvaddwod.q.du.d $xr10, $xr11, $xr23 ++# CHECK: encoding: [0x6a,0xdd,0x41,0x74] ++xvaddwod.q.du.d $xr10, $xr11, $xr23 ++ ++# CHECK: xvsadd.b $xr24, $xr10, $xr28 ++# CHECK: encoding: [0x58,0x71,0x46,0x74] ++xvsadd.b $xr24, $xr10, $xr28 ++ ++# CHECK: xvsadd.h $xr19, $xr18, $xr17 ++# CHECK: encoding: [0x53,0xc6,0x46,0x74] ++xvsadd.h $xr19, $xr18, $xr17 ++ ++# CHECK: xvsadd.w $xr2, $xr6, $xr12 ++# CHECK: encoding: [0xc2,0x30,0x47,0x74] ++xvsadd.w $xr2, $xr6, $xr12 ++ ++# CHECK: xvsadd.d $xr15, $xr18, $xr29 ++# CHECK: encoding: [0x4f,0xf6,0x47,0x74] ++xvsadd.d $xr15, $xr18, $xr29 ++ ++# CHECK: xvssub.b $xr15, $xr29, $xr16 ++# CHECK: encoding: [0xaf,0x43,0x48,0x74] ++xvssub.b $xr15, $xr29, $xr16 ++ ++# CHECK: xvssub.h $xr28, $xr3, $xr9 ++# CHECK: encoding: [0x7c,0xa4,0x48,0x74] ++xvssub.h $xr28, $xr3, $xr9 ++ ++# CHECK: xvssub.w $xr8, $xr20, $xr15 ++# CHECK: encoding: [0x88,0x3e,0x49,0x74] ++xvssub.w $xr8, $xr20, $xr15 ++ ++# CHECK: xvssub.d $xr23, $xr8, $xr19 ++# CHECK: encoding: [0x17,0xcd,0x49,0x74] ++xvssub.d $xr23, $xr8, $xr19 ++ ++# CHECK: xvsadd.bu $xr12, $xr4, $xr16 ++# CHECK: encoding: [0x8c,0x40,0x4a,0x74] ++xvsadd.bu $xr12, $xr4, $xr16 ++ ++# CHECK: xvsadd.hu $xr9, $xr26, $xr20 ++# CHECK: encoding: [0x49,0xd3,0x4a,0x74] ++xvsadd.hu $xr9, $xr26, $xr20 ++ ++# CHECK: xvsadd.wu $xr30, $xr15, $xr28 ++# CHECK: encoding: [0xfe,0x71,0x4b,0x74] ++xvsadd.wu $xr30, $xr15, $xr28 ++ ++# CHECK: xvsadd.du $xr15, $xr13, $xr28 ++# CHECK: encoding: [0xaf,0xf1,0x4b,0x74] ++xvsadd.du $xr15, $xr13, $xr28 ++ ++# CHECK: xvssub.bu $xr10, $xr3, $xr15 ++# CHECK: encoding: [0x6a,0x3c,0x4c,0x74] ++xvssub.bu $xr10, $xr3, $xr15 ++ ++# CHECK: xvssub.hu $xr0, $xr12, $xr2 ++# CHECK: encoding: [0x80,0x89,0x4c,0x74] ++xvssub.hu $xr0, $xr12, $xr2 ++ ++# CHECK: xvssub.wu $xr30, $xr10, $xr23 ++# CHECK: encoding: [0x5e,0x5d,0x4d,0x74] ++xvssub.wu $xr30, $xr10, $xr23 ++ ++# CHECK: xvssub.du $xr9, $xr30, $xr14 ++# CHECK: encoding: [0xc9,0xbb,0x4d,0x74] ++xvssub.du $xr9, $xr30, $xr14 ++ ++# CHECK: xvhaddw.h.b $xr25, $xr5, $xr18 ++# CHECK: encoding: [0xb9,0x48,0x54,0x74] ++xvhaddw.h.b $xr25, $xr5, $xr18 ++ ++# CHECK: xvhaddw.w.h $xr7, $xr20, $xr19 ++# CHECK: encoding: [0x87,0xce,0x54,0x74] ++xvhaddw.w.h $xr7, $xr20, $xr19 ++ ++# CHECK: xvhaddw.d.w $xr23, $xr5, $xr4 ++# CHECK: encoding: [0xb7,0x10,0x55,0x74] ++xvhaddw.d.w $xr23, $xr5, $xr4 ++ ++# CHECK: xvhaddw.q.d $xr17, $xr7, $xr25 ++# CHECK: encoding: [0xf1,0xe4,0x55,0x74] ++xvhaddw.q.d $xr17, $xr7, $xr25 ++ ++# CHECK: xvhsubw.h.b $xr29, $xr18, $xr19 ++# CHECK: encoding: [0x5d,0x4e,0x56,0x74] ++xvhsubw.h.b $xr29, $xr18, $xr19 ++ ++# CHECK: xvhsubw.w.h $xr30, $xr28, $xr3 ++# CHECK: encoding: [0x9e,0x8f,0x56,0x74] ++xvhsubw.w.h $xr30, $xr28, $xr3 ++ ++# CHECK: xvhsubw.d.w $xr5, $xr9, $xr13 ++# CHECK: encoding: [0x25,0x35,0x57,0x74] ++xvhsubw.d.w $xr5, $xr9, $xr13 ++ ++# CHECK: xvhsubw.q.d $xr20, $xr12, $xr29 ++# CHECK: encoding: [0x94,0xf5,0x57,0x74] ++xvhsubw.q.d $xr20, $xr12, $xr29 ++ ++# CHECK: xvhaddw.hu.bu $xr11, $xr10, $xr7 ++# CHECK: encoding: [0x4b,0x1d,0x58,0x74] ++xvhaddw.hu.bu $xr11, $xr10, $xr7 ++ ++# CHECK: xvhaddw.wu.hu $xr16, $xr21, $xr21 ++# CHECK: encoding: [0xb0,0xd6,0x58,0x74] ++xvhaddw.wu.hu $xr16, $xr21, $xr21 ++ ++# CHECK: xvhaddw.du.wu $xr17, $xr31, $xr8 ++# CHECK: encoding: [0xf1,0x23,0x59,0x74] ++xvhaddw.du.wu $xr17, $xr31, $xr8 ++ ++# CHECK: xvhaddw.qu.du $xr2, $xr4, $xr11 ++# CHECK: encoding: [0x82,0xac,0x59,0x74] ++xvhaddw.qu.du $xr2, $xr4, $xr11 ++ ++# CHECK: 
xvhsubw.hu.bu $xr21, $xr14, $xr8 ++# CHECK: encoding: [0xd5,0x21,0x5a,0x74] ++xvhsubw.hu.bu $xr21, $xr14, $xr8 ++ ++# CHECK: xvhsubw.wu.hu $xr25, $xr0, $xr27 ++# CHECK: encoding: [0x19,0xec,0x5a,0x74] ++xvhsubw.wu.hu $xr25, $xr0, $xr27 ++ ++# CHECK: xvhsubw.du.wu $xr4, $xr16, $xr30 ++# CHECK: encoding: [0x04,0x7a,0x5b,0x74] ++xvhsubw.du.wu $xr4, $xr16, $xr30 ++ ++# CHECK: xvhsubw.qu.du $xr11, $xr9, $xr6 ++# CHECK: encoding: [0x2b,0x99,0x5b,0x74] ++xvhsubw.qu.du $xr11, $xr9, $xr6 ++ ++# CHECK: xvadda.b $xr14, $xr21, $xr26 ++# CHECK: encoding: [0xae,0x6a,0x5c,0x74] ++xvadda.b $xr14, $xr21, $xr26 ++ ++# CHECK: xvadda.h $xr21, $xr30, $xr21 ++# CHECK: encoding: [0xd5,0xd7,0x5c,0x74] ++xvadda.h $xr21, $xr30, $xr21 ++ ++# CHECK: xvadda.w $xr31, $xr19, $xr19 ++# CHECK: encoding: [0x7f,0x4e,0x5d,0x74] ++xvadda.w $xr31, $xr19, $xr19 ++ ++# CHECK: xvadda.d $xr9, $xr4, $xr31 ++# CHECK: encoding: [0x89,0xfc,0x5d,0x74] ++xvadda.d $xr9, $xr4, $xr31 ++ ++# CHECK: xvabsd.b $xr20, $xr19, $xr13 ++# CHECK: encoding: [0x74,0x36,0x60,0x74] ++xvabsd.b $xr20, $xr19, $xr13 ++ ++# CHECK: xvabsd.h $xr20, $xr7, $xr10 ++# CHECK: encoding: [0xf4,0xa8,0x60,0x74] ++xvabsd.h $xr20, $xr7, $xr10 ++ ++# CHECK: xvabsd.w $xr23, $xr31, $xr0 ++# CHECK: encoding: [0xf7,0x03,0x61,0x74] ++xvabsd.w $xr23, $xr31, $xr0 ++ ++# CHECK: xvabsd.d $xr7, $xr17, $xr14 ++# CHECK: encoding: [0x27,0xba,0x61,0x74] ++xvabsd.d $xr7, $xr17, $xr14 ++ ++# CHECK: xvabsd.bu $xr12, $xr23, $xr6 ++# CHECK: encoding: [0xec,0x1a,0x62,0x74] ++xvabsd.bu $xr12, $xr23, $xr6 ++ ++# CHECK: xvabsd.hu $xr16, $xr30, $xr19 ++# CHECK: encoding: [0xd0,0xcf,0x62,0x74] ++xvabsd.hu $xr16, $xr30, $xr19 ++ ++# CHECK: xvabsd.wu $xr19, $xr5, $xr26 ++# CHECK: encoding: [0xb3,0x68,0x63,0x74] ++xvabsd.wu $xr19, $xr5, $xr26 ++ ++# CHECK: xvabsd.du $xr0, $xr12, $xr7 ++# CHECK: encoding: [0x80,0x9d,0x63,0x74] ++xvabsd.du $xr0, $xr12, $xr7 ++ ++# CHECK: xvavg.b $xr23, $xr31, $xr25 ++# CHECK: encoding: [0xf7,0x67,0x64,0x74] ++xvavg.b $xr23, $xr31, $xr25 ++ ++# CHECK: xvavg.h $xr27, $xr2, $xr27 ++# CHECK: encoding: [0x5b,0xec,0x64,0x74] ++xvavg.h $xr27, $xr2, $xr27 ++ ++# CHECK: xvavg.w $xr20, $xr0, $xr16 ++# CHECK: encoding: [0x14,0x40,0x65,0x74] ++xvavg.w $xr20, $xr0, $xr16 ++ ++# CHECK: xvavg.d $xr13, $xr9, $xr10 ++# CHECK: encoding: [0x2d,0xa9,0x65,0x74] ++xvavg.d $xr13, $xr9, $xr10 ++ ++# CHECK: xvavg.bu $xr31, $xr30, $xr4 ++# CHECK: encoding: [0xdf,0x13,0x66,0x74] ++xvavg.bu $xr31, $xr30, $xr4 ++ ++# CHECK: xvavg.hu $xr22, $xr17, $xr5 ++# CHECK: encoding: [0x36,0x96,0x66,0x74] ++xvavg.hu $xr22, $xr17, $xr5 ++ ++# CHECK: xvavg.wu $xr21, $xr29, $xr17 ++# CHECK: encoding: [0xb5,0x47,0x67,0x74] ++xvavg.wu $xr21, $xr29, $xr17 ++ ++# CHECK: xvavg.du $xr11, $xr5, $xr29 ++# CHECK: encoding: [0xab,0xf4,0x67,0x74] ++xvavg.du $xr11, $xr5, $xr29 ++ ++# CHECK: xvavgr.b $xr23, $xr13, $xr13 ++# CHECK: encoding: [0xb7,0x35,0x68,0x74] ++xvavgr.b $xr23, $xr13, $xr13 ++ ++# CHECK: xvavgr.h $xr30, $xr20, $xr31 ++# CHECK: encoding: [0x9e,0xfe,0x68,0x74] ++xvavgr.h $xr30, $xr20, $xr31 ++ ++# CHECK: xvavgr.w $xr29, $xr28, $xr9 ++# CHECK: encoding: [0x9d,0x27,0x69,0x74] ++xvavgr.w $xr29, $xr28, $xr9 ++ ++# CHECK: xvavgr.d $xr21, $xr20, $xr8 ++# CHECK: encoding: [0x95,0xa2,0x69,0x74] ++xvavgr.d $xr21, $xr20, $xr8 ++ ++# CHECK: xvavgr.bu $xr0, $xr9, $xr4 ++# CHECK: encoding: [0x20,0x11,0x6a,0x74] ++xvavgr.bu $xr0, $xr9, $xr4 ++ ++# CHECK: xvavgr.hu $xr3, $xr0, $xr27 ++# CHECK: encoding: [0x03,0xec,0x6a,0x74] ++xvavgr.hu $xr3, $xr0, $xr27 ++ ++# CHECK: xvavgr.wu $xr2, $xr30, $xr21 ++# CHECK: encoding: 
[0xc2,0x57,0x6b,0x74] ++xvavgr.wu $xr2, $xr30, $xr21 ++ ++# CHECK: xvavgr.du $xr22, $xr21, $xr17 ++# CHECK: encoding: [0xb6,0xc6,0x6b,0x74] ++xvavgr.du $xr22, $xr21, $xr17 ++ ++# CHECK: xvmax.b $xr1, $xr20, $xr19 ++# CHECK: encoding: [0x81,0x4e,0x70,0x74] ++xvmax.b $xr1, $xr20, $xr19 ++ ++# CHECK: xvmax.h $xr0, $xr17, $xr14 ++# CHECK: encoding: [0x20,0xba,0x70,0x74] ++xvmax.h $xr0, $xr17, $xr14 ++ ++# CHECK: xvmax.w $xr0, $xr8, $xr16 ++# CHECK: encoding: [0x00,0x41,0x71,0x74] ++xvmax.w $xr0, $xr8, $xr16 ++ ++# CHECK: xvmax.d $xr16, $xr23, $xr16 ++# CHECK: encoding: [0xf0,0xc2,0x71,0x74] ++xvmax.d $xr16, $xr23, $xr16 ++ ++# CHECK: xvmin.b $xr20, $xr6, $xr14 ++# CHECK: encoding: [0xd4,0x38,0x72,0x74] ++xvmin.b $xr20, $xr6, $xr14 ++ ++# CHECK: xvmin.h $xr4, $xr3, $xr24 ++# CHECK: encoding: [0x64,0xe0,0x72,0x74] ++xvmin.h $xr4, $xr3, $xr24 ++ ++# CHECK: xvmin.w $xr5, $xr2, $xr23 ++# CHECK: encoding: [0x45,0x5c,0x73,0x74] ++xvmin.w $xr5, $xr2, $xr23 ++ ++# CHECK: xvmin.d $xr31, $xr23, $xr26 ++# CHECK: encoding: [0xff,0xea,0x73,0x74] ++xvmin.d $xr31, $xr23, $xr26 ++ ++# CHECK: xvmax.bu $xr14, $xr13, $xr3 ++# CHECK: encoding: [0xae,0x0d,0x74,0x74] ++xvmax.bu $xr14, $xr13, $xr3 ++ ++# CHECK: xvmax.hu $xr22, $xr17, $xr4 ++# CHECK: encoding: [0x36,0x92,0x74,0x74] ++xvmax.hu $xr22, $xr17, $xr4 ++ ++# CHECK: xvmax.wu $xr17, $xr13, $xr29 ++# CHECK: encoding: [0xb1,0x75,0x75,0x74] ++xvmax.wu $xr17, $xr13, $xr29 ++ ++# CHECK: xvmax.du $xr13, $xr2, $xr0 ++# CHECK: encoding: [0x4d,0x80,0x75,0x74] ++xvmax.du $xr13, $xr2, $xr0 ++ ++# CHECK: xvmin.bu $xr18, $xr31, $xr27 ++# CHECK: encoding: [0xf2,0x6f,0x76,0x74] ++xvmin.bu $xr18, $xr31, $xr27 ++ ++# CHECK: xvmin.hu $xr2, $xr10, $xr14 ++# CHECK: encoding: [0x42,0xb9,0x76,0x74] ++xvmin.hu $xr2, $xr10, $xr14 ++ ++# CHECK: xvmin.wu $xr31, $xr8, $xr26 ++# CHECK: encoding: [0x1f,0x69,0x77,0x74] ++xvmin.wu $xr31, $xr8, $xr26 ++ ++# CHECK: xvmin.du $xr12, $xr26, $xr9 ++# CHECK: encoding: [0x4c,0xa7,0x77,0x74] ++xvmin.du $xr12, $xr26, $xr9 ++ ++# CHECK: xvmul.b $xr26, $xr2, $xr3 ++# CHECK: encoding: [0x5a,0x0c,0x84,0x74] ++xvmul.b $xr26, $xr2, $xr3 ++ ++# CHECK: xvmul.h $xr16, $xr29, $xr5 ++# CHECK: encoding: [0xb0,0x97,0x84,0x74] ++xvmul.h $xr16, $xr29, $xr5 ++ ++# CHECK: xvmul.w $xr19, $xr1, $xr3 ++# CHECK: encoding: [0x33,0x0c,0x85,0x74] ++xvmul.w $xr19, $xr1, $xr3 ++ ++# CHECK: xvmul.d $xr15, $xr15, $xr0 ++# CHECK: encoding: [0xef,0x81,0x85,0x74] ++xvmul.d $xr15, $xr15, $xr0 ++ ++# CHECK: xvmuh.b $xr9, $xr12, $xr9 ++# CHECK: encoding: [0x89,0x25,0x86,0x74] ++xvmuh.b $xr9, $xr12, $xr9 ++ ++# CHECK: xvmuh.h $xr8, $xr23, $xr16 ++# CHECK: encoding: [0xe8,0xc2,0x86,0x74] ++xvmuh.h $xr8, $xr23, $xr16 ++ ++# CHECK: xvmuh.w $xr29, $xr6, $xr11 ++# CHECK: encoding: [0xdd,0x2c,0x87,0x74] ++xvmuh.w $xr29, $xr6, $xr11 ++ ++# CHECK: xvmuh.d $xr3, $xr18, $xr7 ++# CHECK: encoding: [0x43,0x9e,0x87,0x74] ++xvmuh.d $xr3, $xr18, $xr7 ++ ++# CHECK: xvmuh.bu $xr3, $xr7, $xr19 ++# CHECK: encoding: [0xe3,0x4c,0x88,0x74] ++xvmuh.bu $xr3, $xr7, $xr19 ++ ++# CHECK: xvmuh.hu $xr13, $xr1, $xr18 ++# CHECK: encoding: [0x2d,0xc8,0x88,0x74] ++xvmuh.hu $xr13, $xr1, $xr18 ++ ++# CHECK: xvmuh.wu $xr15, $xr21, $xr16 ++# CHECK: encoding: [0xaf,0x42,0x89,0x74] ++xvmuh.wu $xr15, $xr21, $xr16 ++ ++# CHECK: xvmuh.du $xr11, $xr10, $xr19 ++# CHECK: encoding: [0x4b,0xcd,0x89,0x74] ++xvmuh.du $xr11, $xr10, $xr19 ++ ++# CHECK: xvmulwev.h.b $xr4, $xr12, $xr9 ++# CHECK: encoding: [0x84,0x25,0x90,0x74] ++xvmulwev.h.b $xr4, $xr12, $xr9 ++ ++# CHECK: xvmulwev.w.h $xr10, $xr3, $xr20 ++# CHECK: encoding: 
[0x6a,0xd0,0x90,0x74] ++xvmulwev.w.h $xr10, $xr3, $xr20 ++ ++# CHECK: xvmulwev.d.w $xr4, $xr22, $xr18 ++# CHECK: encoding: [0xc4,0x4a,0x91,0x74] ++xvmulwev.d.w $xr4, $xr22, $xr18 ++ ++# CHECK: xvmulwev.q.d $xr20, $xr21, $xr27 ++# CHECK: encoding: [0xb4,0xee,0x91,0x74] ++xvmulwev.q.d $xr20, $xr21, $xr27 ++ ++# CHECK: xvmulwod.h.b $xr5, $xr7, $xr0 ++# CHECK: encoding: [0xe5,0x00,0x92,0x74] ++xvmulwod.h.b $xr5, $xr7, $xr0 ++ ++# CHECK: xvmulwod.w.h $xr19, $xr28, $xr11 ++# CHECK: encoding: [0x93,0xaf,0x92,0x74] ++xvmulwod.w.h $xr19, $xr28, $xr11 ++ ++# CHECK: xvmulwod.d.w $xr19, $xr7, $xr16 ++# CHECK: encoding: [0xf3,0x40,0x93,0x74] ++xvmulwod.d.w $xr19, $xr7, $xr16 ++ ++# CHECK: xvmulwod.q.d $xr11, $xr12, $xr13 ++# CHECK: encoding: [0x8b,0xb5,0x93,0x74] ++xvmulwod.q.d $xr11, $xr12, $xr13 ++ ++# CHECK: xvmulwev.h.bu $xr22, $xr2, $xr1 ++# CHECK: encoding: [0x56,0x04,0x98,0x74] ++xvmulwev.h.bu $xr22, $xr2, $xr1 ++ ++# CHECK: xvmulwev.w.hu $xr2, $xr3, $xr4 ++# CHECK: encoding: [0x62,0x90,0x98,0x74] ++xvmulwev.w.hu $xr2, $xr3, $xr4 ++ ++# CHECK: xvmulwev.d.wu $xr2, $xr12, $xr25 ++# CHECK: encoding: [0x82,0x65,0x99,0x74] ++xvmulwev.d.wu $xr2, $xr12, $xr25 ++ ++# CHECK: xvmulwev.q.du $xr22, $xr29, $xr17 ++# CHECK: encoding: [0xb6,0xc7,0x99,0x74] ++xvmulwev.q.du $xr22, $xr29, $xr17 ++ ++# CHECK: xvmulwod.h.bu $xr9, $xr9, $xr0 ++# CHECK: encoding: [0x29,0x01,0x9a,0x74] ++xvmulwod.h.bu $xr9, $xr9, $xr0 ++ ++# CHECK: xvmulwod.w.hu $xr20, $xr2, $xr16 ++# CHECK: encoding: [0x54,0xc0,0x9a,0x74] ++xvmulwod.w.hu $xr20, $xr2, $xr16 ++ ++# CHECK: xvmulwod.d.wu $xr1, $xr11, $xr24 ++# CHECK: encoding: [0x61,0x61,0x9b,0x74] ++xvmulwod.d.wu $xr1, $xr11, $xr24 ++ ++# CHECK: xvmulwod.q.du $xr19, $xr2, $xr22 ++# CHECK: encoding: [0x53,0xd8,0x9b,0x74] ++xvmulwod.q.du $xr19, $xr2, $xr22 ++ ++# CHECK: xvmulwev.h.bu.b $xr22, $xr29, $xr24 ++# CHECK: encoding: [0xb6,0x63,0xa0,0x74] ++xvmulwev.h.bu.b $xr22, $xr29, $xr24 ++ ++# CHECK: xvmulwev.w.hu.h $xr1, $xr22, $xr11 ++# CHECK: encoding: [0xc1,0xae,0xa0,0x74] ++xvmulwev.w.hu.h $xr1, $xr22, $xr11 ++ ++# CHECK: xvmulwev.d.wu.w $xr12, $xr12, $xr12 ++# CHECK: encoding: [0x8c,0x31,0xa1,0x74] ++xvmulwev.d.wu.w $xr12, $xr12, $xr12 ++ ++# CHECK: xvmulwev.q.du.d $xr0, $xr17, $xr23 ++# CHECK: encoding: [0x20,0xde,0xa1,0x74] ++xvmulwev.q.du.d $xr0, $xr17, $xr23 ++ ++# CHECK: xvmulwod.h.bu.b $xr26, $xr16, $xr23 ++# CHECK: encoding: [0x1a,0x5e,0xa2,0x74] ++xvmulwod.h.bu.b $xr26, $xr16, $xr23 ++ ++# CHECK: xvmulwod.w.hu.h $xr31, $xr12, $xr9 ++# CHECK: encoding: [0x9f,0xa5,0xa2,0x74] ++xvmulwod.w.hu.h $xr31, $xr12, $xr9 ++ ++# CHECK: xvmulwod.d.wu.w $xr21, $xr27, $xr19 ++# CHECK: encoding: [0x75,0x4f,0xa3,0x74] ++xvmulwod.d.wu.w $xr21, $xr27, $xr19 ++ ++# CHECK: xvmulwod.q.du.d $xr7, $xr5, $xr11 ++# CHECK: encoding: [0xa7,0xac,0xa3,0x74] ++xvmulwod.q.du.d $xr7, $xr5, $xr11 ++ ++# CHECK: xvmadd.b $xr22, $xr11, $xr15 ++# CHECK: encoding: [0x76,0x3d,0xa8,0x74] ++xvmadd.b $xr22, $xr11, $xr15 ++ ++# CHECK: xvmadd.h $xr3, $xr30, $xr25 ++# CHECK: encoding: [0xc3,0xe7,0xa8,0x74] ++xvmadd.h $xr3, $xr30, $xr25 ++ ++# CHECK: xvmadd.w $xr1, $xr18, $xr5 ++# CHECK: encoding: [0x41,0x16,0xa9,0x74] ++xvmadd.w $xr1, $xr18, $xr5 ++ ++# CHECK: xvmadd.d $xr16, $xr21, $xr11 ++# CHECK: encoding: [0xb0,0xae,0xa9,0x74] ++xvmadd.d $xr16, $xr21, $xr11 ++ ++# CHECK: xvmsub.b $xr11, $xr12, $xr10 ++# CHECK: encoding: [0x8b,0x29,0xaa,0x74] ++xvmsub.b $xr11, $xr12, $xr10 ++ ++# CHECK: xvmsub.h $xr16, $xr11, $xr1 ++# CHECK: encoding: [0x70,0x85,0xaa,0x74] ++xvmsub.h $xr16, $xr11, $xr1 ++ ++# CHECK: xvmsub.w $xr15, 
$xr21, $xr21 ++# CHECK: encoding: [0xaf,0x56,0xab,0x74] ++xvmsub.w $xr15, $xr21, $xr21 ++ ++# CHECK: xvmsub.d $xr12, $xr11, $xr4 ++# CHECK: encoding: [0x6c,0x91,0xab,0x74] ++xvmsub.d $xr12, $xr11, $xr4 ++ ++# CHECK: xvmaddwev.h.b $xr21, $xr7, $xr6 ++# CHECK: encoding: [0xf5,0x18,0xac,0x74] ++xvmaddwev.h.b $xr21, $xr7, $xr6 ++ ++# CHECK: xvmaddwev.w.h $xr16, $xr29, $xr13 ++# CHECK: encoding: [0xb0,0xb7,0xac,0x74] ++xvmaddwev.w.h $xr16, $xr29, $xr13 ++ ++# CHECK: xvmaddwev.d.w $xr7, $xr25, $xr30 ++# CHECK: encoding: [0x27,0x7b,0xad,0x74] ++xvmaddwev.d.w $xr7, $xr25, $xr30 ++ ++# CHECK: xvmaddwev.q.d $xr19, $xr3, $xr8 ++# CHECK: encoding: [0x73,0xa0,0xad,0x74] ++xvmaddwev.q.d $xr19, $xr3, $xr8 ++ ++# CHECK: xvmaddwod.h.b $xr20, $xr27, $xr12 ++# CHECK: encoding: [0x74,0x33,0xae,0x74] ++xvmaddwod.h.b $xr20, $xr27, $xr12 ++ ++# CHECK: xvmaddwod.w.h $xr0, $xr21, $xr13 ++# CHECK: encoding: [0xa0,0xb6,0xae,0x74] ++xvmaddwod.w.h $xr0, $xr21, $xr13 ++ ++# CHECK: xvmaddwod.d.w $xr25, $xr13, $xr31 ++# CHECK: encoding: [0xb9,0x7d,0xaf,0x74] ++xvmaddwod.d.w $xr25, $xr13, $xr31 ++ ++# CHECK: xvmaddwod.q.d $xr26, $xr26, $xr16 ++# CHECK: encoding: [0x5a,0xc3,0xaf,0x74] ++xvmaddwod.q.d $xr26, $xr26, $xr16 ++ ++# CHECK: xvmaddwev.h.bu $xr18, $xr26, $xr21 ++# CHECK: encoding: [0x52,0x57,0xb4,0x74] ++xvmaddwev.h.bu $xr18, $xr26, $xr21 ++ ++# CHECK: xvmaddwev.w.hu $xr14, $xr16, $xr5 ++# CHECK: encoding: [0x0e,0x96,0xb4,0x74] ++xvmaddwev.w.hu $xr14, $xr16, $xr5 ++ ++# CHECK: xvmaddwev.d.wu $xr19, $xr29, $xr20 ++# CHECK: encoding: [0xb3,0x53,0xb5,0x74] ++xvmaddwev.d.wu $xr19, $xr29, $xr20 ++ ++# CHECK: xvmaddwev.q.du $xr15, $xr29, $xr17 ++# CHECK: encoding: [0xaf,0xc7,0xb5,0x74] ++xvmaddwev.q.du $xr15, $xr29, $xr17 ++ ++# CHECK: xvmaddwod.h.bu $xr13, $xr26, $xr1 ++# CHECK: encoding: [0x4d,0x07,0xb6,0x74] ++xvmaddwod.h.bu $xr13, $xr26, $xr1 ++ ++# CHECK: xvmaddwod.w.hu $xr15, $xr25, $xr16 ++# CHECK: encoding: [0x2f,0xc3,0xb6,0x74] ++xvmaddwod.w.hu $xr15, $xr25, $xr16 ++ ++# CHECK: xvmaddwod.d.wu $xr23, $xr4, $xr9 ++# CHECK: encoding: [0x97,0x24,0xb7,0x74] ++xvmaddwod.d.wu $xr23, $xr4, $xr9 ++ ++# CHECK: xvmaddwod.q.du $xr29, $xr22, $xr17 ++# CHECK: encoding: [0xdd,0xc6,0xb7,0x74] ++xvmaddwod.q.du $xr29, $xr22, $xr17 ++ ++# CHECK: xvmaddwev.h.bu.b $xr23, $xr1, $xr6 ++# CHECK: encoding: [0x37,0x18,0xbc,0x74] ++xvmaddwev.h.bu.b $xr23, $xr1, $xr6 ++ ++# CHECK: xvmaddwev.w.hu.h $xr4, $xr27, $xr12 ++# CHECK: encoding: [0x64,0xb3,0xbc,0x74] ++xvmaddwev.w.hu.h $xr4, $xr27, $xr12 ++ ++# CHECK: xvmaddwev.d.wu.w $xr0, $xr2, $xr5 ++# CHECK: encoding: [0x40,0x14,0xbd,0x74] ++xvmaddwev.d.wu.w $xr0, $xr2, $xr5 ++ ++# CHECK: xvmaddwev.q.du.d $xr9, $xr31, $xr1 ++# CHECK: encoding: [0xe9,0x87,0xbd,0x74] ++xvmaddwev.q.du.d $xr9, $xr31, $xr1 ++ ++# CHECK: xvmaddwod.h.bu.b $xr9, $xr19, $xr20 ++# CHECK: encoding: [0x69,0x52,0xbe,0x74] ++xvmaddwod.h.bu.b $xr9, $xr19, $xr20 ++ ++# CHECK: xvmaddwod.w.hu.h $xr7, $xr5, $xr13 ++# CHECK: encoding: [0xa7,0xb4,0xbe,0x74] ++xvmaddwod.w.hu.h $xr7, $xr5, $xr13 ++ ++# CHECK: xvmaddwod.d.wu.w $xr10, $xr27, $xr1 ++# CHECK: encoding: [0x6a,0x07,0xbf,0x74] ++xvmaddwod.d.wu.w $xr10, $xr27, $xr1 ++ ++# CHECK: xvmaddwod.q.du.d $xr25, $xr19, $xr0 ++# CHECK: encoding: [0x79,0x82,0xbf,0x74] ++xvmaddwod.q.du.d $xr25, $xr19, $xr0 ++ ++# CHECK: xvdiv.b $xr3, $xr31, $xr2 ++# CHECK: encoding: [0xe3,0x0b,0xe0,0x74] ++xvdiv.b $xr3, $xr31, $xr2 ++ ++# CHECK: xvdiv.h $xr1, $xr12, $xr17 ++# CHECK: encoding: [0x81,0xc5,0xe0,0x74] ++xvdiv.h $xr1, $xr12, $xr17 ++ ++# CHECK: xvdiv.w $xr13, $xr0, $xr12 ++# CHECK: encoding: 
[0x0d,0x30,0xe1,0x74] ++xvdiv.w $xr13, $xr0, $xr12 ++ ++# CHECK: xvdiv.d $xr17, $xr5, $xr11 ++# CHECK: encoding: [0xb1,0xac,0xe1,0x74] ++xvdiv.d $xr17, $xr5, $xr11 ++ ++# CHECK: xvmod.b $xr22, $xr17, $xr1 ++# CHECK: encoding: [0x36,0x06,0xe2,0x74] ++xvmod.b $xr22, $xr17, $xr1 ++ ++# CHECK: xvmod.h $xr28, $xr5, $xr12 ++# CHECK: encoding: [0xbc,0xb0,0xe2,0x74] ++xvmod.h $xr28, $xr5, $xr12 ++ ++# CHECK: xvmod.w $xr29, $xr19, $xr14 ++# CHECK: encoding: [0x7d,0x3a,0xe3,0x74] ++xvmod.w $xr29, $xr19, $xr14 ++ ++# CHECK: xvmod.d $xr17, $xr8, $xr6 ++# CHECK: encoding: [0x11,0x99,0xe3,0x74] ++xvmod.d $xr17, $xr8, $xr6 ++ ++# CHECK: xvdiv.bu $xr23, $xr6, $xr2 ++# CHECK: encoding: [0xd7,0x08,0xe4,0x74] ++xvdiv.bu $xr23, $xr6, $xr2 ++ ++# CHECK: xvdiv.hu $xr9, $xr31, $xr0 ++# CHECK: encoding: [0xe9,0x83,0xe4,0x74] ++xvdiv.hu $xr9, $xr31, $xr0 ++ ++# CHECK: xvdiv.wu $xr15, $xr1, $xr4 ++# CHECK: encoding: [0x2f,0x10,0xe5,0x74] ++xvdiv.wu $xr15, $xr1, $xr4 ++ ++# CHECK: xvdiv.du $xr14, $xr29, $xr11 ++# CHECK: encoding: [0xae,0xaf,0xe5,0x74] ++xvdiv.du $xr14, $xr29, $xr11 ++ ++# CHECK: xvmod.bu $xr4, $xr12, $xr31 ++# CHECK: encoding: [0x84,0x7d,0xe6,0x74] ++xvmod.bu $xr4, $xr12, $xr31 ++ ++# CHECK: xvmod.hu $xr22, $xr12, $xr11 ++# CHECK: encoding: [0x96,0xad,0xe6,0x74] ++xvmod.hu $xr22, $xr12, $xr11 ++ ++# CHECK: xvmod.wu $xr21, $xr23, $xr10 ++# CHECK: encoding: [0xf5,0x2a,0xe7,0x74] ++xvmod.wu $xr21, $xr23, $xr10 ++ ++# CHECK: xvmod.du $xr21, $xr21, $xr31 ++# CHECK: encoding: [0xb5,0xfe,0xe7,0x74] ++xvmod.du $xr21, $xr21, $xr31 ++ ++# CHECK: xvsll.b $xr16, $xr10, $xr11 ++# CHECK: encoding: [0x50,0x2d,0xe8,0x74] ++xvsll.b $xr16, $xr10, $xr11 ++ ++# CHECK: xvsll.h $xr12, $xr10, $xr27 ++# CHECK: encoding: [0x4c,0xed,0xe8,0x74] ++xvsll.h $xr12, $xr10, $xr27 ++ ++# CHECK: xvsll.w $xr30, $xr2, $xr26 ++# CHECK: encoding: [0x5e,0x68,0xe9,0x74] ++xvsll.w $xr30, $xr2, $xr26 ++ ++# CHECK: xvsll.d $xr8, $xr21, $xr17 ++# CHECK: encoding: [0xa8,0xc6,0xe9,0x74] ++xvsll.d $xr8, $xr21, $xr17 ++ ++# CHECK: xvsrl.b $xr27, $xr24, $xr18 ++# CHECK: encoding: [0x1b,0x4b,0xea,0x74] ++xvsrl.b $xr27, $xr24, $xr18 ++ ++# CHECK: xvsrl.h $xr17, $xr31, $xr24 ++# CHECK: encoding: [0xf1,0xe3,0xea,0x74] ++xvsrl.h $xr17, $xr31, $xr24 ++ ++# CHECK: xvsrl.w $xr5, $xr3, $xr4 ++# CHECK: encoding: [0x65,0x10,0xeb,0x74] ++xvsrl.w $xr5, $xr3, $xr4 ++ ++# CHECK: xvsrl.d $xr21, $xr6, $xr8 ++# CHECK: encoding: [0xd5,0xa0,0xeb,0x74] ++xvsrl.d $xr21, $xr6, $xr8 ++ ++# CHECK: xvsra.b $xr28, $xr28, $xr21 ++# CHECK: encoding: [0x9c,0x57,0xec,0x74] ++xvsra.b $xr28, $xr28, $xr21 ++ ++# CHECK: xvsra.h $xr19, $xr4, $xr26 ++# CHECK: encoding: [0x93,0xe8,0xec,0x74] ++xvsra.h $xr19, $xr4, $xr26 ++ ++# CHECK: xvsra.w $xr13, $xr20, $xr1 ++# CHECK: encoding: [0x8d,0x06,0xed,0x74] ++xvsra.w $xr13, $xr20, $xr1 ++ ++# CHECK: xvsra.d $xr0, $xr8, $xr18 ++# CHECK: encoding: [0x00,0xc9,0xed,0x74] ++xvsra.d $xr0, $xr8, $xr18 ++ ++# CHECK: xvrotr.b $xr8, $xr30, $xr28 ++# CHECK: encoding: [0xc8,0x73,0xee,0x74] ++xvrotr.b $xr8, $xr30, $xr28 ++ ++# CHECK: xvrotr.h $xr17, $xr19, $xr0 ++# CHECK: encoding: [0x71,0x82,0xee,0x74] ++xvrotr.h $xr17, $xr19, $xr0 ++ ++# CHECK: xvrotr.w $xr15, $xr28, $xr23 ++# CHECK: encoding: [0x8f,0x5f,0xef,0x74] ++xvrotr.w $xr15, $xr28, $xr23 ++ ++# CHECK: xvrotr.d $xr31, $xr2, $xr21 ++# CHECK: encoding: [0x5f,0xd4,0xef,0x74] ++xvrotr.d $xr31, $xr2, $xr21 ++ ++# CHECK: xvsrlr.b $xr20, $xr26, $xr11 ++# CHECK: encoding: [0x54,0x2f,0xf0,0x74] ++xvsrlr.b $xr20, $xr26, $xr11 ++ ++# CHECK: xvsrlr.h $xr13, $xr18, $xr7 ++# CHECK: encoding: 
[0x4d,0x9e,0xf0,0x74] ++xvsrlr.h $xr13, $xr18, $xr7 ++ ++# CHECK: xvsrlr.w $xr28, $xr1, $xr3 ++# CHECK: encoding: [0x3c,0x0c,0xf1,0x74] ++xvsrlr.w $xr28, $xr1, $xr3 ++ ++# CHECK: xvsrlr.d $xr6, $xr3, $xr14 ++# CHECK: encoding: [0x66,0xb8,0xf1,0x74] ++xvsrlr.d $xr6, $xr3, $xr14 ++ ++# CHECK: xvsrar.b $xr10, $xr8, $xr17 ++# CHECK: encoding: [0x0a,0x45,0xf2,0x74] ++xvsrar.b $xr10, $xr8, $xr17 ++ ++# CHECK: xvsrar.h $xr31, $xr2, $xr11 ++# CHECK: encoding: [0x5f,0xac,0xf2,0x74] ++xvsrar.h $xr31, $xr2, $xr11 ++ ++# CHECK: xvsrar.w $xr13, $xr8, $xr5 ++# CHECK: encoding: [0x0d,0x15,0xf3,0x74] ++xvsrar.w $xr13, $xr8, $xr5 ++ ++# CHECK: xvsrar.d $xr12, $xr18, $xr0 ++# CHECK: encoding: [0x4c,0x82,0xf3,0x74] ++xvsrar.d $xr12, $xr18, $xr0 ++ ++# CHECK: xvsrln.b.h $xr15, $xr6, $xr15 ++# CHECK: encoding: [0xcf,0xbc,0xf4,0x74] ++xvsrln.b.h $xr15, $xr6, $xr15 ++ ++# CHECK: xvsrln.h.w $xr22, $xr19, $xr17 ++# CHECK: encoding: [0x76,0x46,0xf5,0x74] ++xvsrln.h.w $xr22, $xr19, $xr17 ++ ++# CHECK: xvsrln.w.d $xr4, $xr7, $xr5 ++# CHECK: encoding: [0xe4,0x94,0xf5,0x74] ++xvsrln.w.d $xr4, $xr7, $xr5 ++ ++# CHECK: xvsran.b.h $xr3, $xr19, $xr23 ++# CHECK: encoding: [0x63,0xde,0xf6,0x74] ++xvsran.b.h $xr3, $xr19, $xr23 ++ ++# CHECK: xvsran.h.w $xr16, $xr6, $xr1 ++# CHECK: encoding: [0xd0,0x04,0xf7,0x74] ++xvsran.h.w $xr16, $xr6, $xr1 ++ ++# CHECK: xvsran.w.d $xr27, $xr16, $xr0 ++# CHECK: encoding: [0x1b,0x82,0xf7,0x74] ++xvsran.w.d $xr27, $xr16, $xr0 ++ ++# CHECK: xvsrlrn.b.h $xr2, $xr9, $xr9 ++# CHECK: encoding: [0x22,0xa5,0xf8,0x74] ++xvsrlrn.b.h $xr2, $xr9, $xr9 ++ ++# CHECK: xvsrlrn.h.w $xr16, $xr11, $xr19 ++# CHECK: encoding: [0x70,0x4d,0xf9,0x74] ++xvsrlrn.h.w $xr16, $xr11, $xr19 ++ ++# CHECK: xvsrlrn.w.d $xr29, $xr25, $xr15 ++# CHECK: encoding: [0x3d,0xbf,0xf9,0x74] ++xvsrlrn.w.d $xr29, $xr25, $xr15 ++ ++# CHECK: xvsrarn.b.h $xr13, $xr20, $xr13 ++# CHECK: encoding: [0x8d,0xb6,0xfa,0x74] ++xvsrarn.b.h $xr13, $xr20, $xr13 ++ ++# CHECK: xvsrarn.h.w $xr13, $xr22, $xr1 ++# CHECK: encoding: [0xcd,0x06,0xfb,0x74] ++xvsrarn.h.w $xr13, $xr22, $xr1 ++ ++# CHECK: xvsrarn.w.d $xr13, $xr12, $xr2 ++# CHECK: encoding: [0x8d,0x89,0xfb,0x74] ++xvsrarn.w.d $xr13, $xr12, $xr2 ++ ++# CHECK: xvssrln.b.h $xr19, $xr19, $xr10 ++# CHECK: encoding: [0x73,0xaa,0xfc,0x74] ++xvssrln.b.h $xr19, $xr19, $xr10 ++ ++# CHECK: xvssrln.h.w $xr12, $xr24, $xr17 ++# CHECK: encoding: [0x0c,0x47,0xfd,0x74] ++xvssrln.h.w $xr12, $xr24, $xr17 ++ ++# CHECK: xvssrln.w.d $xr7, $xr30, $xr14 ++# CHECK: encoding: [0xc7,0xbb,0xfd,0x74] ++xvssrln.w.d $xr7, $xr30, $xr14 ++ ++# CHECK: xvssran.b.h $xr6, $xr9, $xr23 ++# CHECK: encoding: [0x26,0xdd,0xfe,0x74] ++xvssran.b.h $xr6, $xr9, $xr23 ++ ++# CHECK: xvssran.h.w $xr13, $xr9, $xr2 ++# CHECK: encoding: [0x2d,0x09,0xff,0x74] ++xvssran.h.w $xr13, $xr9, $xr2 ++ ++# CHECK: xvssran.w.d $xr18, $xr26, $xr1 ++# CHECK: encoding: [0x52,0x87,0xff,0x74] ++xvssran.w.d $xr18, $xr26, $xr1 ++ ++# CHECK: xvssrlrn.b.h $xr24, $xr17, $xr23 ++# CHECK: encoding: [0x38,0xde,0x00,0x75] ++xvssrlrn.b.h $xr24, $xr17, $xr23 ++ ++# CHECK: xvssrlrn.h.w $xr10, $xr12, $xr8 ++# CHECK: encoding: [0x8a,0x21,0x01,0x75] ++xvssrlrn.h.w $xr10, $xr12, $xr8 ++ ++# CHECK: xvssrlrn.w.d $xr30, $xr27, $xr6 ++# CHECK: encoding: [0x7e,0x9b,0x01,0x75] ++xvssrlrn.w.d $xr30, $xr27, $xr6 ++ ++# CHECK: xvssrarn.b.h $xr20, $xr27, $xr31 ++# CHECK: encoding: [0x74,0xff,0x02,0x75] ++xvssrarn.b.h $xr20, $xr27, $xr31 ++ ++# CHECK: xvssrarn.h.w $xr24, $xr23, $xr23 ++# CHECK: encoding: [0xf8,0x5e,0x03,0x75] ++xvssrarn.h.w $xr24, $xr23, $xr23 ++ ++# CHECK: xvssrarn.w.d $xr8, 
$xr29, $xr25 ++# CHECK: encoding: [0xa8,0xe7,0x03,0x75] ++xvssrarn.w.d $xr8, $xr29, $xr25 ++ ++# CHECK: xvssrln.bu.h $xr14, $xr4, $xr17 ++# CHECK: encoding: [0x8e,0xc4,0x04,0x75] ++xvssrln.bu.h $xr14, $xr4, $xr17 ++ ++# CHECK: xvssrln.hu.w $xr28, $xr20, $xr10 ++# CHECK: encoding: [0x9c,0x2a,0x05,0x75] ++xvssrln.hu.w $xr28, $xr20, $xr10 ++ ++# CHECK: xvssrln.wu.d $xr10, $xr8, $xr20 ++# CHECK: encoding: [0x0a,0xd1,0x05,0x75] ++xvssrln.wu.d $xr10, $xr8, $xr20 ++ ++# CHECK: xvssran.bu.h $xr18, $xr28, $xr23 ++# CHECK: encoding: [0x92,0xdf,0x06,0x75] ++xvssran.bu.h $xr18, $xr28, $xr23 ++ ++# CHECK: xvssran.hu.w $xr25, $xr19, $xr24 ++# CHECK: encoding: [0x79,0x62,0x07,0x75] ++xvssran.hu.w $xr25, $xr19, $xr24 ++ ++# CHECK: xvssran.wu.d $xr16, $xr29, $xr18 ++# CHECK: encoding: [0xb0,0xcb,0x07,0x75] ++xvssran.wu.d $xr16, $xr29, $xr18 ++ ++# CHECK: xvssrlrn.bu.h $xr2, $xr19, $xr14 ++# CHECK: encoding: [0x62,0xba,0x08,0x75] ++xvssrlrn.bu.h $xr2, $xr19, $xr14 ++ ++# CHECK: xvssrlrn.hu.w $xr6, $xr0, $xr18 ++# CHECK: encoding: [0x06,0x48,0x09,0x75] ++xvssrlrn.hu.w $xr6, $xr0, $xr18 ++ ++# CHECK: xvssrlrn.wu.d $xr30, $xr4, $xr31 ++# CHECK: encoding: [0x9e,0xfc,0x09,0x75] ++xvssrlrn.wu.d $xr30, $xr4, $xr31 ++ ++# CHECK: xvssrarn.bu.h $xr16, $xr28, $xr8 ++# CHECK: encoding: [0x90,0xa3,0x0a,0x75] ++xvssrarn.bu.h $xr16, $xr28, $xr8 ++ ++# CHECK: xvssrarn.hu.w $xr11, $xr2, $xr6 ++# CHECK: encoding: [0x4b,0x18,0x0b,0x75] ++xvssrarn.hu.w $xr11, $xr2, $xr6 ++ ++# CHECK: xvssrarn.wu.d $xr22, $xr6, $xr12 ++# CHECK: encoding: [0xd6,0xb0,0x0b,0x75] ++xvssrarn.wu.d $xr22, $xr6, $xr12 ++ ++# CHECK: xvbitclr.b $xr4, $xr16, $xr16 ++# CHECK: encoding: [0x04,0x42,0x0c,0x75] ++xvbitclr.b $xr4, $xr16, $xr16 ++ ++# CHECK: xvbitclr.h $xr16, $xr31, $xr26 ++# CHECK: encoding: [0xf0,0xeb,0x0c,0x75] ++xvbitclr.h $xr16, $xr31, $xr26 ++ ++# CHECK: xvbitclr.w $xr24, $xr2, $xr20 ++# CHECK: encoding: [0x58,0x50,0x0d,0x75] ++xvbitclr.w $xr24, $xr2, $xr20 ++ ++# CHECK: xvbitclr.d $xr18, $xr12, $xr30 ++# CHECK: encoding: [0x92,0xf9,0x0d,0x75] ++xvbitclr.d $xr18, $xr12, $xr30 ++ ++# CHECK: xvbitset.b $xr26, $xr27, $xr23 ++# CHECK: encoding: [0x7a,0x5f,0x0e,0x75] ++xvbitset.b $xr26, $xr27, $xr23 ++ ++# CHECK: xvbitset.h $xr19, $xr19, $xr11 ++# CHECK: encoding: [0x73,0xae,0x0e,0x75] ++xvbitset.h $xr19, $xr19, $xr11 ++ ++# CHECK: xvbitset.w $xr7, $xr9, $xr18 ++# CHECK: encoding: [0x27,0x49,0x0f,0x75] ++xvbitset.w $xr7, $xr9, $xr18 ++ ++# CHECK: xvbitset.d $xr6, $xr30, $xr3 ++# CHECK: encoding: [0xc6,0x8f,0x0f,0x75] ++xvbitset.d $xr6, $xr30, $xr3 ++ ++# CHECK: xvbitrev.b $xr30, $xr13, $xr7 ++# CHECK: encoding: [0xbe,0x1d,0x10,0x75] ++xvbitrev.b $xr30, $xr13, $xr7 ++ ++# CHECK: xvbitrev.h $xr12, $xr3, $xr8 ++# CHECK: encoding: [0x6c,0xa0,0x10,0x75] ++xvbitrev.h $xr12, $xr3, $xr8 ++ ++# CHECK: xvbitrev.w $xr8, $xr20, $xr20 ++# CHECK: encoding: [0x88,0x52,0x11,0x75] ++xvbitrev.w $xr8, $xr20, $xr20 ++ ++# CHECK: xvbitrev.d $xr28, $xr7, $xr17 ++# CHECK: encoding: [0xfc,0xc4,0x11,0x75] ++xvbitrev.d $xr28, $xr7, $xr17 ++ ++# CHECK: xvpackev.b $xr29, $xr18, $xr12 ++# CHECK: encoding: [0x5d,0x32,0x16,0x75] ++xvpackev.b $xr29, $xr18, $xr12 ++ ++# CHECK: xvpackev.h $xr6, $xr11, $xr17 ++# CHECK: encoding: [0x66,0xc5,0x16,0x75] ++xvpackev.h $xr6, $xr11, $xr17 ++ ++# CHECK: xvpackev.w $xr2, $xr2, $xr30 ++# CHECK: encoding: [0x42,0x78,0x17,0x75] ++xvpackev.w $xr2, $xr2, $xr30 ++ ++# CHECK: xvpackev.d $xr26, $xr15, $xr21 ++# CHECK: encoding: [0xfa,0xd5,0x17,0x75] ++xvpackev.d $xr26, $xr15, $xr21 ++ ++# CHECK: xvpackod.b $xr19, $xr17, $xr17 ++# CHECK: 
encoding: [0x33,0x46,0x18,0x75] ++xvpackod.b $xr19, $xr17, $xr17 ++ ++# CHECK: xvpackod.h $xr15, $xr8, $xr3 ++# CHECK: encoding: [0x0f,0x8d,0x18,0x75] ++xvpackod.h $xr15, $xr8, $xr3 ++ ++# CHECK: xvpackod.w $xr13, $xr15, $xr12 ++# CHECK: encoding: [0xed,0x31,0x19,0x75] ++xvpackod.w $xr13, $xr15, $xr12 ++ ++# CHECK: xvpackod.d $xr5, $xr3, $xr26 ++# CHECK: encoding: [0x65,0xe8,0x19,0x75] ++xvpackod.d $xr5, $xr3, $xr26 ++ ++# CHECK: xvilvl.b $xr27, $xr9, $xr1 ++# CHECK: encoding: [0x3b,0x05,0x1a,0x75] ++xvilvl.b $xr27, $xr9, $xr1 ++ ++# CHECK: xvilvl.h $xr29, $xr8, $xr1 ++# CHECK: encoding: [0x1d,0x85,0x1a,0x75] ++xvilvl.h $xr29, $xr8, $xr1 ++ ++# CHECK: xvilvl.w $xr9, $xr8, $xr7 ++# CHECK: encoding: [0x09,0x1d,0x1b,0x75] ++xvilvl.w $xr9, $xr8, $xr7 ++ ++# CHECK: xvilvl.d $xr25, $xr7, $xr18 ++# CHECK: encoding: [0xf9,0xc8,0x1b,0x75] ++xvilvl.d $xr25, $xr7, $xr18 ++ ++# CHECK: xvilvh.b $xr7, $xr24, $xr26 ++# CHECK: encoding: [0x07,0x6b,0x1c,0x75] ++xvilvh.b $xr7, $xr24, $xr26 ++ ++# CHECK: xvilvh.h $xr6, $xr20, $xr28 ++# CHECK: encoding: [0x86,0xf2,0x1c,0x75] ++xvilvh.h $xr6, $xr20, $xr28 ++ ++# CHECK: xvilvh.w $xr13, $xr5, $xr12 ++# CHECK: encoding: [0xad,0x30,0x1d,0x75] ++xvilvh.w $xr13, $xr5, $xr12 ++ ++# CHECK: xvilvh.d $xr1, $xr21, $xr31 ++# CHECK: encoding: [0xa1,0xfe,0x1d,0x75] ++xvilvh.d $xr1, $xr21, $xr31 ++ ++# CHECK: xvpickev.b $xr17, $xr13, $xr31 ++# CHECK: encoding: [0xb1,0x7d,0x1e,0x75] ++xvpickev.b $xr17, $xr13, $xr31 ++ ++# CHECK: xvpickev.h $xr4, $xr8, $xr14 ++# CHECK: encoding: [0x04,0xb9,0x1e,0x75] ++xvpickev.h $xr4, $xr8, $xr14 ++ ++# CHECK: xvpickev.w $xr10, $xr8, $xr11 ++# CHECK: encoding: [0x0a,0x2d,0x1f,0x75] ++xvpickev.w $xr10, $xr8, $xr11 ++ ++# CHECK: xvpickev.d $xr26, $xr20, $xr8 ++# CHECK: encoding: [0x9a,0xa2,0x1f,0x75] ++xvpickev.d $xr26, $xr20, $xr8 ++ ++# CHECK: xvpickod.b $xr19, $xr21, $xr27 ++# CHECK: encoding: [0xb3,0x6e,0x20,0x75] ++xvpickod.b $xr19, $xr21, $xr27 ++ ++# CHECK: xvpickod.h $xr28, $xr5, $xr19 ++# CHECK: encoding: [0xbc,0xcc,0x20,0x75] ++xvpickod.h $xr28, $xr5, $xr19 ++ ++# CHECK: xvpickod.w $xr21, $xr18, $xr22 ++# CHECK: encoding: [0x55,0x5a,0x21,0x75] ++xvpickod.w $xr21, $xr18, $xr22 ++ ++# CHECK: xvpickod.d $xr28, $xr7, $xr18 ++# CHECK: encoding: [0xfc,0xc8,0x21,0x75] ++xvpickod.d $xr28, $xr7, $xr18 ++ ++# CHECK: xvreplve.b $xr6, $xr20, $r25 ++# CHECK: encoding: [0x86,0x66,0x22,0x75] ++xvreplve.b $xr6, $xr20, $r25 ++ ++# CHECK: xvreplve.h $xr27, $xr7, $r14 ++# CHECK: encoding: [0xfb,0xb8,0x22,0x75] ++xvreplve.h $xr27, $xr7, $r14 ++ ++# CHECK: xvreplve.w $xr1, $xr4, $r15 ++# CHECK: encoding: [0x81,0x3c,0x23,0x75] ++xvreplve.w $xr1, $xr4, $r15 ++ ++# CHECK: xvreplve.d $xr12, $xr12, $r16 ++# CHECK: encoding: [0x8c,0xc1,0x23,0x75] ++xvreplve.d $xr12, $xr12, $r16 ++ ++# CHECK: xvand.v $xr1, $xr3, $xr29 ++# CHECK: encoding: [0x61,0x74,0x26,0x75] ++xvand.v $xr1, $xr3, $xr29 ++ ++# CHECK: xvor.v $xr23, $xr11, $xr20 ++# CHECK: encoding: [0x77,0xd1,0x26,0x75] ++xvor.v $xr23, $xr11, $xr20 ++ ++# CHECK: xvxor.v $xr31, $xr1, $xr30 ++# CHECK: encoding: [0x3f,0x78,0x27,0x75] ++xvxor.v $xr31, $xr1, $xr30 ++ ++# CHECK: xvnor.v $xr29, $xr26, $xr13 ++# CHECK: encoding: [0x5d,0xb7,0x27,0x75] ++xvnor.v $xr29, $xr26, $xr13 ++ ++# CHECK: xvandn.v $xr9, $xr14, $xr0 ++# CHECK: encoding: [0xc9,0x01,0x28,0x75] ++xvandn.v $xr9, $xr14, $xr0 ++ ++# CHECK: xvorn.v $xr25, $xr8, $xr12 ++# CHECK: encoding: [0x19,0xb1,0x28,0x75] ++xvorn.v $xr25, $xr8, $xr12 ++ ++# CHECK: xvfrstp.b $xr21, $xr26, $xr26 ++# CHECK: encoding: [0x55,0x6b,0x2b,0x75] ++xvfrstp.b $xr21, $xr26, $xr26 
++ ++# CHECK: xvfrstp.h $xr4, $xr17, $xr2 ++# CHECK: encoding: [0x24,0x8a,0x2b,0x75] ++xvfrstp.h $xr4, $xr17, $xr2 ++ ++# CHECK: xvadd.q $xr29, $xr28, $xr17 ++# CHECK: encoding: [0x9d,0x47,0x2d,0x75] ++xvadd.q $xr29, $xr28, $xr17 ++ ++# CHECK: xvsub.q $xr29, $xr2, $xr27 ++# CHECK: encoding: [0x5d,0xec,0x2d,0x75] ++xvsub.q $xr29, $xr2, $xr27 ++ ++# CHECK: xvsigncov.b $xr18, $xr28, $xr7 ++# CHECK: encoding: [0x92,0x1f,0x2e,0x75] ++xvsigncov.b $xr18, $xr28, $xr7 ++ ++# CHECK: xvsigncov.h $xr18, $xr12, $xr17 ++# CHECK: encoding: [0x92,0xc5,0x2e,0x75] ++xvsigncov.h $xr18, $xr12, $xr17 ++ ++# CHECK: xvsigncov.w $xr26, $xr1, $xr0 ++# CHECK: encoding: [0x3a,0x00,0x2f,0x75] ++xvsigncov.w $xr26, $xr1, $xr0 ++ ++# CHECK: xvsigncov.d $xr10, $xr27, $xr14 ++# CHECK: encoding: [0x6a,0xbb,0x2f,0x75] ++xvsigncov.d $xr10, $xr27, $xr14 ++ ++# CHECK: xvfadd.s $xr15, $xr25, $xr8 ++# CHECK: encoding: [0x2f,0xa3,0x30,0x75] ++xvfadd.s $xr15, $xr25, $xr8 ++ ++# CHECK: xvfadd.d $xr19, $xr6, $xr21 ++# CHECK: encoding: [0xd3,0x54,0x31,0x75] ++xvfadd.d $xr19, $xr6, $xr21 ++ ++# CHECK: xvfsub.s $xr26, $xr6, $xr6 ++# CHECK: encoding: [0xda,0x98,0x32,0x75] ++xvfsub.s $xr26, $xr6, $xr6 ++ ++# CHECK: xvfsub.d $xr9, $xr0, $xr21 ++# CHECK: encoding: [0x09,0x54,0x33,0x75] ++xvfsub.d $xr9, $xr0, $xr21 ++ ++# CHECK: xvfmul.s $xr6, $xr8, $xr14 ++# CHECK: encoding: [0x06,0xb9,0x38,0x75] ++xvfmul.s $xr6, $xr8, $xr14 ++ ++# CHECK: xvfmul.d $xr11, $xr21, $xr26 ++# CHECK: encoding: [0xab,0x6a,0x39,0x75] ++xvfmul.d $xr11, $xr21, $xr26 ++ ++# CHECK: xvfdiv.s $xr11, $xr7, $xr6 ++# CHECK: encoding: [0xeb,0x98,0x3a,0x75] ++xvfdiv.s $xr11, $xr7, $xr6 ++ ++# CHECK: xvfdiv.d $xr0, $xr26, $xr4 ++# CHECK: encoding: [0x40,0x13,0x3b,0x75] ++xvfdiv.d $xr0, $xr26, $xr4 ++ ++# CHECK: xvfmax.s $xr7, $xr9, $xr4 ++# CHECK: encoding: [0x27,0x91,0x3c,0x75] ++xvfmax.s $xr7, $xr9, $xr4 ++ ++# CHECK: xvfmax.d $xr0, $xr26, $xr20 ++# CHECK: encoding: [0x40,0x53,0x3d,0x75] ++xvfmax.d $xr0, $xr26, $xr20 ++ ++# CHECK: xvfmin.s $xr8, $xr10, $xr26 ++# CHECK: encoding: [0x48,0xe9,0x3e,0x75] ++xvfmin.s $xr8, $xr10, $xr26 ++ ++# CHECK: xvfmin.d $xr2, $xr22, $xr25 ++# CHECK: encoding: [0xc2,0x66,0x3f,0x75] ++xvfmin.d $xr2, $xr22, $xr25 ++ ++# CHECK: xvfmaxa.s $xr17, $xr4, $xr1 ++# CHECK: encoding: [0x91,0x84,0x40,0x75] ++xvfmaxa.s $xr17, $xr4, $xr1 ++ ++# CHECK: xvfmaxa.d $xr27, $xr23, $xr9 ++# CHECK: encoding: [0xfb,0x26,0x41,0x75] ++xvfmaxa.d $xr27, $xr23, $xr9 ++ ++# CHECK: xvfmina.s $xr21, $xr3, $xr27 ++# CHECK: encoding: [0x75,0xec,0x42,0x75] ++xvfmina.s $xr21, $xr3, $xr27 ++ ++# CHECK: xvfmina.d $xr7, $xr6, $xr4 ++# CHECK: encoding: [0xc7,0x10,0x43,0x75] ++xvfmina.d $xr7, $xr6, $xr4 ++ ++# CHECK: xvfcvt.h.s $xr9, $xr10, $xr20 ++# CHECK: encoding: [0x49,0x51,0x46,0x75] ++xvfcvt.h.s $xr9, $xr10, $xr20 ++ ++# CHECK: xvfcvt.s.d $xr5, $xr23, $xr21 ++# CHECK: encoding: [0xe5,0xd6,0x46,0x75] ++xvfcvt.s.d $xr5, $xr23, $xr21 ++ ++# CHECK: xvffint.s.l $xr28, $xr24, $xr10 ++# CHECK: encoding: [0x1c,0x2b,0x48,0x75] ++xvffint.s.l $xr28, $xr24, $xr10 ++ ++# CHECK: xvftint.w.d $xr6, $xr24, $xr1 ++# CHECK: encoding: [0x06,0x87,0x49,0x75] ++xvftint.w.d $xr6, $xr24, $xr1 ++ ++# CHECK: xvftintrm.w.d $xr27, $xr26, $xr30 ++# CHECK: encoding: [0x5b,0x7b,0x4a,0x75] ++xvftintrm.w.d $xr27, $xr26, $xr30 ++ ++# CHECK: xvftintrp.w.d $xr31, $xr12, $xr1 ++# CHECK: encoding: [0x9f,0x85,0x4a,0x75] ++xvftintrp.w.d $xr31, $xr12, $xr1 ++ ++# CHECK: xvftintrz.w.d $xr11, $xr21, $xr21 ++# CHECK: encoding: [0xab,0x56,0x4b,0x75] ++xvftintrz.w.d $xr11, $xr21, $xr21 ++ ++# CHECK: xvftintrne.w.d $xr15, 
$xr8, $xr28 ++# CHECK: encoding: [0x0f,0xf1,0x4b,0x75] ++xvftintrne.w.d $xr15, $xr8, $xr28 ++ ++# CHECK: xvshuf.h $xr20, $xr21, $xr3 ++# CHECK: encoding: [0xb4,0x8e,0x7a,0x75] ++xvshuf.h $xr20, $xr21, $xr3 ++ ++# CHECK: xvshuf.w $xr22, $xr2, $xr31 ++# CHECK: encoding: [0x56,0x7c,0x7b,0x75] ++xvshuf.w $xr22, $xr2, $xr31 ++ ++# CHECK: xvshuf.d $xr15, $xr3, $xr26 ++# CHECK: encoding: [0x6f,0xe8,0x7b,0x75] ++xvshuf.d $xr15, $xr3, $xr26 ++ ++# CHECK: xvperm.w $xr21, $xr23, $xr24 ++# CHECK: encoding: [0xf5,0x62,0x7d,0x75] ++xvperm.w $xr21, $xr23, $xr24 ++ ++# CHECK: xvseqi.b $xr28, $xr5, 1 ++# CHECK: encoding: [0xbc,0x04,0x80,0x76] ++xvseqi.b $xr28, $xr5, 1 ++ ++# CHECK: xvseqi.h $xr19, $xr9, -5 ++# CHECK: encoding: [0x33,0xed,0x80,0x76] ++xvseqi.h $xr19, $xr9, -5 ++ ++# CHECK: xvseqi.w $xr8, $xr18, -2 ++# CHECK: encoding: [0x48,0x7a,0x81,0x76] ++xvseqi.w $xr8, $xr18, -2 ++ ++# CHECK: xvseqi.d $xr2, $xr22, -4 ++# CHECK: encoding: [0xc2,0xf2,0x81,0x76] ++xvseqi.d $xr2, $xr22, -4 ++ ++# CHECK: xvslei.b $xr4, $xr21, -10 ++# CHECK: encoding: [0xa4,0x5a,0x82,0x76] ++xvslei.b $xr4, $xr21, -10 ++ ++# CHECK: xvslei.h $xr17, $xr20, -12 ++# CHECK: encoding: [0x91,0xd2,0x82,0x76] ++xvslei.h $xr17, $xr20, -12 ++ ++# CHECK: xvslei.w $xr9, $xr20, -7 ++# CHECK: encoding: [0x89,0x66,0x83,0x76] ++xvslei.w $xr9, $xr20, -7 ++ ++# CHECK: xvslei.d $xr19, $xr30, 10 ++# CHECK: encoding: [0xd3,0xab,0x83,0x76] ++xvslei.d $xr19, $xr30, 10 ++ ++# CHECK: xvslei.bu $xr4, $xr26, 1 ++# CHECK: encoding: [0x44,0x07,0x84,0x76] ++xvslei.bu $xr4, $xr26, 1 ++ ++# CHECK: xvslei.hu $xr11, $xr8, 4 ++# CHECK: encoding: [0x0b,0x91,0x84,0x76] ++xvslei.hu $xr11, $xr8, 4 ++ ++# CHECK: xvslei.wu $xr18, $xr12, 31 ++# CHECK: encoding: [0x92,0x7d,0x85,0x76] ++xvslei.wu $xr18, $xr12, 31 ++ ++# CHECK: xvslei.du $xr30, $xr7, 26 ++# CHECK: encoding: [0xfe,0xe8,0x85,0x76] ++xvslei.du $xr30, $xr7, 26 ++ ++# CHECK: xvslti.b $xr11, $xr29, 2 ++# CHECK: encoding: [0xab,0x0b,0x86,0x76] ++xvslti.b $xr11, $xr29, 2 ++ ++# CHECK: xvslti.h $xr6, $xr27, 8 ++# CHECK: encoding: [0x66,0xa3,0x86,0x76] ++xvslti.h $xr6, $xr27, 8 ++ ++# CHECK: xvslti.w $xr21, $xr23, 1 ++# CHECK: encoding: [0xf5,0x06,0x87,0x76] ++xvslti.w $xr21, $xr23, 1 ++ ++# CHECK: xvslti.d $xr18, $xr31, -5 ++# CHECK: encoding: [0xf2,0xef,0x87,0x76] ++xvslti.d $xr18, $xr31, -5 ++ ++# CHECK: xvslti.bu $xr27, $xr12, 17 ++# CHECK: encoding: [0x9b,0x45,0x88,0x76] ++xvslti.bu $xr27, $xr12, 17 ++ ++# CHECK: xvslti.hu $xr18, $xr14, 12 ++# CHECK: encoding: [0xd2,0xb1,0x88,0x76] ++xvslti.hu $xr18, $xr14, 12 ++ ++# CHECK: xvslti.wu $xr4, $xr12, 14 ++# CHECK: encoding: [0x84,0x39,0x89,0x76] ++xvslti.wu $xr4, $xr12, 14 ++ ++# CHECK: xvslti.du $xr26, $xr0, 24 ++# CHECK: encoding: [0x1a,0xe0,0x89,0x76] ++xvslti.du $xr26, $xr0, 24 ++ ++# CHECK: xvaddi.bu $xr30, $xr2, 5 ++# CHECK: encoding: [0x5e,0x14,0x8a,0x76] ++xvaddi.bu $xr30, $xr2, 5 ++ ++# CHECK: xvaddi.hu $xr22, $xr17, 9 ++# CHECK: encoding: [0x36,0xa6,0x8a,0x76] ++xvaddi.hu $xr22, $xr17, 9 ++ ++# CHECK: xvaddi.wu $xr3, $xr26, 29 ++# CHECK: encoding: [0x43,0x77,0x8b,0x76] ++xvaddi.wu $xr3, $xr26, 29 ++ ++# CHECK: xvaddi.du $xr0, $xr20, 30 ++# CHECK: encoding: [0x80,0xfa,0x8b,0x76] ++xvaddi.du $xr0, $xr20, 30 ++ ++# CHECK: xvsubi.bu $xr0, $xr20, 7 ++# CHECK: encoding: [0x80,0x1e,0x8c,0x76] ++xvsubi.bu $xr0, $xr20, 7 ++ ++# CHECK: xvsubi.hu $xr4, $xr24, 18 ++# CHECK: encoding: [0x04,0xcb,0x8c,0x76] ++xvsubi.hu $xr4, $xr24, 18 ++ ++# CHECK: xvsubi.wu $xr1, $xr26, 26 ++# CHECK: encoding: [0x41,0x6b,0x8d,0x76] ++xvsubi.wu $xr1, $xr26, 26 ++ ++# CHECK: xvsubi.du 
$xr9, $xr28, 8 ++# CHECK: encoding: [0x89,0xa3,0x8d,0x76] ++xvsubi.du $xr9, $xr28, 8 ++ ++# CHECK: xvbsll.v $xr0, $xr21, 8 ++# CHECK: encoding: [0xa0,0x22,0x8e,0x76] ++xvbsll.v $xr0, $xr21, 8 ++ ++# CHECK: xvbsrl.v $xr4, $xr8, 28 ++# CHECK: encoding: [0x04,0xf1,0x8e,0x76] ++xvbsrl.v $xr4, $xr8, 28 ++ ++# CHECK: xvmaxi.b $xr8, $xr1, -14 ++# CHECK: encoding: [0x28,0x48,0x90,0x76] ++xvmaxi.b $xr8, $xr1, -14 ++ ++# CHECK: xvmaxi.h $xr19, $xr12, -16 ++# CHECK: encoding: [0x93,0xc1,0x90,0x76] ++xvmaxi.h $xr19, $xr12, -16 ++ ++# CHECK: xvmaxi.w $xr27, $xr1, 5 ++# CHECK: encoding: [0x3b,0x14,0x91,0x76] ++xvmaxi.w $xr27, $xr1, 5 ++ ++# CHECK: xvmaxi.d $xr6, $xr7, 3 ++# CHECK: encoding: [0xe6,0x8c,0x91,0x76] ++xvmaxi.d $xr6, $xr7, 3 ++ ++# CHECK: xvmini.b $xr10, $xr6, 5 ++# CHECK: encoding: [0xca,0x14,0x92,0x76] ++xvmini.b $xr10, $xr6, 5 ++ ++# CHECK: xvmini.h $xr8, $xr18, -12 ++# CHECK: encoding: [0x48,0xd2,0x92,0x76] ++xvmini.h $xr8, $xr18, -12 ++ ++# CHECK: xvmini.w $xr31, $xr13, -7 ++# CHECK: encoding: [0xbf,0x65,0x93,0x76] ++xvmini.w $xr31, $xr13, -7 ++ ++# CHECK: xvmini.d $xr15, $xr27, 9 ++# CHECK: encoding: [0x6f,0xa7,0x93,0x76] ++xvmini.d $xr15, $xr27, 9 ++ ++# CHECK: xvmaxi.bu $xr5, $xr17, 22 ++# CHECK: encoding: [0x25,0x5a,0x94,0x76] ++xvmaxi.bu $xr5, $xr17, 22 ++ ++# CHECK: xvmaxi.hu $xr6, $xr3, 4 ++# CHECK: encoding: [0x66,0x90,0x94,0x76] ++xvmaxi.hu $xr6, $xr3, 4 ++ ++# CHECK: xvmaxi.wu $xr26, $xr12, 17 ++# CHECK: encoding: [0x9a,0x45,0x95,0x76] ++xvmaxi.wu $xr26, $xr12, 17 ++ ++# CHECK: xvmaxi.du $xr30, $xr11, 30 ++# CHECK: encoding: [0x7e,0xf9,0x95,0x76] ++xvmaxi.du $xr30, $xr11, 30 ++ ++# CHECK: xvmini.bu $xr15, $xr8, 7 ++# CHECK: encoding: [0x0f,0x1d,0x96,0x76] ++xvmini.bu $xr15, $xr8, 7 ++ ++# CHECK: xvmini.hu $xr18, $xr25, 1 ++# CHECK: encoding: [0x32,0x87,0x96,0x76] ++xvmini.hu $xr18, $xr25, 1 ++ ++# CHECK: xvmini.wu $xr16, $xr28, 0 ++# CHECK: encoding: [0x90,0x03,0x97,0x76] ++xvmini.wu $xr16, $xr28, 0 ++ ++# CHECK: xvmini.du $xr10, $xr19, 29 ++# CHECK: encoding: [0x6a,0xf6,0x97,0x76] ++xvmini.du $xr10, $xr19, 29 ++ ++# CHECK: xvfrstpi.b $xr8, $xr25, 2 ++# CHECK: encoding: [0x28,0x0b,0x9a,0x76] ++xvfrstpi.b $xr8, $xr25, 2 ++ ++# CHECK: xvfrstpi.h $xr28, $xr19, 26 ++# CHECK: encoding: [0x7c,0xea,0x9a,0x76] ++xvfrstpi.h $xr28, $xr19, 26 ++ ++# CHECK: xvclo.b $xr2, $xr8 ++# CHECK: encoding: [0x02,0x01,0x9c,0x76] ++xvclo.b $xr2, $xr8 ++ ++# CHECK: xvclo.h $xr10, $xr9 ++# CHECK: encoding: [0x2a,0x05,0x9c,0x76] ++xvclo.h $xr10, $xr9 ++ ++# CHECK: xvclo.w $xr2, $xr31 ++# CHECK: encoding: [0xe2,0x0b,0x9c,0x76] ++xvclo.w $xr2, $xr31 ++ ++# CHECK: xvclo.d $xr21, $xr24 ++# CHECK: encoding: [0x15,0x0f,0x9c,0x76] ++xvclo.d $xr21, $xr24 ++ ++# CHECK: xvclz.b $xr13, $xr24 ++# CHECK: encoding: [0x0d,0x13,0x9c,0x76] ++xvclz.b $xr13, $xr24 ++ ++# CHECK: xvclz.h $xr4, $xr31 ++# CHECK: encoding: [0xe4,0x17,0x9c,0x76] ++xvclz.h $xr4, $xr31 ++ ++# CHECK: xvclz.w $xr7, $xr1 ++# CHECK: encoding: [0x27,0x18,0x9c,0x76] ++xvclz.w $xr7, $xr1 ++ ++# CHECK: xvclz.d $xr13, $xr22 ++# CHECK: encoding: [0xcd,0x1e,0x9c,0x76] ++xvclz.d $xr13, $xr22 ++ ++# CHECK: xvpcnt.b $xr9, $xr26 ++# CHECK: encoding: [0x49,0x23,0x9c,0x76] ++xvpcnt.b $xr9, $xr26 ++ ++# CHECK: xvpcnt.h $xr10, $xr3 ++# CHECK: encoding: [0x6a,0x24,0x9c,0x76] ++xvpcnt.h $xr10, $xr3 ++ ++# CHECK: xvpcnt.w $xr24, $xr7 ++# CHECK: encoding: [0xf8,0x28,0x9c,0x76] ++xvpcnt.w $xr24, $xr7 ++ ++# CHECK: xvpcnt.d $xr5, $xr8 ++# CHECK: encoding: [0x05,0x2d,0x9c,0x76] ++xvpcnt.d $xr5, $xr8 ++ ++# CHECK: xvneg.b $xr19, $xr11 ++# CHECK: encoding: 
[0x73,0x31,0x9c,0x76] ++xvneg.b $xr19, $xr11 ++ ++# CHECK: xvneg.h $xr21, $xr21 ++# CHECK: encoding: [0xb5,0x36,0x9c,0x76] ++xvneg.h $xr21, $xr21 ++ ++# CHECK: xvneg.w $xr19, $xr17 ++# CHECK: encoding: [0x33,0x3a,0x9c,0x76] ++xvneg.w $xr19, $xr17 ++ ++# CHECK: xvneg.d $xr31, $xr29 ++# CHECK: encoding: [0xbf,0x3f,0x9c,0x76] ++xvneg.d $xr31, $xr29 ++ ++# CHECK: xvmskltz.b $xr22, $xr27 ++# CHECK: encoding: [0x76,0x43,0x9c,0x76] ++xvmskltz.b $xr22, $xr27 ++ ++# CHECK: xvmskltz.h $xr5, $xr0 ++# CHECK: encoding: [0x05,0x44,0x9c,0x76] ++xvmskltz.h $xr5, $xr0 ++ ++# CHECK: xvmskltz.w $xr24, $xr28 ++# CHECK: encoding: [0x98,0x4b,0x9c,0x76] ++xvmskltz.w $xr24, $xr28 ++ ++# CHECK: xvmskltz.d $xr25, $xr2 ++# CHECK: encoding: [0x59,0x4c,0x9c,0x76] ++xvmskltz.d $xr25, $xr2 ++ ++# CHECK: xvmskgez.b $xr30, $xr30 ++# CHECK: encoding: [0xde,0x53,0x9c,0x76] ++xvmskgez.b $xr30, $xr30 ++ ++# CHECK: xvmsknz.b $xr5, $xr20 ++# CHECK: encoding: [0x85,0x62,0x9c,0x76] ++xvmsknz.b $xr5, $xr20 ++ ++# CHECK: xvseteqz.v $fcc1, $xr25 ++# CHECK: encoding: [0x21,0x9b,0x9c,0x76] ++xvseteqz.v $fcc1, $xr25 ++ ++# CHECK: xvsetnez.v $fcc5, $xr13 ++# CHECK: encoding: [0xa5,0x9d,0x9c,0x76] ++xvsetnez.v $fcc5, $xr13 ++ ++# CHECK: xvsetanyeqz.b $fcc0, $xr4 ++# CHECK: encoding: [0x80,0xa0,0x9c,0x76] ++xvsetanyeqz.b $fcc0, $xr4 ++ ++# CHECK: xvsetanyeqz.h $fcc0, $xr31 ++# CHECK: encoding: [0xe0,0xa7,0x9c,0x76] ++xvsetanyeqz.h $fcc0, $xr31 ++ ++# CHECK: xvsetanyeqz.w $fcc2, $xr30 ++# CHECK: encoding: [0xc2,0xab,0x9c,0x76] ++xvsetanyeqz.w $fcc2, $xr30 ++ ++# CHECK: xvsetanyeqz.d $fcc3, $xr31 ++# CHECK: encoding: [0xe3,0xaf,0x9c,0x76] ++xvsetanyeqz.d $fcc3, $xr31 ++ ++# CHECK: xvsetallnez.b $fcc1, $xr21 ++# CHECK: encoding: [0xa1,0xb2,0x9c,0x76] ++xvsetallnez.b $fcc1, $xr21 ++ ++# CHECK: xvsetallnez.h $fcc0, $xr21 ++# CHECK: encoding: [0xa0,0xb6,0x9c,0x76] ++xvsetallnez.h $fcc0, $xr21 ++ ++# CHECK: xvsetallnez.w $fcc0, $xr0 ++# CHECK: encoding: [0x00,0xb8,0x9c,0x76] ++xvsetallnez.w $fcc0, $xr0 ++ ++# CHECK: xvsetallnez.d $fcc1, $xr31 ++# CHECK: encoding: [0xe1,0xbf,0x9c,0x76] ++xvsetallnez.d $fcc1, $xr31 ++ ++# CHECK: xvflogb.s $xr21, $xr4 ++# CHECK: encoding: [0x95,0xc4,0x9c,0x76] ++xvflogb.s $xr21, $xr4 ++ ++# CHECK: xvflogb.d $xr8, $xr20 ++# CHECK: encoding: [0x88,0xca,0x9c,0x76] ++xvflogb.d $xr8, $xr20 ++ ++# CHECK: xvfclass.s $xr15, $xr29 ++# CHECK: encoding: [0xaf,0xd7,0x9c,0x76] ++xvfclass.s $xr15, $xr29 ++ ++# CHECK: xvfclass.d $xr7, $xr14 ++# CHECK: encoding: [0xc7,0xd9,0x9c,0x76] ++xvfclass.d $xr7, $xr14 ++ ++# CHECK: xvfsqrt.s $xr28, $xr19 ++# CHECK: encoding: [0x7c,0xe6,0x9c,0x76] ++xvfsqrt.s $xr28, $xr19 ++ ++# CHECK: xvfsqrt.d $xr11, $xr31 ++# CHECK: encoding: [0xeb,0xeb,0x9c,0x76] ++xvfsqrt.d $xr11, $xr31 ++ ++# CHECK: xvfrecip.s $xr6, $xr23 ++# CHECK: encoding: [0xe6,0xf6,0x9c,0x76] ++xvfrecip.s $xr6, $xr23 ++ ++# CHECK: xvfrecip.d $xr0, $xr24 ++# CHECK: encoding: [0x00,0xfb,0x9c,0x76] ++xvfrecip.d $xr0, $xr24 ++ ++# CHECK: xvfrsqrt.s $xr8, $xr16 ++# CHECK: encoding: [0x08,0x06,0x9d,0x76] ++xvfrsqrt.s $xr8, $xr16 ++ ++# CHECK: xvfrsqrt.d $xr15, $xr17 ++# CHECK: encoding: [0x2f,0x0a,0x9d,0x76] ++xvfrsqrt.d $xr15, $xr17 ++ ++# CHECK: xvfrint.s $xr4, $xr25 ++# CHECK: encoding: [0x24,0x37,0x9d,0x76] ++xvfrint.s $xr4, $xr25 ++ ++# CHECK: xvfrint.d $xr1, $xr20 ++# CHECK: encoding: [0x81,0x3a,0x9d,0x76] ++xvfrint.d $xr1, $xr20 ++ ++# CHECK: xvfrintrm.s $xr29, $xr16 ++# CHECK: encoding: [0x1d,0x46,0x9d,0x76] ++xvfrintrm.s $xr29, $xr16 ++ ++# CHECK: xvfrintrm.d $xr4, $xr10 ++# CHECK: encoding: [0x44,0x49,0x9d,0x76] ++xvfrintrm.d 
$xr4, $xr10 ++ ++# CHECK: xvfrintrp.s $xr13, $xr31 ++# CHECK: encoding: [0xed,0x57,0x9d,0x76] ++xvfrintrp.s $xr13, $xr31 ++ ++# CHECK: xvfrintrp.d $xr20, $xr11 ++# CHECK: encoding: [0x74,0x59,0x9d,0x76] ++xvfrintrp.d $xr20, $xr11 ++ ++# CHECK: xvfrintrz.s $xr27, $xr13 ++# CHECK: encoding: [0xbb,0x65,0x9d,0x76] ++xvfrintrz.s $xr27, $xr13 ++ ++# CHECK: xvfrintrz.d $xr17, $xr25 ++# CHECK: encoding: [0x31,0x6b,0x9d,0x76] ++xvfrintrz.d $xr17, $xr25 ++ ++# CHECK: xvfrintrne.s $xr14, $xr8 ++# CHECK: encoding: [0x0e,0x75,0x9d,0x76] ++xvfrintrne.s $xr14, $xr8 ++ ++# CHECK: xvfrintrne.d $xr23, $xr26 ++# CHECK: encoding: [0x57,0x7b,0x9d,0x76] ++xvfrintrne.d $xr23, $xr26 ++ ++# CHECK: xvfcvtl.s.h $xr4, $xr23 ++# CHECK: encoding: [0xe4,0xea,0x9d,0x76] ++xvfcvtl.s.h $xr4, $xr23 ++ ++# CHECK: xvfcvth.s.h $xr14, $xr11 ++# CHECK: encoding: [0x6e,0xed,0x9d,0x76] ++xvfcvth.s.h $xr14, $xr11 ++ ++# CHECK: xvfcvtl.d.s $xr26, $xr31 ++# CHECK: encoding: [0xfa,0xf3,0x9d,0x76] ++xvfcvtl.d.s $xr26, $xr31 ++ ++# CHECK: xvfcvth.d.s $xr13, $xr28 ++# CHECK: encoding: [0x8d,0xf7,0x9d,0x76] ++xvfcvth.d.s $xr13, $xr28 ++ ++# CHECK: xvffint.s.w $xr14, $xr28 ++# CHECK: encoding: [0x8e,0x03,0x9e,0x76] ++xvffint.s.w $xr14, $xr28 ++ ++# CHECK: xvffint.s.wu $xr0, $xr8 ++# CHECK: encoding: [0x00,0x05,0x9e,0x76] ++xvffint.s.wu $xr0, $xr8 ++ ++# CHECK: xvffint.d.l $xr5, $xr27 ++# CHECK: encoding: [0x65,0x0b,0x9e,0x76] ++xvffint.d.l $xr5, $xr27 ++ ++# CHECK: xvffint.d.lu $xr29, $xr18 ++# CHECK: encoding: [0x5d,0x0e,0x9e,0x76] ++xvffint.d.lu $xr29, $xr18 ++ ++# CHECK: xvffintl.d.w $xr9, $xr20 ++# CHECK: encoding: [0x89,0x12,0x9e,0x76] ++xvffintl.d.w $xr9, $xr20 ++ ++# CHECK: xvffinth.d.w $xr11, $xr13 ++# CHECK: encoding: [0xab,0x15,0x9e,0x76] ++xvffinth.d.w $xr11, $xr13 ++ ++# CHECK: xvftint.w.s $xr6, $xr4 ++# CHECK: encoding: [0x86,0x30,0x9e,0x76] ++xvftint.w.s $xr6, $xr4 ++ ++# CHECK: xvftint.l.d $xr11, $xr22 ++# CHECK: encoding: [0xcb,0x36,0x9e,0x76] ++xvftint.l.d $xr11, $xr22 ++ ++# CHECK: xvftintrm.w.s $xr20, $xr21 ++# CHECK: encoding: [0xb4,0x3a,0x9e,0x76] ++xvftintrm.w.s $xr20, $xr21 ++ ++# CHECK: xvftintrm.l.d $xr28, $xr27 ++# CHECK: encoding: [0x7c,0x3f,0x9e,0x76] ++xvftintrm.l.d $xr28, $xr27 ++ ++# CHECK: xvftintrp.w.s $xr14, $xr16 ++# CHECK: encoding: [0x0e,0x42,0x9e,0x76] ++xvftintrp.w.s $xr14, $xr16 ++ ++# CHECK: xvftintrp.l.d $xr14, $xr25 ++# CHECK: encoding: [0x2e,0x47,0x9e,0x76] ++xvftintrp.l.d $xr14, $xr25 ++ ++# CHECK: xvftintrz.w.s $xr5, $xr30 ++# CHECK: encoding: [0xc5,0x4b,0x9e,0x76] ++xvftintrz.w.s $xr5, $xr30 ++ ++# CHECK: xvftintrz.l.d $xr11, $xr19 ++# CHECK: encoding: [0x6b,0x4e,0x9e,0x76] ++xvftintrz.l.d $xr11, $xr19 ++ ++# CHECK: xvftintrne.w.s $xr27, $xr23 ++# CHECK: encoding: [0xfb,0x52,0x9e,0x76] ++xvftintrne.w.s $xr27, $xr23 ++ ++# CHECK: xvftintrne.l.d $xr27, $xr13 ++# CHECK: encoding: [0xbb,0x55,0x9e,0x76] ++xvftintrne.l.d $xr27, $xr13 ++ ++# CHECK: xvftint.wu.s $xr28, $xr2 ++# CHECK: encoding: [0x5c,0x58,0x9e,0x76] ++xvftint.wu.s $xr28, $xr2 ++ ++# CHECK: xvftint.lu.d $xr27, $xr12 ++# CHECK: encoding: [0x9b,0x5d,0x9e,0x76] ++xvftint.lu.d $xr27, $xr12 ++ ++# CHECK: xvftintrz.wu.s $xr21, $xr29 ++# CHECK: encoding: [0xb5,0x73,0x9e,0x76] ++xvftintrz.wu.s $xr21, $xr29 ++ ++# CHECK: xvftintrz.lu.d $xr19, $xr2 ++# CHECK: encoding: [0x53,0x74,0x9e,0x76] ++xvftintrz.lu.d $xr19, $xr2 ++ ++# CHECK: xvftintl.l.s $xr2, $xr18 ++# CHECK: encoding: [0x42,0x82,0x9e,0x76] ++xvftintl.l.s $xr2, $xr18 ++ ++# CHECK: xvftinth.l.s $xr8, $xr30 ++# CHECK: encoding: [0xc8,0x87,0x9e,0x76] ++xvftinth.l.s $xr8, $xr30 ++ ++# 
CHECK: xvftintrml.l.s $xr13, $xr17 ++# CHECK: encoding: [0x2d,0x8a,0x9e,0x76] ++xvftintrml.l.s $xr13, $xr17 ++ ++# CHECK: xvftintrmh.l.s $xr30, $xr26 ++# CHECK: encoding: [0x5e,0x8f,0x9e,0x76] ++xvftintrmh.l.s $xr30, $xr26 ++ ++# CHECK: xvftintrpl.l.s $xr11, $xr26 ++# CHECK: encoding: [0x4b,0x93,0x9e,0x76] ++xvftintrpl.l.s $xr11, $xr26 ++ ++# CHECK: xvftintrph.l.s $xr30, $xr11 ++# CHECK: encoding: [0x7e,0x95,0x9e,0x76] ++xvftintrph.l.s $xr30, $xr11 ++ ++# CHECK: xvftintrzl.l.s $xr25, $xr7 ++# CHECK: encoding: [0xf9,0x98,0x9e,0x76] ++xvftintrzl.l.s $xr25, $xr7 ++ ++# CHECK: xvftintrzh.l.s $xr12, $xr5 ++# CHECK: encoding: [0xac,0x9c,0x9e,0x76] ++xvftintrzh.l.s $xr12, $xr5 ++ ++# CHECK: xvftintrnel.l.s $xr8, $xr24 ++# CHECK: encoding: [0x08,0xa3,0x9e,0x76] ++xvftintrnel.l.s $xr8, $xr24 ++ ++# CHECK: xvftintrneh.l.s $xr25, $xr24 ++# CHECK: encoding: [0x19,0xa7,0x9e,0x76] ++xvftintrneh.l.s $xr25, $xr24 ++ ++# CHECK: xvexth.h.b $xr23, $xr5 ++# CHECK: encoding: [0xb7,0xe0,0x9e,0x76] ++xvexth.h.b $xr23, $xr5 ++ ++# CHECK: xvexth.w.h $xr25, $xr6 ++# CHECK: encoding: [0xd9,0xe4,0x9e,0x76] ++xvexth.w.h $xr25, $xr6 ++ ++# CHECK: xvexth.d.w $xr7, $xr27 ++# CHECK: encoding: [0x67,0xeb,0x9e,0x76] ++xvexth.d.w $xr7, $xr27 ++ ++# CHECK: xvexth.q.d $xr14, $xr10 ++# CHECK: encoding: [0x4e,0xed,0x9e,0x76] ++xvexth.q.d $xr14, $xr10 ++ ++# CHECK: xvexth.hu.bu $xr0, $xr21 ++# CHECK: encoding: [0xa0,0xf2,0x9e,0x76] ++xvexth.hu.bu $xr0, $xr21 ++ ++# CHECK: xvexth.wu.hu $xr15, $xr22 ++# CHECK: encoding: [0xcf,0xf6,0x9e,0x76] ++xvexth.wu.hu $xr15, $xr22 ++ ++# CHECK: xvexth.du.wu $xr24, $xr15 ++# CHECK: encoding: [0xf8,0xf9,0x9e,0x76] ++xvexth.du.wu $xr24, $xr15 ++ ++# CHECK: xvexth.qu.du $xr4, $xr2 ++# CHECK: encoding: [0x44,0xfc,0x9e,0x76] ++xvexth.qu.du $xr4, $xr2 ++ ++# CHECK: xvreplgr2vr.b $xr21, $r6 ++# CHECK: encoding: [0xd5,0x00,0x9f,0x76] ++xvreplgr2vr.b $xr21, $r6 ++ ++# CHECK: xvreplgr2vr.h $xr11, $ra ++# CHECK: encoding: [0x2b,0x04,0x9f,0x76] ++xvreplgr2vr.h $xr11, $ra ++ ++# CHECK: xvreplgr2vr.w $xr13, $r22 ++# CHECK: encoding: [0xcd,0x0a,0x9f,0x76] ++xvreplgr2vr.w $xr13, $r22 ++ ++# CHECK: xvreplgr2vr.d $xr9, $r17 ++# CHECK: encoding: [0x29,0x0e,0x9f,0x76] ++xvreplgr2vr.d $xr9, $r17 ++ ++# CHECK: vext2xv.h.b $xr18, $xr16 ++# CHECK: encoding: [0x12,0x12,0x9f,0x76] ++vext2xv.h.b $xr18, $xr16 ++ ++# CHECK: vext2xv.w.b $xr3, $xr23 ++# CHECK: encoding: [0xe3,0x16,0x9f,0x76] ++vext2xv.w.b $xr3, $xr23 ++ ++# CHECK: vext2xv.d.b $xr30, $xr16 ++# CHECK: encoding: [0x1e,0x1a,0x9f,0x76] ++vext2xv.d.b $xr30, $xr16 ++ ++# CHECK: vext2xv.w.h $xr28, $xr23 ++# CHECK: encoding: [0xfc,0x1e,0x9f,0x76] ++vext2xv.w.h $xr28, $xr23 ++ ++# CHECK: vext2xv.d.h $xr4, $xr1 ++# CHECK: encoding: [0x24,0x20,0x9f,0x76] ++vext2xv.d.h $xr4, $xr1 ++ ++# CHECK: vext2xv.d.w $xr23, $xr12 ++# CHECK: encoding: [0x97,0x25,0x9f,0x76] ++vext2xv.d.w $xr23, $xr12 ++ ++# CHECK: vext2xv.hu.bu $xr0, $xr5 ++# CHECK: encoding: [0xa0,0x28,0x9f,0x76] ++vext2xv.hu.bu $xr0, $xr5 ++ ++# CHECK: vext2xv.wu.bu $xr1, $xr4 ++# CHECK: encoding: [0x81,0x2c,0x9f,0x76] ++vext2xv.wu.bu $xr1, $xr4 ++ ++# CHECK: vext2xv.du.bu $xr17, $xr11 ++# CHECK: encoding: [0x71,0x31,0x9f,0x76] ++vext2xv.du.bu $xr17, $xr11 ++ ++# CHECK: vext2xv.wu.hu $xr28, $xr0 ++# CHECK: encoding: [0x1c,0x34,0x9f,0x76] ++vext2xv.wu.hu $xr28, $xr0 ++ ++# CHECK: vext2xv.du.hu $xr26, $xr25 ++# CHECK: encoding: [0x3a,0x3b,0x9f,0x76] ++vext2xv.du.hu $xr26, $xr25 ++ ++# CHECK: vext2xv.du.wu $xr29, $xr14 ++# CHECK: encoding: [0xdd,0x3d,0x9f,0x76] ++vext2xv.du.wu $xr29, $xr14 ++ ++# CHECK: xvhseli.d $xr3, 
$xr22, 13 ++# CHECK: encoding: [0xc3,0xb6,0x9f,0x76] ++xvhseli.d $xr3, $xr22, 13 ++ ++# CHECK: xvrotri.b $xr0, $xr14, 2 ++# CHECK: encoding: [0xc0,0x29,0xa0,0x76] ++xvrotri.b $xr0, $xr14, 2 ++ ++# CHECK: xvrotri.h $xr0, $xr7, 11 ++# CHECK: encoding: [0xe0,0x6c,0xa0,0x76] ++xvrotri.h $xr0, $xr7, 11 ++ ++# CHECK: xvrotri.w $xr24, $xr1, 3 ++# CHECK: encoding: [0x38,0x8c,0xa0,0x76] ++xvrotri.w $xr24, $xr1, 3 ++ ++# CHECK: xvrotri.d $xr31, $xr7, 16 ++# CHECK: encoding: [0xff,0x40,0xa1,0x76] ++xvrotri.d $xr31, $xr7, 16 ++ ++# CHECK: xvsrlri.b $xr20, $xr19, 1 ++# CHECK: encoding: [0x74,0x26,0xa4,0x76] ++xvsrlri.b $xr20, $xr19, 1 ++ ++# CHECK: xvsrlri.h $xr28, $xr1, 11 ++# CHECK: encoding: [0x3c,0x6c,0xa4,0x76] ++xvsrlri.h $xr28, $xr1, 11 ++ ++# CHECK: xvsrlri.w $xr25, $xr2, 27 ++# CHECK: encoding: [0x59,0xec,0xa4,0x76] ++xvsrlri.w $xr25, $xr2, 27 ++ ++# CHECK: xvsrlri.d $xr29, $xr9, 6 ++# CHECK: encoding: [0x3d,0x19,0xa5,0x76] ++xvsrlri.d $xr29, $xr9, 6 ++ ++# CHECK: xvsrari.b $xr7, $xr5, 2 ++# CHECK: encoding: [0xa7,0x28,0xa8,0x76] ++xvsrari.b $xr7, $xr5, 2 ++ ++# CHECK: xvsrari.h $xr0, $xr10, 9 ++# CHECK: encoding: [0x40,0x65,0xa8,0x76] ++xvsrari.h $xr0, $xr10, 9 ++ ++# CHECK: xvsrari.w $xr17, $xr24, 10 ++# CHECK: encoding: [0x11,0xab,0xa8,0x76] ++xvsrari.w $xr17, $xr24, 10 ++ ++# CHECK: xvsrari.d $xr7, $xr14, 38 ++# CHECK: encoding: [0xc7,0x99,0xa9,0x76] ++xvsrari.d $xr7, $xr14, 38 ++ ++# CHECK: xvinsgr2vr.w $xr5, $r31, 1 ++# CHECK: encoding: [0xe5,0xc7,0xeb,0x76] ++xvinsgr2vr.w $xr5, $r31, 1 ++ ++# CHECK: xvinsgr2vr.d $xr5, $r26, 1 ++# CHECK: encoding: [0x45,0xe7,0xeb,0x76] ++xvinsgr2vr.d $xr5, $r26, 1 ++ ++# CHECK: xvpickve2gr.w $r18, $xr28, 2 ++# CHECK: encoding: [0x92,0xcb,0xef,0x76] ++xvpickve2gr.w $r18, $xr28, 2 ++ ++# CHECK: xvpickve2gr.d $r20, $xr10, 1 ++# CHECK: encoding: [0x54,0xe5,0xef,0x76] ++xvpickve2gr.d $r20, $xr10, 1 ++ ++# CHECK: xvpickve2gr.wu $r9, $xr12, 6 ++# CHECK: encoding: [0x89,0xd9,0xf3,0x76] ++xvpickve2gr.wu $r9, $xr12, 6 ++ ++# CHECK: xvpickve2gr.du $r9, $xr13, 2 ++# CHECK: encoding: [0xa9,0xe9,0xf3,0x76] ++xvpickve2gr.du $r9, $xr13, 2 ++ ++# CHECK: xvrepl128vei.b $xr1, $xr30, 5 ++# CHECK: encoding: [0xc1,0x97,0xf7,0x76] ++xvrepl128vei.b $xr1, $xr30, 5 ++ ++# CHECK: xvrepl128vei.h $xr13, $xr13, 7 ++# CHECK: encoding: [0xad,0xdd,0xf7,0x76] ++xvrepl128vei.h $xr13, $xr13, 7 ++ ++# CHECK: xvrepl128vei.w $xr7, $xr13, 2 ++# CHECK: encoding: [0xa7,0xe9,0xf7,0x76] ++xvrepl128vei.w $xr7, $xr13, 2 ++ ++# CHECK: xvrepl128vei.d $xr2, $xr31, 1 ++# CHECK: encoding: [0xe2,0xf7,0xf7,0x76] ++xvrepl128vei.d $xr2, $xr31, 1 ++ ++# CHECK: xvinsve0.w $xr4, $xr13, 3 ++# CHECK: encoding: [0xa4,0xcd,0xff,0x76] ++xvinsve0.w $xr4, $xr13, 3 ++ ++# CHECK: xvinsve0.d $xr27, $xr25, 0 ++# CHECK: encoding: [0x3b,0xe3,0xff,0x76] ++xvinsve0.d $xr27, $xr25, 0 ++ ++# CHECK: xvpickve.w $xr29, $xr19, 7 ++# CHECK: encoding: [0x7d,0xde,0x03,0x77] ++xvpickve.w $xr29, $xr19, 7 ++ ++# CHECK: xvpickve.d $xr19, $xr16, 3 ++# CHECK: encoding: [0x13,0xee,0x03,0x77] ++xvpickve.d $xr19, $xr16, 3 ++ ++# CHECK: xvreplve0.b $xr5, $xr5 ++# CHECK: encoding: [0xa5,0x00,0x07,0x77] ++xvreplve0.b $xr5, $xr5 ++ ++# CHECK: xvreplve0.h $xr14, $xr24 ++# CHECK: encoding: [0x0e,0x83,0x07,0x77] ++xvreplve0.h $xr14, $xr24 ++ ++# CHECK: xvreplve0.w $xr15, $xr13 ++# CHECK: encoding: [0xaf,0xc1,0x07,0x77] ++xvreplve0.w $xr15, $xr13 ++ ++# CHECK: xvreplve0.d $xr20, $xr20 ++# CHECK: encoding: [0x94,0xe2,0x07,0x77] ++xvreplve0.d $xr20, $xr20 ++ ++# CHECK: xvreplve0.q $xr5, $xr10 ++# CHECK: encoding: [0x45,0xf1,0x07,0x77] ++xvreplve0.q 
$xr5, $xr10 ++ ++# CHECK: xvsllwil.h.b $xr31, $xr0, 3 ++# CHECK: encoding: [0x1f,0x2c,0x08,0x77] ++xvsllwil.h.b $xr31, $xr0, 3 ++ ++# CHECK: xvsllwil.w.h $xr21, $xr24, 7 ++# CHECK: encoding: [0x15,0x5f,0x08,0x77] ++xvsllwil.w.h $xr21, $xr24, 7 ++ ++# CHECK: xvsllwil.d.w $xr26, $xr24, 18 ++# CHECK: encoding: [0x1a,0xcb,0x08,0x77] ++xvsllwil.d.w $xr26, $xr24, 18 ++ ++# CHECK: xvextl.q.d $xr5, $xr6 ++# CHECK: encoding: [0xc5,0x00,0x09,0x77] ++xvextl.q.d $xr5, $xr6 ++ ++# CHECK: xvsllwil.hu.bu $xr13, $xr31, 6 ++# CHECK: encoding: [0xed,0x3b,0x0c,0x77] ++xvsllwil.hu.bu $xr13, $xr31, 6 ++ ++# CHECK: xvsllwil.wu.hu $xr19, $xr20, 8 ++# CHECK: encoding: [0x93,0x62,0x0c,0x77] ++xvsllwil.wu.hu $xr19, $xr20, 8 ++ ++# CHECK: xvsllwil.du.wu $xr14, $xr13, 2 ++# CHECK: encoding: [0xae,0x89,0x0c,0x77] ++xvsllwil.du.wu $xr14, $xr13, 2 ++ ++# CHECK: xvextl.qu.du $xr10, $xr7 ++# CHECK: encoding: [0xea,0x00,0x0d,0x77] ++xvextl.qu.du $xr10, $xr7 ++ ++# CHECK: xvbitclri.b $xr31, $xr21, 5 ++# CHECK: encoding: [0xbf,0x36,0x10,0x77] ++xvbitclri.b $xr31, $xr21, 5 ++ ++# CHECK: xvbitclri.h $xr26, $xr4, 2 ++# CHECK: encoding: [0x9a,0x48,0x10,0x77] ++xvbitclri.h $xr26, $xr4, 2 ++ ++# CHECK: xvbitclri.w $xr21, $xr25, 15 ++# CHECK: encoding: [0x35,0xbf,0x10,0x77] ++xvbitclri.w $xr21, $xr25, 15 ++ ++# CHECK: xvbitclri.d $xr14, $xr0, 63 ++# CHECK: encoding: [0x0e,0xfc,0x11,0x77] ++xvbitclri.d $xr14, $xr0, 63 ++ ++# CHECK: xvbitseti.b $xr16, $xr1, 5 ++# CHECK: encoding: [0x30,0x34,0x14,0x77] ++xvbitseti.b $xr16, $xr1, 5 ++ ++# CHECK: xvbitseti.h $xr19, $xr30, 3 ++# CHECK: encoding: [0xd3,0x4f,0x14,0x77] ++xvbitseti.h $xr19, $xr30, 3 ++ ++# CHECK: xvbitseti.w $xr18, $xr22, 27 ++# CHECK: encoding: [0xd2,0xee,0x14,0x77] ++xvbitseti.w $xr18, $xr22, 27 ++ ++# CHECK: xvbitseti.d $xr15, $xr1, 40 ++# CHECK: encoding: [0x2f,0xa0,0x15,0x77] ++xvbitseti.d $xr15, $xr1, 40 ++ ++# CHECK: xvbitrevi.b $xr23, $xr5, 0 ++# CHECK: encoding: [0xb7,0x20,0x18,0x77] ++xvbitrevi.b $xr23, $xr5, 0 ++ ++# CHECK: xvbitrevi.h $xr5, $xr2, 7 ++# CHECK: encoding: [0x45,0x5c,0x18,0x77] ++xvbitrevi.h $xr5, $xr2, 7 ++ ++# CHECK: xvbitrevi.w $xr23, $xr6, 12 ++# CHECK: encoding: [0xd7,0xb0,0x18,0x77] ++xvbitrevi.w $xr23, $xr6, 12 ++ ++# CHECK: xvbitrevi.d $xr18, $xr14, 33 ++# CHECK: encoding: [0xd2,0x85,0x19,0x77] ++xvbitrevi.d $xr18, $xr14, 33 ++ ++# CHECK: xvsat.b $xr27, $xr26, 4 ++# CHECK: encoding: [0x5b,0x33,0x24,0x77] ++xvsat.b $xr27, $xr26, 4 ++ ++# CHECK: xvsat.h $xr4, $xr21, 5 ++# CHECK: encoding: [0xa4,0x56,0x24,0x77] ++xvsat.h $xr4, $xr21, 5 ++ ++# CHECK: xvsat.w $xr29, $xr27, 10 ++# CHECK: encoding: [0x7d,0xab,0x24,0x77] ++xvsat.w $xr29, $xr27, 10 ++ ++# CHECK: xvsat.d $xr14, $xr0, 60 ++# CHECK: encoding: [0x0e,0xf0,0x25,0x77] ++xvsat.d $xr14, $xr0, 60 ++ ++# CHECK: xvsat.bu $xr31, $xr25, 3 ++# CHECK: encoding: [0x3f,0x2f,0x28,0x77] ++xvsat.bu $xr31, $xr25, 3 ++ ++# CHECK: xvsat.hu $xr17, $xr4, 14 ++# CHECK: encoding: [0x91,0x78,0x28,0x77] ++xvsat.hu $xr17, $xr4, 14 ++ ++# CHECK: xvsat.wu $xr17, $xr17, 4 ++# CHECK: encoding: [0x31,0x92,0x28,0x77] ++xvsat.wu $xr17, $xr17, 4 ++ ++# CHECK: xvsat.du $xr11, $xr0, 43 ++# CHECK: encoding: [0x0b,0xac,0x29,0x77] ++xvsat.du $xr11, $xr0, 43 ++ ++# CHECK: xvslli.b $xr24, $xr24, 2 ++# CHECK: encoding: [0x18,0x2b,0x2c,0x77] ++xvslli.b $xr24, $xr24, 2 ++ ++# CHECK: xvslli.h $xr23, $xr9, 7 ++# CHECK: encoding: [0x37,0x5d,0x2c,0x77] ++xvslli.h $xr23, $xr9, 7 ++ ++# CHECK: xvslli.w $xr13, $xr12, 16 ++# CHECK: encoding: [0x8d,0xc1,0x2c,0x77] ++xvslli.w $xr13, $xr12, 16 ++ ++# CHECK: xvslli.d $xr11, $xr22, 17 ++# 
CHECK: encoding: [0xcb,0x46,0x2d,0x77] ++xvslli.d $xr11, $xr22, 17 ++ ++# CHECK: xvsrli.b $xr9, $xr14, 1 ++# CHECK: encoding: [0xc9,0x25,0x30,0x77] ++xvsrli.b $xr9, $xr14, 1 ++ ++# CHECK: xvsrli.h $xr22, $xr20, 15 ++# CHECK: encoding: [0x96,0x7e,0x30,0x77] ++xvsrli.h $xr22, $xr20, 15 ++ ++# CHECK: xvsrli.w $xr5, $xr30, 20 ++# CHECK: encoding: [0xc5,0xd3,0x30,0x77] ++xvsrli.w $xr5, $xr30, 20 ++ ++# CHECK: xvsrli.d $xr1, $xr16, 58 ++# CHECK: encoding: [0x01,0xea,0x31,0x77] ++xvsrli.d $xr1, $xr16, 58 ++ ++# CHECK: xvsrai.b $xr18, $xr6, 2 ++# CHECK: encoding: [0xd2,0x28,0x34,0x77] ++xvsrai.b $xr18, $xr6, 2 ++ ++# CHECK: xvsrai.h $xr21, $xr16, 12 ++# CHECK: encoding: [0x15,0x72,0x34,0x77] ++xvsrai.h $xr21, $xr16, 12 ++ ++# CHECK: xvsrai.w $xr13, $xr17, 17 ++# CHECK: encoding: [0x2d,0xc6,0x34,0x77] ++xvsrai.w $xr13, $xr17, 17 ++ ++# CHECK: xvsrai.d $xr3, $xr12, 51 ++# CHECK: encoding: [0x83,0xcd,0x35,0x77] ++xvsrai.d $xr3, $xr12, 51 ++ ++# CHECK: xvsrlni.b.h $xr1, $xr7, 4 ++# CHECK: encoding: [0xe1,0x50,0x40,0x77] ++xvsrlni.b.h $xr1, $xr7, 4 ++ ++# CHECK: xvsrlni.h.w $xr16, $xr21, 25 ++# CHECK: encoding: [0xb0,0xe6,0x40,0x77] ++xvsrlni.h.w $xr16, $xr21, 25 ++ ++# CHECK: xvsrlni.w.d $xr13, $xr10, 48 ++# CHECK: encoding: [0x4d,0xc1,0x41,0x77] ++xvsrlni.w.d $xr13, $xr10, 48 ++ ++# CHECK: xvsrlni.d.q $xr17, $xr12, 126 ++# CHECK: encoding: [0x91,0xf9,0x43,0x77] ++xvsrlni.d.q $xr17, $xr12, 126 ++ ++# CHECK: xvsrlrni.b.h $xr17, $xr19, 15 ++# CHECK: encoding: [0x71,0x7e,0x44,0x77] ++xvsrlrni.b.h $xr17, $xr19, 15 ++ ++# CHECK: xvsrlrni.h.w $xr21, $xr24, 14 ++# CHECK: encoding: [0x15,0xbb,0x44,0x77] ++xvsrlrni.h.w $xr21, $xr24, 14 ++ ++# CHECK: xvsrlrni.w.d $xr20, $xr31, 3 ++# CHECK: encoding: [0xf4,0x0f,0x45,0x77] ++xvsrlrni.w.d $xr20, $xr31, 3 ++ ++# CHECK: xvsrlrni.d.q $xr28, $xr24, 76 ++# CHECK: encoding: [0x1c,0x33,0x47,0x77] ++xvsrlrni.d.q $xr28, $xr24, 76 ++ ++# CHECK: xvssrlni.b.h $xr26, $xr7, 7 ++# CHECK: encoding: [0xfa,0x5c,0x48,0x77] ++xvssrlni.b.h $xr26, $xr7, 7 ++ ++# CHECK: xvssrlni.h.w $xr27, $xr28, 25 ++# CHECK: encoding: [0x9b,0xe7,0x48,0x77] ++xvssrlni.h.w $xr27, $xr28, 25 ++ ++# CHECK: xvssrlni.w.d $xr4, $xr8, 16 ++# CHECK: encoding: [0x04,0x41,0x49,0x77] ++xvssrlni.w.d $xr4, $xr8, 16 ++ ++# CHECK: xvssrlni.d.q $xr14, $xr17, 84 ++# CHECK: encoding: [0x2e,0x52,0x4b,0x77] ++xvssrlni.d.q $xr14, $xr17, 84 ++ ++# CHECK: xvssrlni.bu.h $xr17, $xr6, 2 ++# CHECK: encoding: [0xd1,0x48,0x4c,0x77] ++xvssrlni.bu.h $xr17, $xr6, 2 ++ ++# CHECK: xvssrlni.hu.w $xr6, $xr26, 3 ++# CHECK: encoding: [0x46,0x8f,0x4c,0x77] ++xvssrlni.hu.w $xr6, $xr26, 3 ++ ++# CHECK: xvssrlni.wu.d $xr10, $xr18, 54 ++# CHECK: encoding: [0x4a,0xda,0x4d,0x77] ++xvssrlni.wu.d $xr10, $xr18, 54 ++ ++# CHECK: xvssrlni.du.q $xr29, $xr26, 70 ++# CHECK: encoding: [0x5d,0x1b,0x4f,0x77] ++xvssrlni.du.q $xr29, $xr26, 70 ++ ++# CHECK: xvssrlrni.b.h $xr6, $xr9, 6 ++# CHECK: encoding: [0x26,0x59,0x50,0x77] ++xvssrlrni.b.h $xr6, $xr9, 6 ++ ++# CHECK: xvssrlrni.h.w $xr22, $xr8, 1 ++# CHECK: encoding: [0x16,0x85,0x50,0x77] ++xvssrlrni.h.w $xr22, $xr8, 1 ++ ++# CHECK: xvssrlrni.w.d $xr28, $xr9, 28 ++# CHECK: encoding: [0x3c,0x71,0x51,0x77] ++xvssrlrni.w.d $xr28, $xr9, 28 ++ ++# CHECK: xvssrlrni.d.q $xr20, $xr27, 104 ++# CHECK: encoding: [0x74,0xa3,0x53,0x77] ++xvssrlrni.d.q $xr20, $xr27, 104 ++ ++# CHECK: xvssrlrni.bu.h $xr25, $xr4, 12 ++# CHECK: encoding: [0x99,0x70,0x54,0x77] ++xvssrlrni.bu.h $xr25, $xr4, 12 ++ ++# CHECK: xvssrlrni.hu.w $xr21, $xr29, 5 ++# CHECK: encoding: [0xb5,0x97,0x54,0x77] ++xvssrlrni.hu.w $xr21, $xr29, 5 ++ ++# CHECK: 
xvssrlrni.wu.d $xr1, $xr16, 54 ++# CHECK: encoding: [0x01,0xda,0x55,0x77] ++xvssrlrni.wu.d $xr1, $xr16, 54 ++ ++# CHECK: xvssrlrni.du.q $xr29, $xr7, 25 ++# CHECK: encoding: [0xfd,0x64,0x56,0x77] ++xvssrlrni.du.q $xr29, $xr7, 25 ++ ++# CHECK: xvsrani.b.h $xr16, $xr25, 4 ++# CHECK: encoding: [0x30,0x53,0x58,0x77] ++xvsrani.b.h $xr16, $xr25, 4 ++ ++# CHECK: xvsrani.h.w $xr13, $xr10, 6 ++# CHECK: encoding: [0x4d,0x99,0x58,0x77] ++xvsrani.h.w $xr13, $xr10, 6 ++ ++# CHECK: xvsrani.w.d $xr7, $xr21, 53 ++# CHECK: encoding: [0xa7,0xd6,0x59,0x77] ++xvsrani.w.d $xr7, $xr21, 53 ++ ++# CHECK: xvsrani.d.q $xr26, $xr18, 55 ++# CHECK: encoding: [0x5a,0xde,0x5a,0x77] ++xvsrani.d.q $xr26, $xr18, 55 ++ ++# CHECK: xvsrarni.b.h $xr17, $xr21, 11 ++# CHECK: encoding: [0xb1,0x6e,0x5c,0x77] ++xvsrarni.b.h $xr17, $xr21, 11 ++ ++# CHECK: xvsrarni.h.w $xr15, $xr30, 2 ++# CHECK: encoding: [0xcf,0x8b,0x5c,0x77] ++xvsrarni.h.w $xr15, $xr30, 2 ++ ++# CHECK: xvsrarni.w.d $xr23, $xr11, 31 ++# CHECK: encoding: [0x77,0x7d,0x5d,0x77] ++xvsrarni.w.d $xr23, $xr11, 31 ++ ++# CHECK: xvsrarni.d.q $xr22, $xr25, 16 ++# CHECK: encoding: [0x36,0x43,0x5e,0x77] ++xvsrarni.d.q $xr22, $xr25, 16 ++ ++# CHECK: xvssrani.b.h $xr19, $xr20, 10 ++# CHECK: encoding: [0x93,0x6a,0x60,0x77] ++xvssrani.b.h $xr19, $xr20, 10 ++ ++# CHECK: xvssrani.h.w $xr25, $xr9, 22 ++# CHECK: encoding: [0x39,0xd9,0x60,0x77] ++xvssrani.h.w $xr25, $xr9, 22 ++ ++# CHECK: xvssrani.w.d $xr23, $xr2, 7 ++# CHECK: encoding: [0x57,0x1c,0x61,0x77] ++xvssrani.w.d $xr23, $xr2, 7 ++ ++# CHECK: xvssrani.d.q $xr6, $xr8, 127 ++# CHECK: encoding: [0x06,0xfd,0x63,0x77] ++xvssrani.d.q $xr6, $xr8, 127 ++ ++# CHECK: xvssrani.bu.h $xr27, $xr14, 5 ++# CHECK: encoding: [0xdb,0x55,0x64,0x77] ++xvssrani.bu.h $xr27, $xr14, 5 ++ ++# CHECK: xvssrani.hu.w $xr14, $xr1, 20 ++# CHECK: encoding: [0x2e,0xd0,0x64,0x77] ++xvssrani.hu.w $xr14, $xr1, 20 ++ ++# CHECK: xvssrani.wu.d $xr10, $xr4, 59 ++# CHECK: encoding: [0x8a,0xec,0x65,0x77] ++xvssrani.wu.d $xr10, $xr4, 59 ++ ++# CHECK: xvssrani.du.q $xr17, $xr1, 82 ++# CHECK: encoding: [0x31,0x48,0x67,0x77] ++xvssrani.du.q $xr17, $xr1, 82 ++ ++# CHECK: xvssrarni.b.h $xr27, $xr18, 15 ++# CHECK: encoding: [0x5b,0x7e,0x68,0x77] ++xvssrarni.b.h $xr27, $xr18, 15 ++ ++# CHECK: xvssrarni.h.w $xr16, $xr3, 15 ++# CHECK: encoding: [0x70,0xbc,0x68,0x77] ++xvssrarni.h.w $xr16, $xr3, 15 ++ ++# CHECK: xvssrarni.w.d $xr26, $xr25, 18 ++# CHECK: encoding: [0x3a,0x4b,0x69,0x77] ++xvssrarni.w.d $xr26, $xr25, 18 ++ ++# CHECK: xvssrarni.d.q $xr28, $xr25, 0 ++# CHECK: encoding: [0x3c,0x03,0x6a,0x77] ++xvssrarni.d.q $xr28, $xr25, 0 ++ ++# CHECK: xvssrarni.bu.h $xr1, $xr12, 8 ++# CHECK: encoding: [0x81,0x61,0x6c,0x77] ++xvssrarni.bu.h $xr1, $xr12, 8 ++ ++# CHECK: xvssrarni.hu.w $xr3, $xr27, 31 ++# CHECK: encoding: [0x63,0xff,0x6c,0x77] ++xvssrarni.hu.w $xr3, $xr27, 31 ++ ++# CHECK: xvssrarni.wu.d $xr24, $xr27, 52 ++# CHECK: encoding: [0x78,0xd3,0x6d,0x77] ++xvssrarni.wu.d $xr24, $xr27, 52 ++ ++# CHECK: xvssrarni.du.q $xr5, $xr3, 112 ++# CHECK: encoding: [0x65,0xc0,0x6f,0x77] ++xvssrarni.du.q $xr5, $xr3, 112 ++ ++# CHECK: xvextrins.d $xr21, $xr25, 163 ++# CHECK: encoding: [0x35,0x8f,0x82,0x77] ++xvextrins.d $xr21, $xr25, 163 ++ ++# CHECK: xvextrins.w $xr19, $xr17, 28 ++# CHECK: encoding: [0x33,0x72,0x84,0x77] ++xvextrins.w $xr19, $xr17, 28 ++ ++# CHECK: xvextrins.h $xr30, $xr7, 79 ++# CHECK: encoding: [0xfe,0x3c,0x89,0x77] ++xvextrins.h $xr30, $xr7, 79 ++ ++# CHECK: xvextrins.b $xr1, $xr31, 210 ++# CHECK: encoding: [0xe1,0x4b,0x8f,0x77] ++xvextrins.b $xr1, $xr31, 210 ++ ++# CHECK: 
xvshuf4i.b $xr3, $xr22, 148 ++# CHECK: encoding: [0xc3,0x52,0x92,0x77] ++xvshuf4i.b $xr3, $xr22, 148 ++ ++# CHECK: xvshuf4i.h $xr2, $xr22, 34 ++# CHECK: encoding: [0xc2,0x8a,0x94,0x77] ++xvshuf4i.h $xr2, $xr22, 34 ++ ++# CHECK: xvshuf4i.w $xr31, $xr19, 165 ++# CHECK: encoding: [0x7f,0x96,0x9a,0x77] ++xvshuf4i.w $xr31, $xr19, 165 ++ ++# CHECK: xvshuf4i.d $xr31, $xr17, 14 ++# CHECK: encoding: [0x3f,0x3a,0x9c,0x77] ++xvshuf4i.d $xr31, $xr17, 14 ++ ++# CHECK: xvbitseli.b $xr27, $xr0, 80 ++# CHECK: encoding: [0x1b,0x40,0xc5,0x77] ++xvbitseli.b $xr27, $xr0, 80 ++ ++# CHECK: xvandi.b $xr23, $xr2, 153 ++# CHECK: encoding: [0x57,0x64,0xd2,0x77] ++xvandi.b $xr23, $xr2, 153 ++ ++# CHECK: xvori.b $xr27, $xr28, 188 ++# CHECK: encoding: [0x9b,0xf3,0xd6,0x77] ++xvori.b $xr27, $xr28, 188 ++ ++# CHECK: xvxori.b $xr28, $xr1, 254 ++# CHECK: encoding: [0x3c,0xf8,0xdb,0x77] ++xvxori.b $xr28, $xr1, 254 ++ ++# CHECK: xvnori.b $xr4, $xr2, 36 ++# CHECK: encoding: [0x44,0x90,0xdc,0x77] ++xvnori.b $xr4, $xr2, 36 ++ ++# CHECK: xvldi $xr26, -2544 ++# CHECK: encoding: [0x1a,0xc2,0xe2,0x77] ++xvldi $xr26, -2544 ++ ++# CHECK: xvpermi.w $xr22, $xr24, 168 ++# CHECK: encoding: [0x16,0xa3,0xe6,0x77] ++xvpermi.w $xr22, $xr24, 168 ++ ++# CHECK: xvpermi.d $xr14, $xr31, 136 ++# CHECK: encoding: [0xee,0x23,0xea,0x77] ++xvpermi.d $xr14, $xr31, 136 ++ ++# CHECK: xvpermi.q $xr28, $xr14, 211 ++# CHECK: encoding: [0xdc,0x4d,0xef,0x77] ++xvpermi.q $xr28, $xr14, 211 ++ ++# CHECK: vaddwev.h.b $vr0, $vr31, $vr31 ++# CHECK: encoding: [0xe0,0x7f,0x1e,0x70] ++vaddwev.h.b $vr0, $vr31, $vr31 ++ ++# CHECK: vaddwev.w.h $vr3, $vr4, $vr23 ++# CHECK: encoding: [0x83,0xdc,0x1e,0x70] ++vaddwev.w.h $vr3, $vr4, $vr23 ++ ++# CHECK: vaddwev.d.w $vr30, $vr26, $vr11 ++# CHECK: encoding: [0x5e,0x2f,0x1f,0x70] ++vaddwev.d.w $vr30, $vr26, $vr11 ++ ++# CHECK: vaddwev.q.d $vr25, $vr29, $vr13 ++# CHECK: encoding: [0xb9,0xb7,0x1f,0x70] ++vaddwev.q.d $vr25, $vr29, $vr13 ++ ++# CHECK: vsubwev.h.b $vr11, $vr28, $vr1 ++# CHECK: encoding: [0x8b,0x07,0x20,0x70] ++vsubwev.h.b $vr11, $vr28, $vr1 ++ ++# CHECK: vsubwev.w.h $vr9, $vr15, $vr5 ++# CHECK: encoding: [0xe9,0x95,0x20,0x70] ++vsubwev.w.h $vr9, $vr15, $vr5 ++ ++# CHECK: vsubwev.d.w $vr17, $vr9, $vr10 ++# CHECK: encoding: [0x31,0x29,0x21,0x70] ++vsubwev.d.w $vr17, $vr9, $vr10 ++ ++# CHECK: vsubwev.q.d $vr26, $vr18, $vr11 ++# CHECK: encoding: [0x5a,0xae,0x21,0x70] ++vsubwev.q.d $vr26, $vr18, $vr11 ++ ++# CHECK: vaddwod.h.b $vr7, $vr11, $vr18 ++# CHECK: encoding: [0x67,0x49,0x22,0x70] ++vaddwod.h.b $vr7, $vr11, $vr18 ++ ++# CHECK: vaddwod.w.h $vr0, $vr7, $vr12 ++# CHECK: encoding: [0xe0,0xb0,0x22,0x70] ++vaddwod.w.h $vr0, $vr7, $vr12 ++ ++# CHECK: vaddwod.d.w $vr30, $vr27, $vr16 ++# CHECK: encoding: [0x7e,0x43,0x23,0x70] ++vaddwod.d.w $vr30, $vr27, $vr16 ++ ++# CHECK: vaddwod.q.d $vr2, $vr20, $vr29 ++# CHECK: encoding: [0x82,0xf6,0x23,0x70] ++vaddwod.q.d $vr2, $vr20, $vr29 ++ ++# CHECK: vsubwod.h.b $vr26, $vr7, $vr19 ++# CHECK: encoding: [0xfa,0x4c,0x24,0x70] ++vsubwod.h.b $vr26, $vr7, $vr19 ++ ++# CHECK: vsubwod.w.h $vr19, $vr3, $vr11 ++# CHECK: encoding: [0x73,0xac,0x24,0x70] ++vsubwod.w.h $vr19, $vr3, $vr11 ++ ++# CHECK: vsubwod.d.w $vr31, $vr28, $vr12 ++# CHECK: encoding: [0x9f,0x33,0x25,0x70] ++vsubwod.d.w $vr31, $vr28, $vr12 ++ ++# CHECK: vsubwod.q.d $vr1, $vr24, $vr16 ++# CHECK: encoding: [0x01,0xc3,0x25,0x70] ++vsubwod.q.d $vr1, $vr24, $vr16 ++ ++# CHECK: vaddwev.h.bu $vr3, $vr29, $vr29 ++# CHECK: encoding: [0xa3,0x77,0x2e,0x70] ++vaddwev.h.bu $vr3, $vr29, $vr29 ++ ++# CHECK: vaddwev.w.hu $vr10, $vr15, $vr10 
++# CHECK: encoding: [0xea,0xa9,0x2e,0x70] ++vaddwev.w.hu $vr10, $vr15, $vr10 ++ ++# CHECK: vaddwev.d.wu $vr24, $vr29, $vr4 ++# CHECK: encoding: [0xb8,0x13,0x2f,0x70] ++vaddwev.d.wu $vr24, $vr29, $vr4 ++ ++# CHECK: vaddwev.q.du $vr17, $vr23, $vr0 ++# CHECK: encoding: [0xf1,0x82,0x2f,0x70] ++vaddwev.q.du $vr17, $vr23, $vr0 ++ ++# CHECK: vsubwev.h.bu $vr25, $vr11, $vr20 ++# CHECK: encoding: [0x79,0x51,0x30,0x70] ++vsubwev.h.bu $vr25, $vr11, $vr20 ++ ++# CHECK: vsubwev.w.hu $vr17, $vr15, $vr20 ++# CHECK: encoding: [0xf1,0xd1,0x30,0x70] ++vsubwev.w.hu $vr17, $vr15, $vr20 ++ ++# CHECK: vsubwev.d.wu $vr10, $vr25, $vr5 ++# CHECK: encoding: [0x2a,0x17,0x31,0x70] ++vsubwev.d.wu $vr10, $vr25, $vr5 ++ ++# CHECK: vsubwev.q.du $vr29, $vr3, $vr8 ++# CHECK: encoding: [0x7d,0xa0,0x31,0x70] ++vsubwev.q.du $vr29, $vr3, $vr8 ++ ++# CHECK: vaddwod.h.bu $vr10, $vr0, $vr25 ++# CHECK: encoding: [0x0a,0x64,0x32,0x70] ++vaddwod.h.bu $vr10, $vr0, $vr25 ++ ++# CHECK: vaddwod.w.hu $vr2, $vr27, $vr23 ++# CHECK: encoding: [0x62,0xdf,0x32,0x70] ++vaddwod.w.hu $vr2, $vr27, $vr23 ++ ++# CHECK: vaddwod.d.wu $vr2, $vr0, $vr22 ++# CHECK: encoding: [0x02,0x58,0x33,0x70] ++vaddwod.d.wu $vr2, $vr0, $vr22 ++ ++# CHECK: vaddwod.q.du $vr0, $vr2, $vr3 ++# CHECK: encoding: [0x40,0x8c,0x33,0x70] ++vaddwod.q.du $vr0, $vr2, $vr3 ++ ++# CHECK: vsubwod.h.bu $vr14, $vr31, $vr3 ++# CHECK: encoding: [0xee,0x0f,0x34,0x70] ++vsubwod.h.bu $vr14, $vr31, $vr3 ++ ++# CHECK: vsubwod.w.hu $vr21, $vr2, $vr7 ++# CHECK: encoding: [0x55,0x9c,0x34,0x70] ++vsubwod.w.hu $vr21, $vr2, $vr7 ++ ++# CHECK: vsubwod.d.wu $vr11, $vr8, $vr18 ++# CHECK: encoding: [0x0b,0x49,0x35,0x70] ++vsubwod.d.wu $vr11, $vr8, $vr18 ++ ++# CHECK: vsubwod.q.du $vr30, $vr20, $vr0 ++# CHECK: encoding: [0x9e,0x82,0x35,0x70] ++vsubwod.q.du $vr30, $vr20, $vr0 ++ ++# CHECK: vaddwev.h.bu.b $vr19, $vr28, $vr17 ++# CHECK: encoding: [0x93,0x47,0x3e,0x70] ++vaddwev.h.bu.b $vr19, $vr28, $vr17 ++ ++# CHECK: vaddwev.w.hu.h $vr14, $vr15, $vr30 ++# CHECK: encoding: [0xee,0xf9,0x3e,0x70] ++vaddwev.w.hu.h $vr14, $vr15, $vr30 ++ ++# CHECK: vaddwev.d.wu.w $vr15, $vr7, $vr10 ++# CHECK: encoding: [0xef,0x28,0x3f,0x70] ++vaddwev.d.wu.w $vr15, $vr7, $vr10 ++ ++# CHECK: vaddwev.q.du.d $vr19, $vr14, $vr30 ++# CHECK: encoding: [0xd3,0xf9,0x3f,0x70] ++vaddwev.q.du.d $vr19, $vr14, $vr30 ++ ++# CHECK: vaddwod.h.bu.b $vr15, $vr18, $vr8 ++# CHECK: encoding: [0x4f,0x22,0x40,0x70] ++vaddwod.h.bu.b $vr15, $vr18, $vr8 ++ ++# CHECK: vaddwod.w.hu.h $vr19, $vr27, $vr6 ++# CHECK: encoding: [0x73,0x9b,0x40,0x70] ++vaddwod.w.hu.h $vr19, $vr27, $vr6 ++ ++# CHECK: vaddwod.d.wu.w $vr7, $vr11, $vr15 ++# CHECK: encoding: [0x67,0x3d,0x41,0x70] ++vaddwod.d.wu.w $vr7, $vr11, $vr15 ++ ++# CHECK: vaddwod.q.du.d $vr0, $vr0, $vr26 ++# CHECK: encoding: [0x00,0xe8,0x41,0x70] ++vaddwod.q.du.d $vr0, $vr0, $vr26 ++ ++# CHECK: vmulwev.h.b $vr24, $vr19, $vr21 ++# CHECK: encoding: [0x78,0x56,0x90,0x70] ++vmulwev.h.b $vr24, $vr19, $vr21 ++ ++# CHECK: vmulwev.w.h $vr13, $vr22, $vr18 ++# CHECK: encoding: [0xcd,0xca,0x90,0x70] ++vmulwev.w.h $vr13, $vr22, $vr18 ++ ++# CHECK: vmulwev.d.w $vr24, $vr22, $vr13 ++# CHECK: encoding: [0xd8,0x36,0x91,0x70] ++vmulwev.d.w $vr24, $vr22, $vr13 ++ ++# CHECK: vmulwev.q.d $vr4, $vr22, $vr30 ++# CHECK: encoding: [0xc4,0xfa,0x91,0x70] ++vmulwev.q.d $vr4, $vr22, $vr30 ++ ++# CHECK: vmulwod.h.b $vr22, $vr26, $vr24 ++# CHECK: encoding: [0x56,0x63,0x92,0x70] ++vmulwod.h.b $vr22, $vr26, $vr24 ++ ++# CHECK: vmulwod.w.h $vr17, $vr12, $vr4 ++# CHECK: encoding: [0x91,0x91,0x92,0x70] ++vmulwod.w.h $vr17, $vr12, $vr4 ++ ++# 
CHECK: vmulwod.d.w $vr16, $vr15, $vr26 ++# CHECK: encoding: [0xf0,0x69,0x93,0x70] ++vmulwod.d.w $vr16, $vr15, $vr26 ++ ++# CHECK: vmulwod.q.d $vr3, $vr16, $vr5 ++# CHECK: encoding: [0x03,0x96,0x93,0x70] ++vmulwod.q.d $vr3, $vr16, $vr5 ++ ++# CHECK: vmulwev.h.bu $vr31, $vr19, $vr19 ++# CHECK: encoding: [0x7f,0x4e,0x98,0x70] ++vmulwev.h.bu $vr31, $vr19, $vr19 ++ ++# CHECK: vmulwev.w.hu $vr22, $vr31, $vr5 ++# CHECK: encoding: [0xf6,0x97,0x98,0x70] ++vmulwev.w.hu $vr22, $vr31, $vr5 ++ ++# CHECK: vmulwev.d.wu $vr0, $vr4, $vr30 ++# CHECK: encoding: [0x80,0x78,0x99,0x70] ++vmulwev.d.wu $vr0, $vr4, $vr30 ++ ++# CHECK: vmulwev.q.du $vr31, $vr3, $vr20 ++# CHECK: encoding: [0x7f,0xd0,0x99,0x70] ++vmulwev.q.du $vr31, $vr3, $vr20 ++ ++# CHECK: vmulwod.h.bu $vr25, $vr7, $vr13 ++# CHECK: encoding: [0xf9,0x34,0x9a,0x70] ++vmulwod.h.bu $vr25, $vr7, $vr13 ++ ++# CHECK: vmulwod.w.hu $vr1, $vr12, $vr12 ++# CHECK: encoding: [0x81,0xb1,0x9a,0x70] ++vmulwod.w.hu $vr1, $vr12, $vr12 ++ ++# CHECK: vmulwod.d.wu $vr15, $vr15, $vr30 ++# CHECK: encoding: [0xef,0x79,0x9b,0x70] ++vmulwod.d.wu $vr15, $vr15, $vr30 ++ ++# CHECK: vmulwod.q.du $vr13, $vr28, $vr6 ++# CHECK: encoding: [0x8d,0x9b,0x9b,0x70] ++vmulwod.q.du $vr13, $vr28, $vr6 ++ ++# CHECK: vmulwev.h.bu.b $vr8, $vr26, $vr3 ++# CHECK: encoding: [0x48,0x0f,0xa0,0x70] ++vmulwev.h.bu.b $vr8, $vr26, $vr3 ++ ++# CHECK: vmulwev.w.hu.h $vr10, $vr25, $vr1 ++# CHECK: encoding: [0x2a,0x87,0xa0,0x70] ++vmulwev.w.hu.h $vr10, $vr25, $vr1 ++ ++# CHECK: vmulwev.d.wu.w $vr9, $vr0, $vr19 ++# CHECK: encoding: [0x09,0x4c,0xa1,0x70] ++vmulwev.d.wu.w $vr9, $vr0, $vr19 ++ ++# CHECK: vmulwev.q.du.d $vr13, $vr24, $vr23 ++# CHECK: encoding: [0x0d,0xdf,0xa1,0x70] ++vmulwev.q.du.d $vr13, $vr24, $vr23 ++ ++# CHECK: vmulwod.h.bu.b $vr20, $vr0, $vr14 ++# CHECK: encoding: [0x14,0x38,0xa2,0x70] ++vmulwod.h.bu.b $vr20, $vr0, $vr14 ++ ++# CHECK: vmulwod.w.hu.h $vr16, $vr20, $vr3 ++# CHECK: encoding: [0x90,0x8e,0xa2,0x70] ++vmulwod.w.hu.h $vr16, $vr20, $vr3 ++ ++# CHECK: vmulwod.d.wu.w $vr5, $vr23, $vr27 ++# CHECK: encoding: [0xe5,0x6e,0xa3,0x70] ++vmulwod.d.wu.w $vr5, $vr23, $vr27 ++ ++# CHECK: vmulwod.q.du.d $vr30, $vr30, $vr29 ++# CHECK: encoding: [0xde,0xf7,0xa3,0x70] ++vmulwod.q.du.d $vr30, $vr30, $vr29 ++ ++# CHECK: vmaddwev.h.b $vr18, $vr0, $vr8 ++# CHECK: encoding: [0x12,0x20,0xac,0x70] ++vmaddwev.h.b $vr18, $vr0, $vr8 ++ ++# CHECK: vmaddwev.w.h $vr29, $vr22, $vr7 ++# CHECK: encoding: [0xdd,0x9e,0xac,0x70] ++vmaddwev.w.h $vr29, $vr22, $vr7 ++ ++# CHECK: vmaddwev.d.w $vr28, $vr13, $vr31 ++# CHECK: encoding: [0xbc,0x7d,0xad,0x70] ++vmaddwev.d.w $vr28, $vr13, $vr31 ++ ++# CHECK: vmaddwev.q.d $vr5, $vr3, $vr13 ++# CHECK: encoding: [0x65,0xb4,0xad,0x70] ++vmaddwev.q.d $vr5, $vr3, $vr13 ++ ++# CHECK: vmaddwod.h.b $vr4, $vr1, $vr9 ++# CHECK: encoding: [0x24,0x24,0xae,0x70] ++vmaddwod.h.b $vr4, $vr1, $vr9 ++ ++# CHECK: vmaddwod.w.h $vr26, $vr9, $vr24 ++# CHECK: encoding: [0x3a,0xe1,0xae,0x70] ++vmaddwod.w.h $vr26, $vr9, $vr24 ++ ++# CHECK: vmaddwod.d.w $vr30, $vr3, $vr13 ++# CHECK: encoding: [0x7e,0x34,0xaf,0x70] ++vmaddwod.d.w $vr30, $vr3, $vr13 ++ ++# CHECK: vmaddwod.q.d $vr15, $vr13, $vr29 ++# CHECK: encoding: [0xaf,0xf5,0xaf,0x70] ++vmaddwod.q.d $vr15, $vr13, $vr29 ++ ++# CHECK: vmaddwev.h.bu $vr24, $vr20, $vr5 ++# CHECK: encoding: [0x98,0x16,0xb4,0x70] ++vmaddwev.h.bu $vr24, $vr20, $vr5 ++ ++# CHECK: vmaddwev.w.hu $vr3, $vr4, $vr8 ++# CHECK: encoding: [0x83,0xa0,0xb4,0x70] ++vmaddwev.w.hu $vr3, $vr4, $vr8 ++ ++# CHECK: vmaddwev.d.wu $vr27, $vr19, $vr4 ++# CHECK: encoding: [0x7b,0x12,0xb5,0x70] 
++vmaddwev.d.wu $vr27, $vr19, $vr4 ++ ++# CHECK: vmaddwev.q.du $vr28, $vr27, $vr29 ++# CHECK: encoding: [0x7c,0xf7,0xb5,0x70] ++vmaddwev.q.du $vr28, $vr27, $vr29 ++ ++# CHECK: vmaddwod.h.bu $vr5, $vr20, $vr26 ++# CHECK: encoding: [0x85,0x6a,0xb6,0x70] ++vmaddwod.h.bu $vr5, $vr20, $vr26 ++ ++# CHECK: vmaddwod.w.hu $vr21, $vr30, $vr10 ++# CHECK: encoding: [0xd5,0xab,0xb6,0x70] ++vmaddwod.w.hu $vr21, $vr30, $vr10 ++ ++# CHECK: vmaddwod.d.wu $vr7, $vr11, $vr20 ++# CHECK: encoding: [0x67,0x51,0xb7,0x70] ++vmaddwod.d.wu $vr7, $vr11, $vr20 ++ ++# CHECK: vmaddwod.q.du $vr30, $vr18, $vr24 ++# CHECK: encoding: [0x5e,0xe2,0xb7,0x70] ++vmaddwod.q.du $vr30, $vr18, $vr24 ++ ++# CHECK: vmaddwev.h.bu.b $vr4, $vr1, $vr4 ++# CHECK: encoding: [0x24,0x10,0xbc,0x70] ++vmaddwev.h.bu.b $vr4, $vr1, $vr4 ++ ++# CHECK: vmaddwev.w.hu.h $vr25, $vr11, $vr15 ++# CHECK: encoding: [0x79,0xbd,0xbc,0x70] ++vmaddwev.w.hu.h $vr25, $vr11, $vr15 ++ ++# CHECK: vmaddwev.d.wu.w $vr10, $vr16, $vr20 ++# CHECK: encoding: [0x0a,0x52,0xbd,0x70] ++vmaddwev.d.wu.w $vr10, $vr16, $vr20 ++ ++# CHECK: vmaddwev.q.du.d $vr22, $vr20, $vr23 ++# CHECK: encoding: [0x96,0xde,0xbd,0x70] ++vmaddwev.q.du.d $vr22, $vr20, $vr23 ++ ++# CHECK: vmaddwod.h.bu.b $vr31, $vr25, $vr27 ++# CHECK: encoding: [0x3f,0x6f,0xbe,0x70] ++vmaddwod.h.bu.b $vr31, $vr25, $vr27 ++ ++# CHECK: vmaddwod.w.hu.h $vr8, $vr18, $vr24 ++# CHECK: encoding: [0x48,0xe2,0xbe,0x70] ++vmaddwod.w.hu.h $vr8, $vr18, $vr24 ++ ++# CHECK: vmaddwod.d.wu.w $vr18, $vr13, $vr10 ++# CHECK: encoding: [0xb2,0x29,0xbf,0x70] ++vmaddwod.d.wu.w $vr18, $vr13, $vr10 ++ ++# CHECK: vmaddwod.q.du.d $vr10, $vr5, $vr15 ++# CHECK: encoding: [0xaa,0xbc,0xbf,0x70] ++vmaddwod.q.du.d $vr10, $vr5, $vr15 ++ +diff --git a/llvm/test/Transforms/AtomicExpand/LoongArch/lit.local.cfg b/llvm/test/Transforms/AtomicExpand/LoongArch/lit.local.cfg +deleted file mode 100644 +index 31902e060..000000000 +--- a/llvm/test/Transforms/AtomicExpand/LoongArch/lit.local.cfg ++++ /dev/null +@@ -1,5 +0,0 @@ +-config.suffixes = ['.ll'] +- +-targets = set(config.root.targets_to_build.split()) +-if not 'LoongArch' in targets: +- config.unsupported = True +diff --git a/llvm/test/Transforms/AtomicExpand/LoongArch/load-store-atomic.ll b/llvm/test/Transforms/AtomicExpand/LoongArch/load-store-atomic.ll +deleted file mode 100644 +index 4acf97614..000000000 +--- a/llvm/test/Transforms/AtomicExpand/LoongArch/load-store-atomic.ll ++++ /dev/null +@@ -1,121 +0,0 @@ +-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +-; RUN: opt -S --mtriple=loongarch32 --atomic-expand %s | FileCheck %s --check-prefix=LA32 +-; RUN: opt -S --mtriple=loongarch64 --atomic-expand %s | FileCheck %s --check-prefix=LA64 +- +-define i8 @load_acquire_i8(ptr %ptr) { +-; LA32-LABEL: @load_acquire_i8( +-; LA32-NEXT: [[VAL:%.*]] = load atomic i8, ptr [[PTR:%.*]] monotonic, align 1 +-; LA32-NEXT: fence acquire +-; LA32-NEXT: ret i8 [[VAL]] +-; +-; LA64-LABEL: @load_acquire_i8( +-; LA64-NEXT: [[VAL:%.*]] = load atomic i8, ptr [[PTR:%.*]] monotonic, align 1 +-; LA64-NEXT: fence acquire +-; LA64-NEXT: ret i8 [[VAL]] +-; +- %val = load atomic i8, ptr %ptr acquire, align 1 +- ret i8 %val +-} +- +-define i16 @load_acquire_i16(ptr %ptr) { +-; LA32-LABEL: @load_acquire_i16( +-; LA32-NEXT: [[VAL:%.*]] = load atomic i16, ptr [[PTR:%.*]] monotonic, align 2 +-; LA32-NEXT: fence acquire +-; LA32-NEXT: ret i16 [[VAL]] +-; +-; LA64-LABEL: @load_acquire_i16( +-; LA64-NEXT: [[VAL:%.*]] = load atomic i16, ptr [[PTR:%.*]] monotonic, align 2 +-; LA64-NEXT: fence acquire +-; 
LA64-NEXT: ret i16 [[VAL]] +-; +- %val = load atomic i16, ptr %ptr acquire, align 2 +- ret i16 %val +-} +- +-define i32 @load_acquire_i32(ptr %ptr) { +-; LA32-LABEL: @load_acquire_i32( +-; LA32-NEXT: [[VAL:%.*]] = load atomic i32, ptr [[PTR:%.*]] monotonic, align 4 +-; LA32-NEXT: fence acquire +-; LA32-NEXT: ret i32 [[VAL]] +-; +-; LA64-LABEL: @load_acquire_i32( +-; LA64-NEXT: [[VAL:%.*]] = load atomic i32, ptr [[PTR:%.*]] monotonic, align 4 +-; LA64-NEXT: fence acquire +-; LA64-NEXT: ret i32 [[VAL]] +-; +- %val = load atomic i32, ptr %ptr acquire, align 4 +- ret i32 %val +-} +- +-define i64 @load_acquire_i64(ptr %ptr) { +-; LA32-LABEL: @load_acquire_i64( +-; LA32-NEXT: [[TMP1:%.*]] = call i64 @__atomic_load_8(ptr [[PTR:%.*]], i32 2) +-; LA32-NEXT: ret i64 [[TMP1]] +-; +-; LA64-LABEL: @load_acquire_i64( +-; LA64-NEXT: [[VAL:%.*]] = load atomic i64, ptr [[PTR:%.*]] monotonic, align 8 +-; LA64-NEXT: fence acquire +-; LA64-NEXT: ret i64 [[VAL]] +-; +- %val = load atomic i64, ptr %ptr acquire, align 8 +- ret i64 %val +-} +- +-define void @store_release_i8(ptr %ptr, i8 signext %v) { +-; LA32-LABEL: @store_release_i8( +-; LA32-NEXT: fence release +-; LA32-NEXT: store atomic i8 [[V:%.*]], ptr [[PTR:%.*]] monotonic, align 1 +-; LA32-NEXT: ret void +-; +-; LA64-LABEL: @store_release_i8( +-; LA64-NEXT: fence release +-; LA64-NEXT: store atomic i8 [[V:%.*]], ptr [[PTR:%.*]] monotonic, align 1 +-; LA64-NEXT: ret void +-; +- store atomic i8 %v, ptr %ptr release, align 1 +- ret void +-} +- +-define void @store_release_i16(ptr %ptr, i16 signext %v) { +-; LA32-LABEL: @store_release_i16( +-; LA32-NEXT: fence release +-; LA32-NEXT: store atomic i16 [[V:%.*]], ptr [[PTR:%.*]] monotonic, align 2 +-; LA32-NEXT: ret void +-; +-; LA64-LABEL: @store_release_i16( +-; LA64-NEXT: fence release +-; LA64-NEXT: store atomic i16 [[V:%.*]], ptr [[PTR:%.*]] monotonic, align 2 +-; LA64-NEXT: ret void +-; +- store atomic i16 %v, ptr %ptr release, align 2 +- ret void +-} +- +-define void @store_release_i32(ptr %ptr, i32 signext %v) { +-; LA32-LABEL: @store_release_i32( +-; LA32-NEXT: fence release +-; LA32-NEXT: store atomic i32 [[V:%.*]], ptr [[PTR:%.*]] monotonic, align 4 +-; LA32-NEXT: ret void +-; +-; LA64-LABEL: @store_release_i32( +-; LA64-NEXT: fence release +-; LA64-NEXT: store atomic i32 [[V:%.*]], ptr [[PTR:%.*]] monotonic, align 4 +-; LA64-NEXT: ret void +-; +- store atomic i32 %v, ptr %ptr release, align 4 +- ret void +-} +- +-define void @store_release_i64(ptr %ptr, i64 %v) { +-; LA32-LABEL: @store_release_i64( +-; LA32-NEXT: call void @__atomic_store_8(ptr [[PTR:%.*]], i64 [[V:%.*]], i32 3) +-; LA32-NEXT: ret void +-; +-; LA64-LABEL: @store_release_i64( +-; LA64-NEXT: fence release +-; LA64-NEXT: store atomic i64 [[V:%.*]], ptr [[PTR:%.*]] monotonic, align 8 +-; LA64-NEXT: ret void +-; +- store atomic i64 %v, ptr %ptr release, align 8 +- ret void +-} +diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/loongarch_function_name.ll b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/loongarch_function_name.ll +index 058245269..d9b2fc09d 100644 +--- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/loongarch_function_name.ll ++++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/loongarch_function_name.ll +@@ -1,6 +1,6 @@ + ; Check that we accept functions with '$' in the name. 
+ +-; RUN: llc -mtriple=loongarch32-unknown-linux < %s | FileCheck %s ++; RUN: llc -mtriple=loongarch64-unknown-linux < %s | FileCheck %s + + define hidden i32 @"_Z54bar$ompvariant$bar"() { + entry: +diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/loongarch_function_name.ll.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/loongarch_function_name.ll.expected +index 692941b50..306b22ebb 100644 +--- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/loongarch_function_name.ll.expected ++++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/loongarch_function_name.ll.expected +@@ -1,13 +1,13 @@ + ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py + ; Check that we accept functions with '$' in the name. + +-; RUN: llc -mtriple=loongarch32-unknown-linux < %s | FileCheck %s ++; RUN: llc -mtriple=loongarch64-unknown-linux < %s | FileCheck %s + + define hidden i32 @"_Z54bar$ompvariant$bar"() { + ; CHECK-LABEL: _Z54bar$ompvariant$bar: + ; CHECK: # %bb.0: # %entry +-; CHECK-NEXT: ori $a0, $zero, 2 +-; CHECK-NEXT: jirl $zero, $ra, 0 ++; CHECK-NEXT: ori $r4, $zero, 2 ++; CHECK-NEXT: jr $ra + entry: + ret i32 2 + } +diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/loongarch_generated_funcs.ll b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/loongarch_generated_funcs.ll +deleted file mode 100644 +index 5de94d73d..000000000 +--- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/loongarch_generated_funcs.ll ++++ /dev/null +@@ -1,63 +0,0 @@ +-; RUN: llc --enable-machine-outliner --mtriple=loongarch32-unknown-linux < %s | FileCheck %s +-@x = dso_local global i32 0, align 4 +- +-define dso_local i32 @check_boundaries() #0 { +- %1 = alloca i32, align 4 +- %2 = alloca i32, align 4 +- %3 = alloca i32, align 4 +- %4 = alloca i32, align 4 +- %5 = alloca i32, align 4 +- store i32 0, ptr %1, align 4 +- store i32 0, ptr %2, align 4 +- %6 = load i32, ptr %2, align 4 +- %7 = icmp ne i32 %6, 0 +- br i1 %7, label %9, label %8 +- +- store i32 1, ptr %2, align 4 +- store i32 2, ptr %3, align 4 +- store i32 3, ptr %4, align 4 +- store i32 4, ptr %5, align 4 +- br label %10 +- +- store i32 1, ptr %4, align 4 +- br label %10 +- +- %11 = load i32, ptr %2, align 4 +- %12 = icmp ne i32 %11, 0 +- br i1 %12, label %14, label %13 +- +- store i32 1, ptr %2, align 4 +- store i32 2, ptr %3, align 4 +- store i32 3, ptr %4, align 4 +- store i32 4, ptr %5, align 4 +- br label %15 +- +- store i32 1, ptr %4, align 4 +- br label %15 +- +- ret i32 0 +-} +- +-define dso_local i32 @main() #0 { +- %1 = alloca i32, align 4 +- %2 = alloca i32, align 4 +- %3 = alloca i32, align 4 +- %4 = alloca i32, align 4 +- %5 = alloca i32, align 4 +- +- store i32 0, ptr %1, align 4 +- store i32 0, ptr @x, align 4 +- store i32 1, ptr %2, align 4 +- store i32 2, ptr %3, align 4 +- store i32 3, ptr %4, align 4 +- store i32 4, ptr %5, align 4 +- store i32 1, ptr @x, align 4 +- call void asm sideeffect "", "~{memory},~{dirflag},~{fpsr},~{flags}"() +- store i32 1, ptr %2, align 4 +- store i32 2, ptr %3, align 4 +- store i32 3, ptr %4, align 4 +- store i32 4, ptr %5, align 4 +- ret i32 0 +-} +- +-attributes #0 = { noredzone nounwind ssp uwtable "frame-pointer"="all" } +diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/loongarch_generated_funcs.ll.generated.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/loongarch_generated_funcs.ll.generated.expected 
+deleted file mode 100644 +index 5d091d735..000000000 +--- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/loongarch_generated_funcs.ll.generated.expected ++++ /dev/null +@@ -1,148 +0,0 @@ +-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --include-generated-funcs +-; RUN: llc --enable-machine-outliner --mtriple=loongarch32-unknown-linux < %s | FileCheck %s +-@x = dso_local global i32 0, align 4 +- +-define dso_local i32 @check_boundaries() #0 { +- %1 = alloca i32, align 4 +- %2 = alloca i32, align 4 +- %3 = alloca i32, align 4 +- %4 = alloca i32, align 4 +- %5 = alloca i32, align 4 +- store i32 0, ptr %1, align 4 +- store i32 0, ptr %2, align 4 +- %6 = load i32, ptr %2, align 4 +- %7 = icmp ne i32 %6, 0 +- br i1 %7, label %9, label %8 +- +- store i32 1, ptr %2, align 4 +- store i32 2, ptr %3, align 4 +- store i32 3, ptr %4, align 4 +- store i32 4, ptr %5, align 4 +- br label %10 +- +- store i32 1, ptr %4, align 4 +- br label %10 +- +- %11 = load i32, ptr %2, align 4 +- %12 = icmp ne i32 %11, 0 +- br i1 %12, label %14, label %13 +- +- store i32 1, ptr %2, align 4 +- store i32 2, ptr %3, align 4 +- store i32 3, ptr %4, align 4 +- store i32 4, ptr %5, align 4 +- br label %15 +- +- store i32 1, ptr %4, align 4 +- br label %15 +- +- ret i32 0 +-} +- +-define dso_local i32 @main() #0 { +- %1 = alloca i32, align 4 +- %2 = alloca i32, align 4 +- %3 = alloca i32, align 4 +- %4 = alloca i32, align 4 +- %5 = alloca i32, align 4 +- +- store i32 0, ptr %1, align 4 +- store i32 0, ptr @x, align 4 +- store i32 1, ptr %2, align 4 +- store i32 2, ptr %3, align 4 +- store i32 3, ptr %4, align 4 +- store i32 4, ptr %5, align 4 +- store i32 1, ptr @x, align 4 +- call void asm sideeffect "", "~{memory},~{dirflag},~{fpsr},~{flags}"() +- store i32 1, ptr %2, align 4 +- store i32 2, ptr %3, align 4 +- store i32 3, ptr %4, align 4 +- store i32 4, ptr %5, align 4 +- ret i32 0 +-} +- +-attributes #0 = { noredzone nounwind ssp uwtable "frame-pointer"="all" } +-; CHECK-LABEL: check_boundaries: +-; CHECK: # %bb.0: +-; CHECK-NEXT: addi.w $sp, $sp, -32 +-; CHECK-NEXT: .cfi_def_cfa_offset 32 +-; CHECK-NEXT: st.w $ra, $sp, 28 # 4-byte Folded Spill +-; CHECK-NEXT: st.w $fp, $sp, 24 # 4-byte Folded Spill +-; CHECK-NEXT: .cfi_offset 1, -4 +-; CHECK-NEXT: .cfi_offset 22, -8 +-; CHECK-NEXT: addi.w $fp, $sp, 32 +-; CHECK-NEXT: .cfi_def_cfa 22, 0 +-; CHECK-NEXT: st.w $zero, $fp, -16 +-; CHECK-NEXT: st.w $zero, $fp, -12 +-; CHECK-NEXT: bnez $zero, .LBB0_2 +-; CHECK-NEXT: b .LBB0_1 +-; CHECK-NEXT: .LBB0_2: +-; CHECK-NEXT: ori $a0, $zero, 1 +-; CHECK-NEXT: st.w $a0, $fp, -24 +-; CHECK-NEXT: .LBB0_3: +-; CHECK-NEXT: ld.w $a0, $fp, -16 +-; CHECK-NEXT: bne $a0, $zero, .LBB0_5 +-; CHECK-NEXT: b .LBB0_4 +-; CHECK-NEXT: .LBB0_5: +-; CHECK-NEXT: ori $a0, $zero, 1 +-; CHECK-NEXT: st.w $a0, $fp, -24 +-; CHECK-NEXT: .LBB0_6: +-; CHECK-NEXT: move $a0, $zero +-; CHECK-NEXT: ld.w $fp, $sp, 24 # 4-byte Folded Reload +-; CHECK-NEXT: ld.w $ra, $sp, 28 # 4-byte Folded Reload +-; CHECK-NEXT: addi.w $sp, $sp, 32 +-; CHECK-NEXT: jirl $zero, $ra, 0 +-; CHECK-NEXT: .LBB0_1: +-; CHECK-NEXT: ori $a0, $zero, 2 +-; CHECK-NEXT: st.w $a0, $fp, -20 +-; CHECK-NEXT: ori $a0, $zero, 1 +-; CHECK-NEXT: st.w $a0, $fp, -16 +-; CHECK-NEXT: ori $a0, $zero, 3 +-; CHECK-NEXT: st.w $a0, $fp, -24 +-; CHECK-NEXT: ori $a0, $zero, 4 +-; CHECK-NEXT: st.w $a0, $fp, -28 +-; CHECK-NEXT: b .LBB0_3 +-; CHECK-NEXT: .LBB0_4: +-; CHECK-NEXT: ori $a0, $zero, 2 +-; CHECK-NEXT: st.w $a0, $fp, -20 +-; CHECK-NEXT: ori $a0, $zero, 1 
+-; CHECK-NEXT: st.w $a0, $fp, -16 +-; CHECK-NEXT: ori $a0, $zero, 3 +-; CHECK-NEXT: st.w $a0, $fp, -24 +-; CHECK-NEXT: ori $a0, $zero, 4 +-; CHECK-NEXT: st.w $a0, $fp, -28 +-; CHECK-NEXT: b .LBB0_6 +-; +-; CHECK-LABEL: main: +-; CHECK: # %bb.0: +-; CHECK-NEXT: addi.w $sp, $sp, -32 +-; CHECK-NEXT: .cfi_def_cfa_offset 32 +-; CHECK-NEXT: st.w $ra, $sp, 28 # 4-byte Folded Spill +-; CHECK-NEXT: st.w $fp, $sp, 24 # 4-byte Folded Spill +-; CHECK-NEXT: .cfi_offset 1, -4 +-; CHECK-NEXT: .cfi_offset 22, -8 +-; CHECK-NEXT: addi.w $fp, $sp, 32 +-; CHECK-NEXT: .cfi_def_cfa 22, 0 +-; CHECK-NEXT: pcalau12i $a0, x +-; CHECK-NEXT: addi.w $a0, $a0, x +-; CHECK-NEXT: ori $a1, $zero, 1 +-; CHECK-NEXT: st.w $a1, $a0, 0 +-; CHECK-NEXT: st.w $zero, $fp, -12 +-; CHECK-NEXT: st.w $a1, $fp, -16 +-; CHECK-NEXT: ori $a0, $zero, 2 +-; CHECK-NEXT: st.w $a0, $fp, -20 +-; CHECK-NEXT: ori $a2, $zero, 3 +-; CHECK-NEXT: st.w $a2, $fp, -24 +-; CHECK-NEXT: ori $a3, $zero, 4 +-; CHECK-NEXT: st.w $a3, $fp, -28 +-; CHECK-NEXT: #APP +-; CHECK-NEXT: #NO_APP +-; CHECK-NEXT: st.w $a0, $fp, -20 +-; CHECK-NEXT: st.w $a1, $fp, -16 +-; CHECK-NEXT: st.w $a2, $fp, -24 +-; CHECK-NEXT: st.w $a3, $fp, -28 +-; CHECK-NEXT: move $a0, $zero +-; CHECK-NEXT: ld.w $fp, $sp, 24 # 4-byte Folded Reload +-; CHECK-NEXT: ld.w $ra, $sp, 28 # 4-byte Folded Reload +-; CHECK-NEXT: addi.w $sp, $sp, 32 +-; CHECK-NEXT: jirl $zero, $ra, 0 +diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/loongarch_generated_funcs.ll.nogenerated.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/loongarch_generated_funcs.ll.nogenerated.expected +deleted file mode 100644 +index d4edfe5e0..000000000 +--- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/loongarch_generated_funcs.ll.nogenerated.expected ++++ /dev/null +@@ -1,147 +0,0 @@ +-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +-; RUN: llc --enable-machine-outliner --mtriple=loongarch32-unknown-linux < %s | FileCheck %s +-@x = dso_local global i32 0, align 4 +- +-define dso_local i32 @check_boundaries() #0 { +-; CHECK-LABEL: check_boundaries: +-; CHECK: # %bb.0: +-; CHECK-NEXT: addi.w $sp, $sp, -32 +-; CHECK-NEXT: .cfi_def_cfa_offset 32 +-; CHECK-NEXT: st.w $ra, $sp, 28 # 4-byte Folded Spill +-; CHECK-NEXT: st.w $fp, $sp, 24 # 4-byte Folded Spill +-; CHECK-NEXT: .cfi_offset 1, -4 +-; CHECK-NEXT: .cfi_offset 22, -8 +-; CHECK-NEXT: addi.w $fp, $sp, 32 +-; CHECK-NEXT: .cfi_def_cfa 22, 0 +-; CHECK-NEXT: st.w $zero, $fp, -16 +-; CHECK-NEXT: st.w $zero, $fp, -12 +-; CHECK-NEXT: bnez $zero, .LBB0_2 +-; CHECK-NEXT: b .LBB0_1 +-; CHECK-NEXT: .LBB0_2: +-; CHECK-NEXT: ori $a0, $zero, 1 +-; CHECK-NEXT: st.w $a0, $fp, -24 +-; CHECK-NEXT: .LBB0_3: +-; CHECK-NEXT: ld.w $a0, $fp, -16 +-; CHECK-NEXT: bne $a0, $zero, .LBB0_5 +-; CHECK-NEXT: b .LBB0_4 +-; CHECK-NEXT: .LBB0_5: +-; CHECK-NEXT: ori $a0, $zero, 1 +-; CHECK-NEXT: st.w $a0, $fp, -24 +-; CHECK-NEXT: .LBB0_6: +-; CHECK-NEXT: move $a0, $zero +-; CHECK-NEXT: ld.w $fp, $sp, 24 # 4-byte Folded Reload +-; CHECK-NEXT: ld.w $ra, $sp, 28 # 4-byte Folded Reload +-; CHECK-NEXT: addi.w $sp, $sp, 32 +-; CHECK-NEXT: jirl $zero, $ra, 0 +-; CHECK-NEXT: .LBB0_1: +-; CHECK-NEXT: ori $a0, $zero, 2 +-; CHECK-NEXT: st.w $a0, $fp, -20 +-; CHECK-NEXT: ori $a0, $zero, 1 +-; CHECK-NEXT: st.w $a0, $fp, -16 +-; CHECK-NEXT: ori $a0, $zero, 3 +-; CHECK-NEXT: st.w $a0, $fp, -24 +-; CHECK-NEXT: ori $a0, $zero, 4 +-; CHECK-NEXT: st.w $a0, $fp, -28 +-; CHECK-NEXT: b .LBB0_3 +-; CHECK-NEXT: .LBB0_4: +-; CHECK-NEXT: ori 
$a0, $zero, 2 +-; CHECK-NEXT: st.w $a0, $fp, -20 +-; CHECK-NEXT: ori $a0, $zero, 1 +-; CHECK-NEXT: st.w $a0, $fp, -16 +-; CHECK-NEXT: ori $a0, $zero, 3 +-; CHECK-NEXT: st.w $a0, $fp, -24 +-; CHECK-NEXT: ori $a0, $zero, 4 +-; CHECK-NEXT: st.w $a0, $fp, -28 +-; CHECK-NEXT: b .LBB0_6 +- %1 = alloca i32, align 4 +- %2 = alloca i32, align 4 +- %3 = alloca i32, align 4 +- %4 = alloca i32, align 4 +- %5 = alloca i32, align 4 +- store i32 0, ptr %1, align 4 +- store i32 0, ptr %2, align 4 +- %6 = load i32, ptr %2, align 4 +- %7 = icmp ne i32 %6, 0 +- br i1 %7, label %9, label %8 +- +- store i32 1, ptr %2, align 4 +- store i32 2, ptr %3, align 4 +- store i32 3, ptr %4, align 4 +- store i32 4, ptr %5, align 4 +- br label %10 +- +- store i32 1, ptr %4, align 4 +- br label %10 +- +- %11 = load i32, ptr %2, align 4 +- %12 = icmp ne i32 %11, 0 +- br i1 %12, label %14, label %13 +- +- store i32 1, ptr %2, align 4 +- store i32 2, ptr %3, align 4 +- store i32 3, ptr %4, align 4 +- store i32 4, ptr %5, align 4 +- br label %15 +- +- store i32 1, ptr %4, align 4 +- br label %15 +- +- ret i32 0 +-} +- +-define dso_local i32 @main() #0 { +-; CHECK-LABEL: main: +-; CHECK: # %bb.0: +-; CHECK-NEXT: addi.w $sp, $sp, -32 +-; CHECK-NEXT: .cfi_def_cfa_offset 32 +-; CHECK-NEXT: st.w $ra, $sp, 28 # 4-byte Folded Spill +-; CHECK-NEXT: st.w $fp, $sp, 24 # 4-byte Folded Spill +-; CHECK-NEXT: .cfi_offset 1, -4 +-; CHECK-NEXT: .cfi_offset 22, -8 +-; CHECK-NEXT: addi.w $fp, $sp, 32 +-; CHECK-NEXT: .cfi_def_cfa 22, 0 +-; CHECK-NEXT: pcalau12i $a0, x +-; CHECK-NEXT: addi.w $a0, $a0, x +-; CHECK-NEXT: ori $a1, $zero, 1 +-; CHECK-NEXT: st.w $a1, $a0, 0 +-; CHECK-NEXT: st.w $zero, $fp, -12 +-; CHECK-NEXT: st.w $a1, $fp, -16 +-; CHECK-NEXT: ori $a0, $zero, 2 +-; CHECK-NEXT: st.w $a0, $fp, -20 +-; CHECK-NEXT: ori $a2, $zero, 3 +-; CHECK-NEXT: st.w $a2, $fp, -24 +-; CHECK-NEXT: ori $a3, $zero, 4 +-; CHECK-NEXT: st.w $a3, $fp, -28 +-; CHECK-NEXT: #APP +-; CHECK-NEXT: #NO_APP +-; CHECK-NEXT: st.w $a0, $fp, -20 +-; CHECK-NEXT: st.w $a1, $fp, -16 +-; CHECK-NEXT: st.w $a2, $fp, -24 +-; CHECK-NEXT: st.w $a3, $fp, -28 +-; CHECK-NEXT: move $a0, $zero +-; CHECK-NEXT: ld.w $fp, $sp, 24 # 4-byte Folded Reload +-; CHECK-NEXT: ld.w $ra, $sp, 28 # 4-byte Folded Reload +-; CHECK-NEXT: addi.w $sp, $sp, 32 +-; CHECK-NEXT: jirl $zero, $ra, 0 +- %1 = alloca i32, align 4 +- %2 = alloca i32, align 4 +- %3 = alloca i32, align 4 +- %4 = alloca i32, align 4 +- %5 = alloca i32, align 4 +- +- store i32 0, ptr %1, align 4 +- store i32 0, ptr @x, align 4 +- store i32 1, ptr %2, align 4 +- store i32 2, ptr %3, align 4 +- store i32 3, ptr %4, align 4 +- store i32 4, ptr %5, align 4 +- store i32 1, ptr @x, align 4 +- call void asm sideeffect "", "~{memory},~{dirflag},~{fpsr},~{flags}"() +- store i32 1, ptr %2, align 4 +- store i32 2, ptr %3, align 4 +- store i32 3, ptr %4, align 4 +- store i32 4, ptr %5, align 4 +- ret i32 0 +-} +- +-attributes #0 = { noredzone nounwind ssp uwtable "frame-pointer"="all" } +diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/loongarch_generated_funcs.test b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/loongarch_generated_funcs.test +deleted file mode 100644 +index 2209d3036..000000000 +--- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/loongarch_generated_funcs.test ++++ /dev/null +@@ -1,17 +0,0 @@ +-# REQUIRES: loongarch-registered-target +- +-## Check that generated functions are included. 
+-# RUN: cp -f %S/Inputs/loongarch_generated_funcs.ll %t.ll && %update_llc_test_checks --include-generated-funcs %t.ll +-# RUN: diff -u %t.ll %S/Inputs/loongarch_generated_funcs.ll.generated.expected +- +-## Check that running the script again does not change the result: +-# RUN: %update_llc_test_checks --include-generated-funcs %t.ll +-# RUN: diff -u %t.ll %S/Inputs/loongarch_generated_funcs.ll.generated.expected +- +-## Check that generated functions are not included. +-# RUN: cp -f %S/Inputs/loongarch_generated_funcs.ll %t.ll && %update_llc_test_checks %t.ll +-# RUN: diff -u %t.ll %S/Inputs/loongarch_generated_funcs.ll.nogenerated.expected +- +-## Check that running the script again does not change the result: +-# RUN: %update_llc_test_checks %t.ll +-# RUN: diff -u %t.ll %S/Inputs/loongarch_generated_funcs.ll.nogenerated.expected +diff --git a/llvm/test/tools/llvm-readobj/ELF/loongarch-eflags.test b/llvm/test/tools/llvm-readobj/ELF/loongarch-eflags.test +deleted file mode 100644 +index b66273640..000000000 +--- a/llvm/test/tools/llvm-readobj/ELF/loongarch-eflags.test ++++ /dev/null +@@ -1,64 +0,0 @@ +-## Check llvm-readobj is able to decode all possible LoongArch e_flags field values. +- +-# RUN: yaml2obj %s -o %t-lp64s -DCLASS=64 -DFLAG=LP64S +-# RUN: llvm-readobj -h %t-lp64s | FileCheck --check-prefix=READOBJ-LP64S %s +-# RUN: llvm-readelf -h %t-lp64s | FileCheck --check-prefix=READELF-LP64S --match-full-lines %s +- +-# RUN: yaml2obj %s -o %t-lp64f -DCLASS=64 -DFLAG=LP64F +-# RUN: llvm-readobj -h %t-lp64f | FileCheck --check-prefix=READOBJ-LP64F %s +-# RUN: llvm-readelf -h %t-lp64f | FileCheck --check-prefix=READELF-LP64F --match-full-lines %s +- +-# RUN: yaml2obj %s -o %t-lp64d -DCLASS=64 -DFLAG=LP64D +-# RUN: llvm-readobj -h %t-lp64d | FileCheck --check-prefix=READOBJ-LP64D %s +-# RUN: llvm-readelf -h %t-lp64d | FileCheck --check-prefix=READELF-LP64D --match-full-lines %s +- +-# RUN: yaml2obj %s -o %t-ilp32s -DCLASS=32 -DFLAG=ILP32S +-# RUN: llvm-readobj -h %t-ilp32s | FileCheck --check-prefix=READOBJ-ILP32S %s +-# RUN: llvm-readelf -h %t-ilp32s | FileCheck --check-prefix=READELF-ILP32S --match-full-lines %s +- +-# RUN: yaml2obj %s -o %t-ilp32f -DCLASS=32 -DFLAG=ILP32F +-# RUN: llvm-readobj -h %t-ilp32f | FileCheck --check-prefix=READOBJ-ILP32F %s +-# RUN: llvm-readelf -h %t-ilp32f | FileCheck --check-prefix=READELF-ILP32F --match-full-lines %s +- +-# RUN: yaml2obj %s -o %t-ilp32d -DCLASS=32 -DFLAG=ILP32D +-# RUN: llvm-readobj -h %t-ilp32d | FileCheck --check-prefix=READOBJ-ILP32D %s +-# RUN: llvm-readelf -h %t-ilp32d | FileCheck --check-prefix=READELF-ILP32D --match-full-lines %s +- +-# READOBJ-LP64S: Flags [ (0x1) +-# READOBJ-LP64S-NEXT: EF_LOONGARCH_BASE_ABI_LP64S (0x1) +-# READOBJ-LP64S-NEXT: ] +- +-# READOBJ-LP64F: Flags [ (0x2) +-# READOBJ-LP64F-NEXT: EF_LOONGARCH_BASE_ABI_LP64F (0x2) +-# READOBJ-LP64F-NEXT: ] +- +-# READOBJ-LP64D: Flags [ (0x3) +-# READOBJ-LP64D-NEXT: EF_LOONGARCH_BASE_ABI_LP64D (0x3) +-# READOBJ-LP64D-NEXT: ] +- +-# READOBJ-ILP32S: Flags [ (0x5) +-# READOBJ-ILP32S-NEXT: EF_LOONGARCH_BASE_ABI_ILP32S (0x5) +-# READOBJ-ILP32S-NEXT: ] +- +-# READOBJ-ILP32F: Flags [ (0x6) +-# READOBJ-ILP32F-NEXT: EF_LOONGARCH_BASE_ABI_ILP32F (0x6) +-# READOBJ-ILP32F-NEXT: ] +- +-# READOBJ-ILP32D: Flags [ (0x7) +-# READOBJ-ILP32D-NEXT: EF_LOONGARCH_BASE_ABI_ILP32D (0x7) +-# READOBJ-ILP32D-NEXT: ] +- +-# READELF-LP64S: Flags: 0x1, LP64, SOFT-FLOAT +-# READELF-LP64F: Flags: 0x2, LP64, SINGLE-FLOAT +-# READELF-LP64D: Flags: 0x3, LP64, DOUBLE-FLOAT +-# READELF-ILP32S: Flags: 0x5, 
ILP32, SOFT-FLOAT +-# READELF-ILP32F: Flags: 0x6, ILP32, SINGLE-FLOAT +-# READELF-ILP32D: Flags: 0x7, ILP32, DOUBLE-FLOAT +- +---- !ELF +-FileHeader: +- Class: ELFCLASS[[CLASS]] +- Data: ELFDATA2LSB +- Type: ET_EXEC +- Machine: EM_LOONGARCH +- Flags: [ EF_LOONGARCH_BASE_ABI_[[FLAG]] ] +diff --git a/llvm/test/tools/obj2yaml/ELF/loongarch-eflags.yaml b/llvm/test/tools/obj2yaml/ELF/loongarch-eflags.yaml +deleted file mode 100644 +index e4c4c292e..000000000 +--- a/llvm/test/tools/obj2yaml/ELF/loongarch-eflags.yaml ++++ /dev/null +@@ -1,29 +0,0 @@ +-## Check obj2yaml is able to decode all possible LoongArch e_flags field values. +- +-# RUN: yaml2obj %s -o %t-lp64s -DCLASS=64 -DFLAG=LP64S +-# RUN: obj2yaml %t-lp64s | FileCheck -DFLAG=LP64S %s +- +-# RUN: yaml2obj %s -o %t-lp64f -DCLASS=64 -DFLAG=LP64F +-# RUN: obj2yaml %t-lp64f | FileCheck -DFLAG=LP64F %s +- +-# RUN: yaml2obj %s -o %t-lp64d -DCLASS=64 -DFLAG=LP64D +-# RUN: obj2yaml %t-lp64d | FileCheck -DFLAG=LP64D %s +- +-# RUN: yaml2obj %s -o %t-ilp32s -DCLASS=32 -DFLAG=ILP32S +-# RUN: obj2yaml %t-ilp32s | FileCheck -DFLAG=ILP32S %s +- +-# RUN: yaml2obj %s -o %t-ilp32f -DCLASS=32 -DFLAG=ILP32F +-# RUN: obj2yaml %t-ilp32f | FileCheck -DFLAG=ILP32F %s +- +-# RUN: yaml2obj %s -o %t-ilp32d -DCLASS=32 -DFLAG=ILP32D +-# RUN: obj2yaml %t-ilp32d | FileCheck -DFLAG=ILP32D %s +- +-# CHECK: Flags: [ EF_LOONGARCH_BASE_ABI_[[FLAG]] ] +- +---- !ELF +-FileHeader: +- Class: ELFCLASS[[CLASS]] +- Data: ELFDATA2LSB +- Type: ET_EXEC +- Machine: EM_LOONGARCH +- Flags: [ EF_LOONGARCH_BASE_ABI_[[FLAG]] ] +diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp +index ba7bae96a..ae2dec5d1 100644 +--- a/llvm/tools/llvm-readobj/ELFDumper.cpp ++++ b/llvm/tools/llvm-readobj/ELFDumper.cpp +@@ -1648,15 +1648,6 @@ const EnumEntry ElfHeaderAVRFlags[] = { + ENUM_ENT(EF_AVR_LINKRELAX_PREPARED, "relaxable"), + }; + +-const EnumEntry ElfHeaderLoongArchFlags[] = { +- ENUM_ENT(EF_LOONGARCH_BASE_ABI_ILP32S, "ILP32, SOFT-FLOAT"), +- ENUM_ENT(EF_LOONGARCH_BASE_ABI_ILP32F, "ILP32, SINGLE-FLOAT"), +- ENUM_ENT(EF_LOONGARCH_BASE_ABI_ILP32D, "ILP32, DOUBLE-FLOAT"), +- ENUM_ENT(EF_LOONGARCH_BASE_ABI_LP64S, "LP64, SOFT-FLOAT"), +- ENUM_ENT(EF_LOONGARCH_BASE_ABI_LP64F, "LP64, SINGLE-FLOAT"), +- ENUM_ENT(EF_LOONGARCH_BASE_ABI_LP64D, "LP64, DOUBLE-FLOAT"), +-}; +- + + const EnumEntry ElfSymOtherFlags[] = { + LLVM_READOBJ_ENUM_ENT(ELF, STV_INTERNAL), +@@ -3366,9 +3357,6 @@ template void GNUELFDumper::printFileHeaders() { + else if (e.e_machine == EM_AVR) + ElfFlags = printFlags(e.e_flags, makeArrayRef(ElfHeaderAVRFlags), + unsigned(ELF::EF_AVR_ARCH_MASK)); +- else if (e.e_machine == EM_LOONGARCH) +- ElfFlags = printFlags(e.e_flags, makeArrayRef(ElfHeaderLoongArchFlags), +- unsigned(ELF::EF_LOONGARCH_BASE_ABI_MASK)); + Str = "0x" + utohexstr(e.e_flags); + if (!ElfFlags.empty()) + Str = Str + ", " + ElfFlags; +@@ -6519,9 +6507,6 @@ template void LLVMELFDumper::printFileHeaders() { + else if (E.e_machine == EM_AVR) + W.printFlags("Flags", E.e_flags, makeArrayRef(ElfHeaderAVRFlags), + unsigned(ELF::EF_AVR_ARCH_MASK)); +- else if (E.e_machine == EM_LOONGARCH) +- W.printFlags("Flags", E.e_flags, makeArrayRef(ElfHeaderLoongArchFlags), +- unsigned(ELF::EF_LOONGARCH_BASE_ABI_MASK)); + else + W.printFlags("Flags", E.e_flags); + W.printNumber("HeaderSize", E.e_ehsize); diff --git a/0002-Add-LoongArch-Support-for-ObjectYAML.patch b/0002-Add-LoongArch-Support-for-ObjectYAML.patch new file mode 100644 index 0000000..4cf0bb1 --- /dev/null +++ 
b/0002-Add-LoongArch-Support-for-ObjectYAML.patch @@ -0,0 +1,1983 @@ +diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +index 195c0e6a8..c8c219d44 100644 +--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp ++++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +@@ -9996,8 +9996,7 @@ void SelectionDAG::salvageDebugInfo(SDNode &N) { + case ISD::ADD: + SDValue N0 = N.getOperand(0); + SDValue N1 = N.getOperand(1); +- if (!isConstantIntBuildVectorOrConstantInt(N0) && +- isConstantIntBuildVectorOrConstantInt(N1)) { ++ if (!isa(N0) && isa(N1)) { + uint64_t Offset = N.getConstantOperandVal(1); + + // Rewrite an ADD constant node into a DIExpression. Since we are +diff --git a/llvm/lib/ObjectYAML/ELFYAML.cpp b/llvm/lib/ObjectYAML/ELFYAML.cpp +index b778006cf..20600a334 100644 +--- a/llvm/lib/ObjectYAML/ELFYAML.cpp ++++ b/llvm/lib/ObjectYAML/ELFYAML.cpp +@@ -612,6 +612,11 @@ void ScalarBitSetTraits::bitset(IO &IO, + break; + } + break; ++ case ELF::EM_LOONGARCH: ++ BCaseMask(EF_LARCH_ABI_LP32, EF_LARCH_ABI); ++ BCaseMask(EF_LARCH_ABI_LPX32, EF_LARCH_ABI); ++ BCaseMask(EF_LARCH_ABI_LP64, EF_LARCH_ABI); ++ break; + default: + break; + } +diff --git a/llvm/test/DebugInfo/X86/dbg-value-no-crash.ll b/llvm/test/DebugInfo/X86/dbg-value-no-crash.ll +new file mode 100644 +index 000000000..8ff055f13 +--- /dev/null ++++ b/llvm/test/DebugInfo/X86/dbg-value-no-crash.ll +@@ -0,0 +1,39 @@ ++; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=avx512bw,avx512vl -o - %s ++ ++;; Check this won't result in crash. ++define <8 x i32> @foo(ptr %0, <8 x i32> %1, i8 %2, i8 %3) { ++ %5 = call <8 x i32> @llvm.smax.v8i32(<8 x i32> %1, <8 x i32> zeroinitializer) ++ %6 = add nsw <8 x i32> %1, ++ call void @llvm.dbg.value(metadata <8 x i32> %6, metadata !4, metadata !DIExpression()), !dbg !15 ++ %7 = bitcast i8 %2 to <8 x i1> ++ %8 = select <8 x i1> %7, <8 x i32> %6, <8 x i32> %5 ++ %9 = call <8 x i32> @llvm.smax.v8i32(<8 x i32> %8, <8 x i32> zeroinitializer) ++ %10 = bitcast i8 %3 to <8 x i1> ++ %11 = select <8 x i1> %10, <8 x i32> %9, <8 x i32> ++ ret <8 x i32> %11 ++} ++ ++declare <8 x i32> @llvm.smax.v8i32(<8 x i32>, <8 x i32>) ++declare void @llvm.dbg.value(metadata, metadata, metadata) ++ ++!llvm.dbg.cu = !{!0} ++!llvm.module.flags = !{!3} ++ ++!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang version 16.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, retainedTypes: !2) ++!1 = !DIFile(filename: "a.cpp", directory: "/") ++!2 = !{} ++!3 = !{i32 2, !"Debug Info Version", i32 3} ++!4 = !DILocalVariable(name: "a", arg: 2, scope: !5, file: !1, line: 12, type: !11) ++!5 = distinct !DISubprogram(name: "foo", scope: !6, file: !1, line: 12, type: !7, scopeLine: 12, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, declaration: !9, retainedNodes: !10) ++!6 = !DINamespace(name: "ns1", scope: null) ++!7 = !DISubroutineType(types: !8) ++!8 = !{null} ++!9 = !DISubprogram(name: "foo", scope: !6, file: !1, line: 132, type: !7, scopeLine: 12, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized) ++!10 = !{!4} ++!11 = !DICompositeType(tag: DW_TAG_array_type, baseType: !12, size: 256, flags: DIFlagVector, elements: !13) ++!12 = !DIBasicType(name: "long long", size: 64, encoding: DW_ATE_signed) ++!13 = !{!14} ++!14 = !DISubrange(count: 4) ++!15 = !DILocation(line: 0, scope: !5, inlinedAt: !16) ++!16 = !DILocation(line: 18, scope: !17) ++!17 = distinct !DISubprogram(name: "foo", scope: null, file: !1, 
type: !7, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2) +diff --git a/llvm/test/MC/Disassembler/LoongArch/lit.local.cfg b/llvm/test/MC/Disassembler/LoongArch/lit.local.cfg +new file mode 100644 +index 000000000..6223fc691 +--- /dev/null ++++ b/llvm/test/MC/Disassembler/LoongArch/lit.local.cfg +@@ -0,0 +1,3 @@ ++if not 'LoongArch' in config.root.targets: ++ config.unsupported = True ++ +diff --git a/llvm/test/MC/Disassembler/LoongArch/simd.txt b/llvm/test/MC/Disassembler/LoongArch/simd.txt +new file mode 100644 +index 000000000..90da1700a +--- /dev/null ++++ b/llvm/test/MC/Disassembler/LoongArch/simd.txt +@@ -0,0 +1,1361 @@ ++# RUN: llvm-mc --disassemble %s -triple=loongarch64-unknown-linux -mattr=+lsx,+lasx | FileCheck %s ++ ++0xcf 0x2a 0x19 0x09 # CHECK: vfmadd.s $vr15, $vr22, $vr10, $vr18 ++0x01 0x30 0x25 0x09 # CHECK: vfmadd.d $vr1, $vr0, $vr12, $vr10 ++0x50 0x36 0x54 0x09 # CHECK: vfmsub.s $vr16, $vr18, $vr13, $vr8 ++0xb9 0x05 0x6a 0x09 # CHECK: vfmsub.d $vr25, $vr13, $vr1, $vr20 ++0x56 0x44 0x9b 0x09 # CHECK: vfnmadd.s $vr22, $vr2, $vr17, $vr22 ++0xbc 0x0b 0xa7 0x09 # CHECK: vfnmadd.d $vr28, $vr29, $vr2, $vr14 ++0x93 0x44 0xdc 0x09 # CHECK: vfnmsub.s $vr19, $vr4, $vr17, $vr24 ++0xd8 0x72 0xef 0x09 # CHECK: vfnmsub.d $vr24, $vr22, $vr28, $vr30 ++0x8f 0xa7 0x17 0x0a # CHECK: xvfmadd.s $xr15, $xr28, $xr9, $xr15 ++0x05 0x33 0x25 0x0a # CHECK: xvfmadd.d $xr5, $xr24, $xr12, $xr10 ++0x14 0x6c 0x5d 0x0a # CHECK: xvfmsub.s $xr20, $xr0, $xr27, $xr26 ++0x0d 0x65 0x6d 0x0a # CHECK: xvfmsub.d $xr13, $xr8, $xr25, $xr26 ++0xce 0x59 0x94 0x0a # CHECK: xvfnmadd.s $xr14, $xr14, $xr22, $xr8 ++0x39 0x02 0xa2 0x0a # CHECK: xvfnmadd.d $xr25, $xr17, $xr0, $xr4 ++0x6b 0x80 0xd5 0x0a # CHECK: xvfnmsub.s $xr11, $xr3, $xr0, $xr11 ++0x62 0x60 0xeb 0x0a # CHECK: xvfnmsub.d $xr2, $xr3, $xr24, $xr22 ++0xfa 0x6d 0x52 0x0c # CHECK: vfcmp.ceq.s $vr26, $vr15, $vr27 ++0xb5 0x06 0x62 0x0c # CHECK: vfcmp.ceq.d $vr21, $vr21, $vr1 ++0x28 0x4d 0x92 0x0c # CHECK: xvfcmp.ceq.s $xr8, $xr9, $xr19 ++0x19 0x72 0xa2 0x0c # CHECK: xvfcmp.ceq.d $xr25, $xr16, $xr28 ++0xf4 0xf6 0x14 0x0d # CHECK: vbitsel.v $vr20, $vr23, $vr29, $vr9 ++0x47 0xf3 0x2b 0x0d # CHECK: xvbitsel.v $xr7, $xr26, $xr28, $xr23 ++0x8b 0x9c 0x54 0x0d # CHECK: vshuf.b $vr11, $vr4, $vr7, $vr9 ++0xb0 0x2a 0x66 0x0d # CHECK: xvshuf.b $xr16, $xr21, $xr10, $xr12 ++0x3c 0x0b 0x38 0x2c # CHECK: vld $vr28, $r25, -510 ++0xdc 0x3d 0x48 0x2c # CHECK: vst $vr28, $r14, 527 ++0xcb 0x00 0x88 0x2c # CHECK: xvld $xr11, $r6, 512 ++0xed 0xfc 0xd2 0x2c # CHECK: xvst $xr13, $r7, 1215 ++0x28 0xfd 0x14 0x30 # CHECK: vldrepl.d $vr8, $r9, -1544 ++0x22 0xd9 0x2e 0x30 # CHECK: vldrepl.w $vr2, $r9, -296 ++0xfc 0xfa 0x41 0x30 # CHECK: vldrepl.h $vr28, $r23, 252 ++0x25 0xad 0xb4 0x30 # CHECK: vldrepl.b $vr5, $r9, -725 ++0x57 0x57 0x15 0x31 # CHECK: vstelm.d $vr23, $r26, 680, 1 ++0xfe 0x8e 0x26 0x31 # CHECK: vstelm.w $vr30, $r23, -372, 1 ++0xcb 0x3c 0x5c 0x31 # CHECK: vstelm.h $vr11, $r6, 30, 7 ++0xe3 0xb1 0xb8 0x31 # CHECK: vstelm.b $vr3, $r15, 44, 14 ++0x18 0xa5 0x11 0x32 # CHECK: xvldrepl.d $xr24, $r8, 840 ++0x0e 0xef 0x21 0x32 # CHECK: xvldrepl.w $xr14, $r24, 492 ++0x32 0x49 0x46 0x32 # CHECK: xvldrepl.h $xr18, $r9, 804 ++0xa6 0xaf 0x8c 0x32 # CHECK: xvldrepl.b $xr6, $r29, 811 ++0x75 0x94 0x13 0x33 # CHECK: xvstelm.d $xr21, $sp, -216, 0 ++0xbf 0xab 0x21 0x33 # CHECK: xvstelm.w $xr31, $r29, 424, 0 ++0xee 0xb4 0x50 0x33 # CHECK: xvstelm.h $xr14, $r7, 90, 4 ++0x15 0xef 0xa3 0x33 # CHECK: xvstelm.b $xr21, $r24, -5, 8 ++0x9d 0x78 0x40 0x38 # CHECK: vldx $vr29, $r4, $r30 ++0x9f 0x77 0x44 
0x38 # CHECK: vstx $vr31, $r28, $r29 ++0xc8 0x63 0x48 0x38 # CHECK: xvldx $xr8, $r30, $r24 ++0x22 0x75 0x4c 0x38 # CHECK: xvstx $xr2, $r9, $r29 ++0x5c 0x5f 0x00 0x70 # CHECK: vseq.b $vr28, $vr26, $vr23 ++0x2a 0x94 0x00 0x70 # CHECK: vseq.h $vr10, $vr1, $vr5 ++0x63 0x47 0x01 0x70 # CHECK: vseq.w $vr3, $vr27, $vr17 ++0x65 0x8c 0x01 0x70 # CHECK: vseq.d $vr5, $vr3, $vr3 ++0x3d 0x1d 0x02 0x70 # CHECK: vsle.b $vr29, $vr9, $vr7 ++0x05 0xa7 0x02 0x70 # CHECK: vsle.h $vr5, $vr24, $vr9 ++0xd1 0x53 0x03 0x70 # CHECK: vsle.w $vr17, $vr30, $vr20 ++0xdb 0xb4 0x03 0x70 # CHECK: vsle.d $vr27, $vr6, $vr13 ++0x7e 0x29 0x04 0x70 # CHECK: vsle.bu $vr30, $vr11, $vr10 ++0xb3 0xff 0x04 0x70 # CHECK: vsle.hu $vr19, $vr29, $vr31 ++0x50 0x52 0x05 0x70 # CHECK: vsle.wu $vr16, $vr18, $vr20 ++0x3f 0xa2 0x05 0x70 # CHECK: vsle.du $vr31, $vr17, $vr8 ++0xfa 0x14 0x06 0x70 # CHECK: vslt.b $vr26, $vr7, $vr5 ++0x4e 0xd0 0x06 0x70 # CHECK: vslt.h $vr14, $vr2, $vr20 ++0xae 0x64 0x07 0x70 # CHECK: vslt.w $vr14, $vr5, $vr25 ++0x3a 0xe5 0x07 0x70 # CHECK: vslt.d $vr26, $vr9, $vr25 ++0x5f 0x3a 0x08 0x70 # CHECK: vslt.bu $vr31, $vr18, $vr14 ++0xe5 0x95 0x08 0x70 # CHECK: vslt.hu $vr5, $vr15, $vr5 ++0x9f 0x37 0x09 0x70 # CHECK: vslt.wu $vr31, $vr28, $vr13 ++0x6b 0xda 0x09 0x70 # CHECK: vslt.du $vr11, $vr19, $vr22 ++0x9a 0x7e 0x0a 0x70 # CHECK: vadd.b $vr26, $vr20, $vr31 ++0x2b 0xf7 0x0a 0x70 # CHECK: vadd.h $vr11, $vr25, $vr29 ++0x27 0x37 0x0b 0x70 # CHECK: vadd.w $vr7, $vr25, $vr13 ++0xb0 0xc1 0x0b 0x70 # CHECK: vadd.d $vr16, $vr13, $vr16 ++0x6c 0x54 0x0c 0x70 # CHECK: vsub.b $vr12, $vr3, $vr21 ++0xaf 0xe5 0x0c 0x70 # CHECK: vsub.h $vr15, $vr13, $vr25 ++0x14 0x66 0x0d 0x70 # CHECK: vsub.w $vr20, $vr16, $vr25 ++0x73 0x9c 0x0d 0x70 # CHECK: vsub.d $vr19, $vr3, $vr7 ++0xce 0x17 0x46 0x70 # CHECK: vsadd.b $vr14, $vr30, $vr5 ++0x2a 0xbc 0x46 0x70 # CHECK: vsadd.h $vr10, $vr1, $vr15 ++0xf3 0x2b 0x47 0x70 # CHECK: vsadd.w $vr19, $vr31, $vr10 ++0x7a 0xf2 0x47 0x70 # CHECK: vsadd.d $vr26, $vr19, $vr28 ++0x78 0x1c 0x48 0x70 # CHECK: vssub.b $vr24, $vr3, $vr7 ++0x9f 0xe0 0x48 0x70 # CHECK: vssub.h $vr31, $vr4, $vr24 ++0x7d 0x33 0x49 0x70 # CHECK: vssub.w $vr29, $vr27, $vr12 ++0x17 0xa6 0x49 0x70 # CHECK: vssub.d $vr23, $vr16, $vr9 ++0xba 0x13 0x4a 0x70 # CHECK: vsadd.bu $vr26, $vr29, $vr4 ++0xef 0xa4 0x4a 0x70 # CHECK: vsadd.hu $vr15, $vr7, $vr9 ++0x4d 0x42 0x4b 0x70 # CHECK: vsadd.wu $vr13, $vr18, $vr16 ++0xa4 0x80 0x4b 0x70 # CHECK: vsadd.du $vr4, $vr5, $vr0 ++0x3b 0x36 0x4c 0x70 # CHECK: vssub.bu $vr27, $vr17, $vr13 ++0x05 0x85 0x4c 0x70 # CHECK: vssub.hu $vr5, $vr8, $vr1 ++0x0e 0x59 0x4d 0x70 # CHECK: vssub.wu $vr14, $vr8, $vr22 ++0x31 0xa1 0x4d 0x70 # CHECK: vssub.du $vr17, $vr9, $vr8 ++0x77 0x0a 0x54 0x70 # CHECK: vhaddw.h.b $vr23, $vr19, $vr2 ++0x1a 0xea 0x54 0x70 # CHECK: vhaddw.w.h $vr26, $vr16, $vr26 ++0xe0 0x6f 0x55 0x70 # CHECK: vhaddw.d.w $vr0, $vr31, $vr27 ++0xb9 0xe5 0x55 0x70 # CHECK: vhaddw.q.d $vr25, $vr13, $vr25 ++0xe9 0x16 0x56 0x70 # CHECK: vhsubw.h.b $vr9, $vr23, $vr5 ++0xaf 0xeb 0x56 0x70 # CHECK: vhsubw.w.h $vr15, $vr29, $vr26 ++0x80 0x4b 0x57 0x70 # CHECK: vhsubw.d.w $vr0, $vr28, $vr18 ++0x2e 0xa3 0x57 0x70 # CHECK: vhsubw.q.d $vr14, $vr25, $vr8 ++0x01 0x56 0x58 0x70 # CHECK: vhaddw.hu.bu $vr1, $vr16, $vr21 ++0xbc 0xf6 0x58 0x70 # CHECK: vhaddw.wu.hu $vr28, $vr21, $vr29 ++0x9d 0x42 0x59 0x70 # CHECK: vhaddw.du.wu $vr29, $vr20, $vr16 ++0x42 0xf1 0x59 0x70 # CHECK: vhaddw.qu.du $vr2, $vr10, $vr28 ++0x7f 0x78 0x5a 0x70 # CHECK: vhsubw.hu.bu $vr31, $vr3, $vr30 ++0x25 0xad 0x5a 0x70 # CHECK: vhsubw.wu.hu $vr5, $vr9, 
$vr11 ++0xf7 0x5b 0x5b 0x70 # CHECK: vhsubw.du.wu $vr23, $vr31, $vr22 ++0x84 0xcb 0x5b 0x70 # CHECK: vhsubw.qu.du $vr4, $vr28, $vr18 ++0xb2 0x2d 0x5c 0x70 # CHECK: vadda.b $vr18, $vr13, $vr11 ++0xd1 0xb1 0x5c 0x70 # CHECK: vadda.h $vr17, $vr14, $vr12 ++0x76 0x0d 0x5d 0x70 # CHECK: vadda.w $vr22, $vr11, $vr3 ++0x18 0xbf 0x5d 0x70 # CHECK: vadda.d $vr24, $vr24, $vr15 ++0x77 0x46 0x60 0x70 # CHECK: vabsd.b $vr23, $vr19, $vr17 ++0xee 0xb7 0x60 0x70 # CHECK: vabsd.h $vr14, $vr31, $vr13 ++0x38 0x24 0x61 0x70 # CHECK: vabsd.w $vr24, $vr1, $vr9 ++0x9f 0x82 0x61 0x70 # CHECK: vabsd.d $vr31, $vr20, $vr0 ++0x97 0x75 0x62 0x70 # CHECK: vabsd.bu $vr23, $vr12, $vr29 ++0x72 0x86 0x62 0x70 # CHECK: vabsd.hu $vr18, $vr19, $vr1 ++0xad 0x72 0x63 0x70 # CHECK: vabsd.wu $vr13, $vr21, $vr28 ++0x50 0xaf 0x63 0x70 # CHECK: vabsd.du $vr16, $vr26, $vr11 ++0xa1 0x6e 0x64 0x70 # CHECK: vavg.b $vr1, $vr21, $vr27 ++0x54 0xbf 0x64 0x70 # CHECK: vavg.h $vr20, $vr26, $vr15 ++0x5d 0x0e 0x65 0x70 # CHECK: vavg.w $vr29, $vr18, $vr3 ++0xf3 0xfd 0x65 0x70 # CHECK: vavg.d $vr19, $vr15, $vr31 ++0x6b 0x45 0x66 0x70 # CHECK: vavg.bu $vr11, $vr11, $vr17 ++0x9e 0xb7 0x66 0x70 # CHECK: vavg.hu $vr30, $vr28, $vr13 ++0xe7 0x28 0x67 0x70 # CHECK: vavg.wu $vr7, $vr7, $vr10 ++0xf9 0xb0 0x67 0x70 # CHECK: vavg.du $vr25, $vr7, $vr12 ++0xbd 0x1d 0x68 0x70 # CHECK: vavgr.b $vr29, $vr13, $vr7 ++0x85 0xcf 0x68 0x70 # CHECK: vavgr.h $vr5, $vr28, $vr19 ++0xf3 0x39 0x69 0x70 # CHECK: vavgr.w $vr19, $vr15, $vr14 ++0x03 0x88 0x69 0x70 # CHECK: vavgr.d $vr3, $vr0, $vr2 ++0x77 0x7d 0x6a 0x70 # CHECK: vavgr.bu $vr23, $vr11, $vr31 ++0x79 0xa2 0x6a 0x70 # CHECK: vavgr.hu $vr25, $vr19, $vr8 ++0x3e 0x33 0x6b 0x70 # CHECK: vavgr.wu $vr30, $vr25, $vr12 ++0x99 0xe6 0x6b 0x70 # CHECK: vavgr.du $vr25, $vr20, $vr25 ++0x5c 0x6b 0x70 0x70 # CHECK: vmax.b $vr28, $vr26, $vr26 ++0xa8 0xad 0x70 0x70 # CHECK: vmax.h $vr8, $vr13, $vr11 ++0x95 0x7f 0x71 0x70 # CHECK: vmax.w $vr21, $vr28, $vr31 ++0xc1 0xeb 0x71 0x70 # CHECK: vmax.d $vr1, $vr30, $vr26 ++0xca 0x25 0x72 0x70 # CHECK: vmin.b $vr10, $vr14, $vr9 ++0x6a 0xd5 0x72 0x70 # CHECK: vmin.h $vr10, $vr11, $vr21 ++0x1a 0x30 0x73 0x70 # CHECK: vmin.w $vr26, $vr0, $vr12 ++0x53 0x82 0x73 0x70 # CHECK: vmin.d $vr19, $vr18, $vr0 ++0x22 0x73 0x74 0x70 # CHECK: vmax.bu $vr2, $vr25, $vr28 ++0xc9 0xfa 0x74 0x70 # CHECK: vmax.hu $vr9, $vr22, $vr30 ++0x35 0x6f 0x75 0x70 # CHECK: vmax.wu $vr21, $vr25, $vr27 ++0xc3 0xe5 0x75 0x70 # CHECK: vmax.du $vr3, $vr14, $vr25 ++0xf8 0x6c 0x76 0x70 # CHECK: vmin.bu $vr24, $vr7, $vr27 ++0x92 0xf7 0x76 0x70 # CHECK: vmin.hu $vr18, $vr28, $vr29 ++0x9a 0x08 0x77 0x70 # CHECK: vmin.wu $vr26, $vr4, $vr2 ++0x0d 0x90 0x77 0x70 # CHECK: vmin.du $vr13, $vr0, $vr4 ++0xa1 0x5e 0x84 0x70 # CHECK: vmul.b $vr1, $vr21, $vr23 ++0xa9 0xe6 0x84 0x70 # CHECK: vmul.h $vr9, $vr21, $vr25 ++0x10 0x71 0x85 0x70 # CHECK: vmul.w $vr16, $vr8, $vr28 ++0x24 0xae 0x85 0x70 # CHECK: vmul.d $vr4, $vr17, $vr11 ++0x0c 0x23 0x86 0x70 # CHECK: vmuh.b $vr12, $vr24, $vr8 ++0xa6 0xe2 0x86 0x70 # CHECK: vmuh.h $vr6, $vr21, $vr24 ++0xab 0x7b 0x87 0x70 # CHECK: vmuh.w $vr11, $vr29, $vr30 ++0x21 0xe6 0x87 0x70 # CHECK: vmuh.d $vr1, $vr17, $vr25 ++0xbd 0x2b 0x88 0x70 # CHECK: vmuh.bu $vr29, $vr29, $vr10 ++0x38 0xd5 0x88 0x70 # CHECK: vmuh.hu $vr24, $vr9, $vr21 ++0x8f 0x4e 0x89 0x70 # CHECK: vmuh.wu $vr15, $vr20, $vr19 ++0x80 0x87 0x89 0x70 # CHECK: vmuh.du $vr0, $vr28, $vr1 ++0x1b 0x10 0xa8 0x70 # CHECK: vmadd.b $vr27, $vr0, $vr4 ++0x93 0xf2 0xa8 0x70 # CHECK: vmadd.h $vr19, $vr20, $vr28 ++0xef 0x0c 0xa9 0x70 # CHECK: vmadd.w $vr15, $vr7, 
$vr3 ++0x39 0xfb 0xa9 0x70 # CHECK: vmadd.d $vr25, $vr25, $vr30 ++0x38 0x6b 0xaa 0x70 # CHECK: vmsub.b $vr24, $vr25, $vr26 ++0x0c 0xb4 0xaa 0x70 # CHECK: vmsub.h $vr12, $vr0, $vr13 ++0x1a 0x62 0xab 0x70 # CHECK: vmsub.w $vr26, $vr16, $vr24 ++0x4d 0xa1 0xab 0x70 # CHECK: vmsub.d $vr13, $vr10, $vr8 ++0x92 0x57 0xe0 0x70 # CHECK: vdiv.b $vr18, $vr28, $vr21 ++0x11 0x87 0xe0 0x70 # CHECK: vdiv.h $vr17, $vr24, $vr1 ++0x43 0x59 0xe1 0x70 # CHECK: vdiv.w $vr3, $vr10, $vr22 ++0xaf 0xa1 0xe1 0x70 # CHECK: vdiv.d $vr15, $vr13, $vr8 ++0x33 0x53 0xe2 0x70 # CHECK: vmod.b $vr19, $vr25, $vr20 ++0x02 0xdb 0xe2 0x70 # CHECK: vmod.h $vr2, $vr24, $vr22 ++0x5f 0x02 0xe3 0x70 # CHECK: vmod.w $vr31, $vr18, $vr0 ++0x1f 0x88 0xe3 0x70 # CHECK: vmod.d $vr31, $vr0, $vr2 ++0x8f 0x0c 0xe4 0x70 # CHECK: vdiv.bu $vr15, $vr4, $vr3 ++0xf1 0xf4 0xe4 0x70 # CHECK: vdiv.hu $vr17, $vr7, $vr29 ++0x5b 0x0d 0xe5 0x70 # CHECK: vdiv.wu $vr27, $vr10, $vr3 ++0x08 0xeb 0xe5 0x70 # CHECK: vdiv.du $vr8, $vr24, $vr26 ++0xca 0x62 0xe6 0x70 # CHECK: vmod.bu $vr10, $vr22, $vr24 ++0xf3 0xe3 0xe6 0x70 # CHECK: vmod.hu $vr19, $vr31, $vr24 ++0x1a 0x37 0xe7 0x70 # CHECK: vmod.wu $vr26, $vr24, $vr13 ++0x74 0xaa 0xe7 0x70 # CHECK: vmod.du $vr20, $vr19, $vr10 ++0x5c 0x7a 0xe8 0x70 # CHECK: vsll.b $vr28, $vr18, $vr30 ++0x96 0xf8 0xe8 0x70 # CHECK: vsll.h $vr22, $vr4, $vr30 ++0x21 0x23 0xe9 0x70 # CHECK: vsll.w $vr1, $vr25, $vr8 ++0x5f 0xbe 0xe9 0x70 # CHECK: vsll.d $vr31, $vr18, $vr15 ++0x85 0x41 0xea 0x70 # CHECK: vsrl.b $vr5, $vr12, $vr16 ++0xa9 0xf0 0xea 0x70 # CHECK: vsrl.h $vr9, $vr5, $vr28 ++0x1e 0x06 0xeb 0x70 # CHECK: vsrl.w $vr30, $vr16, $vr1 ++0xfc 0xee 0xeb 0x70 # CHECK: vsrl.d $vr28, $vr23, $vr27 ++0x2f 0x66 0xec 0x70 # CHECK: vsra.b $vr15, $vr17, $vr25 ++0x00 0x95 0xec 0x70 # CHECK: vsra.h $vr0, $vr8, $vr5 ++0x3d 0x1d 0xed 0x70 # CHECK: vsra.w $vr29, $vr9, $vr7 ++0x76 0xcc 0xed 0x70 # CHECK: vsra.d $vr22, $vr3, $vr19 ++0x08 0x22 0xee 0x70 # CHECK: vrotr.b $vr8, $vr16, $vr8 ++0xae 0xac 0xee 0x70 # CHECK: vrotr.h $vr14, $vr5, $vr11 ++0x91 0x67 0xef 0x70 # CHECK: vrotr.w $vr17, $vr28, $vr25 ++0x92 0xcf 0xef 0x70 # CHECK: vrotr.d $vr18, $vr28, $vr19 ++0x61 0x47 0xf0 0x70 # CHECK: vsrlr.b $vr1, $vr27, $vr17 ++0xda 0xa9 0xf0 0x70 # CHECK: vsrlr.h $vr26, $vr14, $vr10 ++0xa3 0x63 0xf1 0x70 # CHECK: vsrlr.w $vr3, $vr29, $vr24 ++0x97 0xa8 0xf1 0x70 # CHECK: vsrlr.d $vr23, $vr4, $vr10 ++0x59 0x54 0xf2 0x70 # CHECK: vsrar.b $vr25, $vr2, $vr21 ++0x64 0xd1 0xf2 0x70 # CHECK: vsrar.h $vr4, $vr11, $vr20 ++0xab 0x76 0xf3 0x70 # CHECK: vsrar.w $vr11, $vr21, $vr29 ++0xbd 0x88 0xf3 0x70 # CHECK: vsrar.d $vr29, $vr5, $vr2 ++0xd8 0xf5 0xf4 0x70 # CHECK: vsrln.b.h $vr24, $vr14, $vr29 ++0xda 0x42 0xf5 0x70 # CHECK: vsrln.h.w $vr26, $vr22, $vr16 ++0xf1 0x8b 0xf5 0x70 # CHECK: vsrln.w.d $vr17, $vr31, $vr2 ++0x1f 0xdc 0xf6 0x70 # CHECK: vsran.b.h $vr31, $vr0, $vr23 ++0x94 0x75 0xf7 0x70 # CHECK: vsran.h.w $vr20, $vr12, $vr29 ++0x22 0x88 0xf7 0x70 # CHECK: vsran.w.d $vr2, $vr1, $vr2 ++0x93 0x83 0xf8 0x70 # CHECK: vsrlrn.b.h $vr19, $vr28, $vr0 ++0xb7 0x3b 0xf9 0x70 # CHECK: vsrlrn.h.w $vr23, $vr29, $vr14 ++0x45 0x97 0xf9 0x70 # CHECK: vsrlrn.w.d $vr5, $vr26, $vr5 ++0xf1 0x9d 0xfa 0x70 # CHECK: vsrarn.b.h $vr17, $vr15, $vr7 ++0x4c 0x75 0xfb 0x70 # CHECK: vsrarn.h.w $vr12, $vr10, $vr29 ++0x58 0xef 0xfb 0x70 # CHECK: vsrarn.w.d $vr24, $vr26, $vr27 ++0x81 0xb9 0xfc 0x70 # CHECK: vssrln.b.h $vr1, $vr12, $vr14 ++0x0b 0x49 0xfd 0x70 # CHECK: vssrln.h.w $vr11, $vr8, $vr18 ++0xff 0x99 0xfd 0x70 # CHECK: vssrln.w.d $vr31, $vr15, $vr6 ++0xad 0xe0 0xfe 0x70 # CHECK: 
vssran.b.h $vr13, $vr5, $vr24 ++0x44 0x1f 0xff 0x70 # CHECK: vssran.h.w $vr4, $vr26, $vr7 ++0x59 0x99 0xff 0x70 # CHECK: vssran.w.d $vr25, $vr10, $vr6 ++0x9c 0x9b 0x00 0x71 # CHECK: vssrlrn.b.h $vr28, $vr28, $vr6 ++0xef 0x46 0x01 0x71 # CHECK: vssrlrn.h.w $vr15, $vr23, $vr17 ++0x2c 0x89 0x01 0x71 # CHECK: vssrlrn.w.d $vr12, $vr9, $vr2 ++0x21 0xc7 0x02 0x71 # CHECK: vssrarn.b.h $vr1, $vr25, $vr17 ++0x23 0x5d 0x03 0x71 # CHECK: vssrarn.h.w $vr3, $vr9, $vr23 ++0x2e 0xed 0x03 0x71 # CHECK: vssrarn.w.d $vr14, $vr9, $vr27 ++0x10 0xbf 0x04 0x71 # CHECK: vssrln.bu.h $vr16, $vr24, $vr15 ++0xf5 0x7a 0x05 0x71 # CHECK: vssrln.hu.w $vr21, $vr23, $vr30 ++0x0c 0xf9 0x05 0x71 # CHECK: vssrln.wu.d $vr12, $vr8, $vr30 ++0x45 0xb2 0x06 0x71 # CHECK: vssran.bu.h $vr5, $vr18, $vr12 ++0xe0 0x70 0x07 0x71 # CHECK: vssran.hu.w $vr0, $vr7, $vr28 ++0x65 0xa1 0x07 0x71 # CHECK: vssran.wu.d $vr5, $vr11, $vr8 ++0x32 0x8f 0x08 0x71 # CHECK: vssrlrn.bu.h $vr18, $vr25, $vr3 ++0x33 0x50 0x09 0x71 # CHECK: vssrlrn.hu.w $vr19, $vr1, $vr20 ++0xc6 0xcb 0x09 0x71 # CHECK: vssrlrn.wu.d $vr6, $vr30, $vr18 ++0xac 0x8d 0x0a 0x71 # CHECK: vssrarn.bu.h $vr12, $vr13, $vr3 ++0xb2 0x50 0x0b 0x71 # CHECK: vssrarn.hu.w $vr18, $vr5, $vr20 ++0x17 0xd5 0x0b 0x71 # CHECK: vssrarn.wu.d $vr23, $vr8, $vr21 ++0x4e 0x7c 0x0c 0x71 # CHECK: vbitclr.b $vr14, $vr2, $vr31 ++0x31 0xa3 0x0c 0x71 # CHECK: vbitclr.h $vr17, $vr25, $vr8 ++0x72 0x0d 0x0d 0x71 # CHECK: vbitclr.w $vr18, $vr11, $vr3 ++0xff 0xf5 0x0d 0x71 # CHECK: vbitclr.d $vr31, $vr15, $vr29 ++0xa8 0x43 0x0e 0x71 # CHECK: vbitset.b $vr8, $vr29, $vr16 ++0x25 0xc6 0x0e 0x71 # CHECK: vbitset.h $vr5, $vr17, $vr17 ++0x65 0x16 0x0f 0x71 # CHECK: vbitset.w $vr5, $vr19, $vr5 ++0x65 0xab 0x0f 0x71 # CHECK: vbitset.d $vr5, $vr27, $vr10 ++0xb0 0x20 0x10 0x71 # CHECK: vbitrev.b $vr16, $vr5, $vr8 ++0xac 0xb3 0x10 0x71 # CHECK: vbitrev.h $vr12, $vr29, $vr12 ++0xc3 0x39 0x11 0x71 # CHECK: vbitrev.w $vr3, $vr14, $vr14 ++0x7f 0xbb 0x11 0x71 # CHECK: vbitrev.d $vr31, $vr27, $vr14 ++0x16 0x4f 0x16 0x71 # CHECK: vpackev.b $vr22, $vr24, $vr19 ++0x5c 0xc8 0x16 0x71 # CHECK: vpackev.h $vr28, $vr2, $vr18 ++0x75 0x10 0x17 0x71 # CHECK: vpackev.w $vr21, $vr3, $vr4 ++0xb8 0xae 0x17 0x71 # CHECK: vpackev.d $vr24, $vr21, $vr11 ++0xec 0x6b 0x18 0x71 # CHECK: vpackod.b $vr12, $vr31, $vr26 ++0x79 0xc0 0x18 0x71 # CHECK: vpackod.h $vr25, $vr3, $vr16 ++0x55 0x3e 0x19 0x71 # CHECK: vpackod.w $vr21, $vr18, $vr15 ++0x62 0x80 0x19 0x71 # CHECK: vpackod.d $vr2, $vr3, $vr0 ++0x08 0x71 0x1a 0x71 # CHECK: vilvl.b $vr8, $vr8, $vr28 ++0x14 0xfc 0x1a 0x71 # CHECK: vilvl.h $vr20, $vr0, $vr31 ++0x4b 0x45 0x1b 0x71 # CHECK: vilvl.w $vr11, $vr10, $vr17 ++0xe7 0x84 0x1b 0x71 # CHECK: vilvl.d $vr7, $vr7, $vr1 ++0x6b 0x05 0x1c 0x71 # CHECK: vilvh.b $vr11, $vr11, $vr1 ++0xe0 0xb7 0x1c 0x71 # CHECK: vilvh.h $vr0, $vr31, $vr13 ++0xbc 0x1e 0x1d 0x71 # CHECK: vilvh.w $vr28, $vr21, $vr7 ++0x77 0xcc 0x1d 0x71 # CHECK: vilvh.d $vr23, $vr3, $vr19 ++0xa1 0x22 0x1e 0x71 # CHECK: vpickev.b $vr1, $vr21, $vr8 ++0x30 0xa4 0x1e 0x71 # CHECK: vpickev.h $vr16, $vr1, $vr9 ++0xad 0x11 0x1f 0x71 # CHECK: vpickev.w $vr13, $vr13, $vr4 ++0xcb 0xfb 0x1f 0x71 # CHECK: vpickev.d $vr11, $vr30, $vr30 ++0x67 0x35 0x20 0x71 # CHECK: vpickod.b $vr7, $vr11, $vr13 ++0x72 0x84 0x20 0x71 # CHECK: vpickod.h $vr18, $vr3, $vr1 ++0x03 0x4e 0x21 0x71 # CHECK: vpickod.w $vr3, $vr16, $vr19 ++0xac 0xd5 0x21 0x71 # CHECK: vpickod.d $vr12, $vr13, $vr21 ++0x2f 0x4e 0x22 0x71 # CHECK: vreplve.b $vr15, $vr17, $r19 ++0xee 0x92 0x22 0x71 # CHECK: vreplve.h $vr14, $vr23, $r4 ++0x7d 0x6e 0x23 
0x71 # CHECK: vreplve.w $vr29, $vr19, $r27 ++0x8d 0xd2 0x23 0x71 # CHECK: vreplve.d $vr13, $vr20, $r20 ++0x59 0x54 0x26 0x71 # CHECK: vand.v $vr25, $vr2, $vr21 ++0x64 0xc3 0x26 0x71 # CHECK: vor.v $vr4, $vr27, $vr16 ++0x3e 0x13 0x27 0x71 # CHECK: vxor.v $vr30, $vr25, $vr4 ++0x49 0xd8 0x27 0x71 # CHECK: vnor.v $vr9, $vr2, $vr22 ++0x54 0x13 0x28 0x71 # CHECK: vandn.v $vr20, $vr26, $vr4 ++0xa6 0xfa 0x28 0x71 # CHECK: vorn.v $vr6, $vr21, $vr30 ++0x2b 0x35 0x2b 0x71 # CHECK: vfrstp.b $vr11, $vr9, $vr13 ++0x55 0xdb 0x2b 0x71 # CHECK: vfrstp.h $vr21, $vr26, $vr22 ++0xe9 0x40 0x2d 0x71 # CHECK: vadd.q $vr9, $vr7, $vr16 ++0x22 0xc0 0x2d 0x71 # CHECK: vsub.q $vr2, $vr1, $vr16 ++0x42 0x38 0x2e 0x71 # CHECK: vsigncov.b $vr2, $vr2, $vr14 ++0xb5 0xb6 0x2e 0x71 # CHECK: vsigncov.h $vr21, $vr21, $vr13 ++0xf5 0x14 0x2f 0x71 # CHECK: vsigncov.w $vr21, $vr7, $vr5 ++0x4a 0x8d 0x2f 0x71 # CHECK: vsigncov.d $vr10, $vr10, $vr3 ++0x8a 0x84 0x30 0x71 # CHECK: vfadd.s $vr10, $vr4, $vr1 ++0x6f 0x0b 0x31 0x71 # CHECK: vfadd.d $vr15, $vr27, $vr2 ++0x0e 0xa6 0x32 0x71 # CHECK: vfsub.s $vr14, $vr16, $vr9 ++0x24 0x20 0x33 0x71 # CHECK: vfsub.d $vr4, $vr1, $vr8 ++0x40 0x9a 0x38 0x71 # CHECK: vfmul.s $vr0, $vr18, $vr6 ++0xfb 0x7b 0x39 0x71 # CHECK: vfmul.d $vr27, $vr31, $vr30 ++0xe3 0x98 0x3a 0x71 # CHECK: vfdiv.s $vr3, $vr7, $vr6 ++0xd0 0x78 0x3b 0x71 # CHECK: vfdiv.d $vr16, $vr6, $vr30 ++0xd2 0xa3 0x3c 0x71 # CHECK: vfmax.s $vr18, $vr30, $vr8 ++0x13 0x61 0x3d 0x71 # CHECK: vfmax.d $vr19, $vr8, $vr24 ++0x58 0x9b 0x3e 0x71 # CHECK: vfmin.s $vr24, $vr26, $vr6 ++0x30 0x07 0x3f 0x71 # CHECK: vfmin.d $vr16, $vr25, $vr1 ++0xe8 0xb8 0x40 0x71 # CHECK: vfmaxa.s $vr8, $vr7, $vr14 ++0x0a 0x11 0x41 0x71 # CHECK: vfmaxa.d $vr10, $vr8, $vr4 ++0xd0 0xc8 0x42 0x71 # CHECK: vfmina.s $vr16, $vr6, $vr18 ++0xfa 0x38 0x43 0x71 # CHECK: vfmina.d $vr26, $vr7, $vr14 ++0x9e 0x60 0x46 0x71 # CHECK: vfcvt.h.s $vr30, $vr4, $vr24 ++0x30 0x92 0x46 0x71 # CHECK: vfcvt.s.d $vr16, $vr17, $vr4 ++0xf9 0x2a 0x48 0x71 # CHECK: vffint.s.l $vr25, $vr23, $vr10 ++0xc9 0xee 0x49 0x71 # CHECK: vftint.w.d $vr9, $vr22, $vr27 ++0x5f 0x75 0x4a 0x71 # CHECK: vftintrm.w.d $vr31, $vr10, $vr29 ++0xb7 0xbd 0x4a 0x71 # CHECK: vftintrp.w.d $vr23, $vr13, $vr15 ++0x32 0x19 0x4b 0x71 # CHECK: vftintrz.w.d $vr18, $vr9, $vr6 ++0x95 0xf9 0x4b 0x71 # CHECK: vftintrne.w.d $vr21, $vr12, $vr30 ++0x63 0x89 0x7a 0x71 # CHECK: vshuf.h $vr3, $vr11, $vr2 ++0x95 0x74 0x7b 0x71 # CHECK: vshuf.w $vr21, $vr4, $vr29 ++0xeb 0xca 0x7b 0x71 # CHECK: vshuf.d $vr11, $vr23, $vr18 ++0xdb 0x1d 0x80 0x72 # CHECK: vseqi.b $vr27, $vr14, 7 ++0x77 0xeb 0x80 0x72 # CHECK: vseqi.h $vr23, $vr27, -6 ++0x08 0x41 0x81 0x72 # CHECK: vseqi.w $vr8, $vr8, -16 ++0xab 0x94 0x81 0x72 # CHECK: vseqi.d $vr11, $vr5, 5 ++0x68 0x1f 0x82 0x72 # CHECK: vslei.b $vr8, $vr27, 7 ++0xbb 0xef 0x82 0x72 # CHECK: vslei.h $vr27, $vr29, -5 ++0xb7 0x75 0x83 0x72 # CHECK: vslei.w $vr23, $vr13, -3 ++0xe5 0xe1 0x83 0x72 # CHECK: vslei.d $vr5, $vr15, -8 ++0x5d 0x25 0x84 0x72 # CHECK: vslei.bu $vr29, $vr10, 9 ++0x5d 0xae 0x84 0x72 # CHECK: vslei.hu $vr29, $vr18, 11 ++0x28 0x08 0x85 0x72 # CHECK: vslei.wu $vr8, $vr1, 2 ++0xb0 0xa8 0x85 0x72 # CHECK: vslei.du $vr16, $vr5, 10 ++0x88 0x78 0x86 0x72 # CHECK: vslti.b $vr8, $vr4, -2 ++0xfa 0xc8 0x86 0x72 # CHECK: vslti.h $vr26, $vr7, -14 ++0x1c 0x31 0x87 0x72 # CHECK: vslti.w $vr28, $vr8, 12 ++0x64 0xa7 0x87 0x72 # CHECK: vslti.d $vr4, $vr27, 9 ++0xca 0x49 0x88 0x72 # CHECK: vslti.bu $vr10, $vr14, 18 ++0x9c 0xfb 0x88 0x72 # CHECK: vslti.hu $vr28, $vr28, 30 ++0x6f 0x6f 0x89 0x72 # CHECK: vslti.wu 
$vr15, $vr27, 27 ++0x3e 0xce 0x89 0x72 # CHECK: vslti.du $vr30, $vr17, 19 ++0x26 0x48 0x8a 0x72 # CHECK: vaddi.bu $vr6, $vr1, 18 ++0xcc 0x95 0x8a 0x72 # CHECK: vaddi.hu $vr12, $vr14, 5 ++0x1c 0x68 0x8b 0x72 # CHECK: vaddi.wu $vr28, $vr0, 26 ++0xaa 0x88 0x8b 0x72 # CHECK: vaddi.du $vr10, $vr5, 2 ++0x96 0x0b 0x8c 0x72 # CHECK: vsubi.bu $vr22, $vr28, 2 ++0xc0 0xfe 0x8c 0x72 # CHECK: vsubi.hu $vr0, $vr22, 31 ++0xd4 0x14 0x8d 0x72 # CHECK: vsubi.wu $vr20, $vr6, 5 ++0x72 0x85 0x8d 0x72 # CHECK: vsubi.du $vr18, $vr11, 1 ++0x44 0x13 0x8e 0x72 # CHECK: vbsll.v $vr4, $vr26, 4 ++0xe7 0xbf 0x8e 0x72 # CHECK: vbsrl.v $vr7, $vr31, 15 ++0xf3 0x39 0x90 0x72 # CHECK: vmaxi.b $vr19, $vr15, 14 ++0x79 0xd0 0x90 0x72 # CHECK: vmaxi.h $vr25, $vr3, -12 ++0x34 0x17 0x91 0x72 # CHECK: vmaxi.w $vr20, $vr25, 5 ++0x49 0xb1 0x91 0x72 # CHECK: vmaxi.d $vr9, $vr10, 12 ++0xbe 0x72 0x92 0x72 # CHECK: vmini.b $vr30, $vr21, -4 ++0x8b 0xf7 0x92 0x72 # CHECK: vmini.h $vr11, $vr28, -3 ++0x26 0x5f 0x93 0x72 # CHECK: vmini.w $vr6, $vr25, -9 ++0x1c 0x89 0x93 0x72 # CHECK: vmini.d $vr28, $vr8, 2 ++0x0d 0x4f 0x94 0x72 # CHECK: vmaxi.bu $vr13, $vr24, 19 ++0x23 0xd8 0x94 0x72 # CHECK: vmaxi.hu $vr3, $vr1, 22 ++0x61 0x5c 0x95 0x72 # CHECK: vmaxi.wu $vr1, $vr3, 23 ++0x46 0xd6 0x95 0x72 # CHECK: vmaxi.du $vr6, $vr18, 21 ++0x4a 0x50 0x96 0x72 # CHECK: vmini.bu $vr10, $vr2, 20 ++0x31 0xbe 0x96 0x72 # CHECK: vmini.hu $vr17, $vr17, 15 ++0x7a 0x5f 0x97 0x72 # CHECK: vmini.wu $vr26, $vr27, 23 ++0x6c 0xa3 0x97 0x72 # CHECK: vmini.du $vr12, $vr27, 8 ++0x1a 0x25 0x9a 0x72 # CHECK: vfrstpi.b $vr26, $vr8, 9 ++0x50 0xd0 0x9a 0x72 # CHECK: vfrstpi.h $vr16, $vr2, 20 ++0x25 0x02 0x9c 0x72 # CHECK: vclo.b $vr5, $vr17 ++0x88 0x04 0x9c 0x72 # CHECK: vclo.h $vr8, $vr4 ++0xa1 0x09 0x9c 0x72 # CHECK: vclo.w $vr1, $vr13 ++0xe0 0x0e 0x9c 0x72 # CHECK: vclo.d $vr0, $vr23 ++0x24 0x13 0x9c 0x72 # CHECK: vclz.b $vr4, $vr25 ++0x21 0x17 0x9c 0x72 # CHECK: vclz.h $vr1, $vr25 ++0xa1 0x18 0x9c 0x72 # CHECK: vclz.w $vr1, $vr5 ++0x30 0x1e 0x9c 0x72 # CHECK: vclz.d $vr16, $vr17 ++0x64 0x20 0x9c 0x72 # CHECK: vpcnt.b $vr4, $vr3 ++0x2f 0x26 0x9c 0x72 # CHECK: vpcnt.h $vr15, $vr17 ++0x0d 0x29 0x9c 0x72 # CHECK: vpcnt.w $vr13, $vr8 ++0x00 0x2d 0x9c 0x72 # CHECK: vpcnt.d $vr0, $vr8 ++0x0e 0x33 0x9c 0x72 # CHECK: vneg.b $vr14, $vr24 ++0xf8 0x34 0x9c 0x72 # CHECK: vneg.h $vr24, $vr7 ++0xb3 0x38 0x9c 0x72 # CHECK: vneg.w $vr19, $vr5 ++0x83 0x3f 0x9c 0x72 # CHECK: vneg.d $vr3, $vr28 ++0x3f 0x43 0x9c 0x72 # CHECK: vmskltz.b $vr31, $vr25 ++0x89 0x46 0x9c 0x72 # CHECK: vmskltz.h $vr9, $vr20 ++0x56 0x4b 0x9c 0x72 # CHECK: vmskltz.w $vr22, $vr26 ++0x5c 0x4d 0x9c 0x72 # CHECK: vmskltz.d $vr28, $vr10 ++0xa7 0x50 0x9c 0x72 # CHECK: vmskgez.b $vr7, $vr5 ++0x94 0x61 0x9c 0x72 # CHECK: vmsknz.b $vr20, $vr12 ++0xc5 0x99 0x9c 0x72 # CHECK: vseteqz.v $fcc5, $vr14 ++0x02 0x9d 0x9c 0x72 # CHECK: vsetnez.v $fcc2, $vr8 ++0x80 0xa2 0x9c 0x72 # CHECK: vsetanyeqz.b $fcc0, $vr20 ++0x04 0xa6 0x9c 0x72 # CHECK: vsetanyeqz.h $fcc4, $vr16 ++0x47 0xa8 0x9c 0x72 # CHECK: vsetanyeqz.w $fcc7, $vr2 ++0x84 0xad 0x9c 0x72 # CHECK: vsetanyeqz.d $fcc4, $vr12 ++0x07 0xb0 0x9c 0x72 # CHECK: vsetallnez.b $fcc7, $vr0 ++0x62 0xb5 0x9c 0x72 # CHECK: vsetallnez.h $fcc2, $vr11 ++0x26 0xbb 0x9c 0x72 # CHECK: vsetallnez.w $fcc6, $vr25 ++0xe7 0xbf 0x9c 0x72 # CHECK: vsetallnez.d $fcc7, $vr31 ++0x8e 0xc7 0x9c 0x72 # CHECK: vflogb.s $vr14, $vr28 ++0x3d 0xc9 0x9c 0x72 # CHECK: vflogb.d $vr29, $vr9 ++0xa3 0xd5 0x9c 0x72 # CHECK: vfclass.s $vr3, $vr13 ++0xe5 0xd9 0x9c 0x72 # CHECK: vfclass.d $vr5, $vr15 ++0x73 0xe7 0x9c 0x72 # 
CHECK: vfsqrt.s $vr19, $vr27 ++0x7f 0xe8 0x9c 0x72 # CHECK: vfsqrt.d $vr31, $vr3 ++0x18 0xf6 0x9c 0x72 # CHECK: vfrecip.s $vr24, $vr16 ++0x77 0xfa 0x9c 0x72 # CHECK: vfrecip.d $vr23, $vr19 ++0xf2 0x05 0x9d 0x72 # CHECK: vfrsqrt.s $vr18, $vr15 ++0xf2 0x0b 0x9d 0x72 # CHECK: vfrsqrt.d $vr18, $vr31 ++0x7a 0x35 0x9d 0x72 # CHECK: vfrint.s $vr26, $vr11 ++0x58 0x3a 0x9d 0x72 # CHECK: vfrint.d $vr24, $vr18 ++0x65 0x44 0x9d 0x72 # CHECK: vfrintrm.s $vr5, $vr3 ++0x57 0x49 0x9d 0x72 # CHECK: vfrintrm.d $vr23, $vr10 ++0x54 0x54 0x9d 0x72 # CHECK: vfrintrp.s $vr20, $vr2 ++0x3e 0x5a 0x9d 0x72 # CHECK: vfrintrp.d $vr30, $vr17 ++0xd3 0x64 0x9d 0x72 # CHECK: vfrintrz.s $vr19, $vr6 ++0x50 0x6a 0x9d 0x72 # CHECK: vfrintrz.d $vr16, $vr18 ++0x08 0x77 0x9d 0x72 # CHECK: vfrintrne.s $vr8, $vr24 ++0xa6 0x78 0x9d 0x72 # CHECK: vfrintrne.d $vr6, $vr5 ++0xc4 0xe8 0x9d 0x72 # CHECK: vfcvtl.s.h $vr4, $vr6 ++0xf0 0xec 0x9d 0x72 # CHECK: vfcvth.s.h $vr16, $vr7 ++0x50 0xf1 0x9d 0x72 # CHECK: vfcvtl.d.s $vr16, $vr10 ++0x3c 0xf7 0x9d 0x72 # CHECK: vfcvth.d.s $vr28, $vr25 ++0x1c 0x02 0x9e 0x72 # CHECK: vffint.s.w $vr28, $vr16 ++0xe4 0x07 0x9e 0x72 # CHECK: vffint.s.wu $vr4, $vr31 ++0x32 0x0b 0x9e 0x72 # CHECK: vffint.d.l $vr18, $vr25 ++0x38 0x0e 0x9e 0x72 # CHECK: vffint.d.lu $vr24, $vr17 ++0x62 0x13 0x9e 0x72 # CHECK: vffintl.d.w $vr2, $vr27 ++0x04 0x16 0x9e 0x72 # CHECK: vffinth.d.w $vr4, $vr16 ++0x11 0x30 0x9e 0x72 # CHECK: vftint.w.s $vr17, $vr0 ++0x57 0x36 0x9e 0x72 # CHECK: vftint.l.d $vr23, $vr18 ++0x97 0x38 0x9e 0x72 # CHECK: vftintrm.w.s $vr23, $vr4 ++0xde 0x3d 0x9e 0x72 # CHECK: vftintrm.l.d $vr30, $vr14 ++0x07 0x40 0x9e 0x72 # CHECK: vftintrp.w.s $vr7, $vr0 ++0x9c 0x46 0x9e 0x72 # CHECK: vftintrp.l.d $vr28, $vr20 ++0xfc 0x4b 0x9e 0x72 # CHECK: vftintrz.w.s $vr28, $vr31 ++0x12 0x4c 0x9e 0x72 # CHECK: vftintrz.l.d $vr18, $vr0 ++0x2e 0x52 0x9e 0x72 # CHECK: vftintrne.w.s $vr14, $vr17 ++0x56 0x56 0x9e 0x72 # CHECK: vftintrne.l.d $vr22, $vr18 ++0x3a 0x5b 0x9e 0x72 # CHECK: vftint.wu.s $vr26, $vr25 ++0x69 0x5f 0x9e 0x72 # CHECK: vftint.lu.d $vr9, $vr27 ++0xda 0x72 0x9e 0x72 # CHECK: vftintrz.wu.s $vr26, $vr22 ++0x9d 0x76 0x9e 0x72 # CHECK: vftintrz.lu.d $vr29, $vr20 ++0x36 0x80 0x9e 0x72 # CHECK: vftintl.l.s $vr22, $vr1 ++0x0d 0x87 0x9e 0x72 # CHECK: vftinth.l.s $vr13, $vr24 ++0x68 0x8b 0x9e 0x72 # CHECK: vftintrml.l.s $vr8, $vr27 ++0x92 0x8f 0x9e 0x72 # CHECK: vftintrmh.l.s $vr18, $vr28 ++0x9b 0x93 0x9e 0x72 # CHECK: vftintrpl.l.s $vr27, $vr28 ++0xf4 0x94 0x9e 0x72 # CHECK: vftintrph.l.s $vr20, $vr7 ++0x46 0x98 0x9e 0x72 # CHECK: vftintrzl.l.s $vr6, $vr2 ++0xd5 0x9c 0x9e 0x72 # CHECK: vftintrzh.l.s $vr21, $vr6 ++0x79 0xa0 0x9e 0x72 # CHECK: vftintrnel.l.s $vr25, $vr3 ++0xa7 0xa4 0x9e 0x72 # CHECK: vftintrneh.l.s $vr7, $vr5 ++0x49 0xe0 0x9e 0x72 # CHECK: vexth.h.b $vr9, $vr2 ++0x64 0xe7 0x9e 0x72 # CHECK: vexth.w.h $vr4, $vr27 ++0x37 0xe8 0x9e 0x72 # CHECK: vexth.d.w $vr23, $vr1 ++0xcf 0xec 0x9e 0x72 # CHECK: vexth.q.d $vr15, $vr6 ++0x43 0xf0 0x9e 0x72 # CHECK: vexth.hu.bu $vr3, $vr2 ++0x5f 0xf7 0x9e 0x72 # CHECK: vexth.wu.hu $vr31, $vr26 ++0xea 0xfb 0x9e 0x72 # CHECK: vexth.du.wu $vr10, $vr31 ++0x1c 0xfd 0x9e 0x72 # CHECK: vexth.qu.du $vr28, $vr8 ++0x6f 0x00 0x9f 0x72 # CHECK: vreplgr2vr.b $vr15, $sp ++0xea 0x06 0x9f 0x72 # CHECK: vreplgr2vr.h $vr10, $r23 ++0x19 0x0a 0x9f 0x72 # CHECK: vreplgr2vr.w $vr25, $r16 ++0xfb 0x0c 0x9f 0x72 # CHECK: vreplgr2vr.d $vr27, $r7 ++0x18 0x3f 0xa0 0x72 # CHECK: vrotri.b $vr24, $vr24, 7 ++0xa1 0x40 0xa0 0x72 # CHECK: vrotri.h $vr1, $vr5, 0 ++0x0a 0xb1 0xa0 0x72 # CHECK: vrotri.w 
$vr10, $vr8, 12 ++0xbe 0xab 0xa1 0x72 # CHECK: vrotri.d $vr30, $vr29, 42 ++0x01 0x2e 0xa4 0x72 # CHECK: vsrlri.b $vr1, $vr16, 3 ++0xbc 0x46 0xa4 0x72 # CHECK: vsrlri.h $vr28, $vr21, 1 ++0x92 0xbc 0xa4 0x72 # CHECK: vsrlri.w $vr18, $vr4, 15 ++0x7e 0x4c 0xa5 0x72 # CHECK: vsrlri.d $vr30, $vr3, 19 ++0x0d 0x3c 0xa8 0x72 # CHECK: vsrari.b $vr13, $vr0, 7 ++0x31 0x59 0xa8 0x72 # CHECK: vsrari.h $vr17, $vr9, 6 ++0x83 0x9b 0xa8 0x72 # CHECK: vsrari.w $vr3, $vr28, 6 ++0x44 0x88 0xa9 0x72 # CHECK: vsrari.d $vr4, $vr2, 34 ++0x08 0x91 0xeb 0x72 # CHECK: vinsgr2vr.b $vr8, $r8, 4 ++0xed 0xc4 0xeb 0x72 # CHECK: vinsgr2vr.h $vr13, $r7, 1 ++0xc4 0xec 0xeb 0x72 # CHECK: vinsgr2vr.w $vr4, $r6, 3 ++0xf7 0xf3 0xeb 0x72 # CHECK: vinsgr2vr.d $vr23, $r31, 0 ++0x18 0xaa 0xef 0x72 # CHECK: vpickve2gr.b $r24, $vr16, 10 ++0x31 0xcf 0xef 0x72 # CHECK: vpickve2gr.h $r17, $vr25, 3 ++0x9e 0xeb 0xef 0x72 # CHECK: vpickve2gr.w $r30, $vr28, 2 ++0x39 0xf5 0xef 0x72 # CHECK: vpickve2gr.d $r25, $vr9, 1 ++0xdf 0x89 0xf3 0x72 # CHECK: vpickve2gr.bu $r31, $vr14, 2 ++0x2c 0xd8 0xf3 0x72 # CHECK: vpickve2gr.hu $r12, $vr1, 6 ++0x2a 0xe6 0xf3 0x72 # CHECK: vpickve2gr.wu $r10, $vr17, 1 ++0x1a 0xf5 0xf3 0x72 # CHECK: vpickve2gr.du $r26, $vr8, 1 ++0xc3 0xb0 0xf7 0x72 # CHECK: vreplvei.b $vr3, $vr6, 12 ++0xb6 0xdf 0xf7 0x72 # CHECK: vreplvei.h $vr22, $vr29, 7 ++0x51 0xe7 0xf7 0x72 # CHECK: vreplvei.w $vr17, $vr26, 1 ++0x20 0xf6 0xf7 0x72 # CHECK: vreplvei.d $vr0, $vr17, 1 ++0xd9 0x29 0x08 0x73 # CHECK: vsllwil.h.b $vr25, $vr14, 2 ++0xb8 0x44 0x08 0x73 # CHECK: vsllwil.w.h $vr24, $vr5, 1 ++0xd9 0xa1 0x08 0x73 # CHECK: vsllwil.d.w $vr25, $vr14, 8 ++0xc3 0x02 0x09 0x73 # CHECK: vextl.q.d $vr3, $vr22 ++0x2b 0x2f 0x0c 0x73 # CHECK: vsllwil.hu.bu $vr11, $vr25, 3 ++0x42 0x6b 0x0c 0x73 # CHECK: vsllwil.wu.hu $vr2, $vr26, 10 ++0x32 0xf1 0x0c 0x73 # CHECK: vsllwil.du.wu $vr18, $vr9, 28 ++0x2d 0x03 0x0d 0x73 # CHECK: vextl.qu.du $vr13, $vr25 ++0x1d 0x3b 0x10 0x73 # CHECK: vbitclri.b $vr29, $vr24, 6 ++0xfb 0x55 0x10 0x73 # CHECK: vbitclri.h $vr27, $vr15, 5 ++0x4b 0xa1 0x10 0x73 # CHECK: vbitclri.w $vr11, $vr10, 8 ++0xe4 0x3c 0x11 0x73 # CHECK: vbitclri.d $vr4, $vr7, 15 ++0x98 0x2e 0x14 0x73 # CHECK: vbitseti.b $vr24, $vr20, 3 ++0x06 0x61 0x14 0x73 # CHECK: vbitseti.h $vr6, $vr8, 8 ++0x35 0xe1 0x14 0x73 # CHECK: vbitseti.w $vr21, $vr9, 24 ++0x5c 0x7a 0x15 0x73 # CHECK: vbitseti.d $vr28, $vr18, 30 ++0xf3 0x23 0x18 0x73 # CHECK: vbitrevi.b $vr19, $vr31, 0 ++0x32 0x40 0x18 0x73 # CHECK: vbitrevi.h $vr18, $vr1, 0 ++0xd9 0xc8 0x18 0x73 # CHECK: vbitrevi.w $vr25, $vr6, 18 ++0x68 0x5b 0x19 0x73 # CHECK: vbitrevi.d $vr8, $vr27, 22 ++0x95 0x2b 0x24 0x73 # CHECK: vsat.b $vr21, $vr28, 2 ++0xa6 0x70 0x24 0x73 # CHECK: vsat.h $vr6, $vr5, 12 ++0xc3 0xc3 0x24 0x73 # CHECK: vsat.w $vr3, $vr30, 16 ++0xe0 0x63 0x25 0x73 # CHECK: vsat.d $vr0, $vr31, 24 ++0x94 0x2a 0x28 0x73 # CHECK: vsat.bu $vr20, $vr20, 2 ++0xc8 0x70 0x28 0x73 # CHECK: vsat.hu $vr8, $vr6, 12 ++0x92 0xea 0x28 0x73 # CHECK: vsat.wu $vr18, $vr20, 26 ++0xca 0x84 0x29 0x73 # CHECK: vsat.du $vr10, $vr6, 33 ++0x64 0x2e 0x2c 0x73 # CHECK: vslli.b $vr4, $vr19, 3 ++0xe3 0x7a 0x2c 0x73 # CHECK: vslli.h $vr3, $vr23, 14 ++0xb6 0x9a 0x2c 0x73 # CHECK: vslli.w $vr22, $vr21, 6 ++0xf7 0x91 0x2d 0x73 # CHECK: vslli.d $vr23, $vr15, 36 ++0x25 0x33 0x30 0x73 # CHECK: vsrli.b $vr5, $vr25, 4 ++0xc9 0x65 0x30 0x73 # CHECK: vsrli.h $vr9, $vr14, 9 ++0x07 0xb3 0x30 0x73 # CHECK: vsrli.w $vr7, $vr24, 12 ++0x4f 0xfe 0x31 0x73 # CHECK: vsrli.d $vr15, $vr18, 63 ++0x26 0x2c 0x34 0x73 # CHECK: vsrai.b $vr6, $vr1, 3 ++0xa7 0x4f 0x34 
0x73 # CHECK: vsrai.h $vr7, $vr29, 3 ++0x7f 0xf7 0x34 0x73 # CHECK: vsrai.w $vr31, $vr27, 29 ++0xdc 0xe3 0x35 0x73 # CHECK: vsrai.d $vr28, $vr30, 56 ++0x42 0x4b 0x40 0x73 # CHECK: vsrlni.b.h $vr2, $vr26, 2 ++0xdf 0x8d 0x40 0x73 # CHECK: vsrlni.h.w $vr31, $vr14, 3 ++0x93 0x84 0x41 0x73 # CHECK: vsrlni.w.d $vr19, $vr4, 33 ++0x7f 0xfc 0x42 0x73 # CHECK: vsrlni.d.q $vr31, $vr3, 63 ++0x5a 0x42 0x44 0x73 # CHECK: vsrlrni.b.h $vr26, $vr18, 0 ++0xd2 0x96 0x44 0x73 # CHECK: vsrlrni.h.w $vr18, $vr22, 5 ++0x78 0x55 0x45 0x73 # CHECK: vsrlrni.w.d $vr24, $vr11, 21 ++0x66 0x95 0x46 0x73 # CHECK: vsrlrni.d.q $vr6, $vr11, 37 ++0xa3 0x56 0x48 0x73 # CHECK: vssrlni.b.h $vr3, $vr21, 5 ++0x26 0xc0 0x48 0x73 # CHECK: vssrlni.h.w $vr6, $vr1, 16 ++0xa4 0x6e 0x49 0x73 # CHECK: vssrlni.w.d $vr4, $vr21, 27 ++0x48 0x7a 0x4b 0x73 # CHECK: vssrlni.d.q $vr8, $vr18, 94 ++0x46 0x54 0x4c 0x73 # CHECK: vssrlni.bu.h $vr6, $vr2, 5 ++0xbd 0x8b 0x4c 0x73 # CHECK: vssrlni.hu.w $vr29, $vr29, 2 ++0x9c 0xbe 0x4d 0x73 # CHECK: vssrlni.wu.d $vr28, $vr20, 47 ++0x56 0x49 0x4f 0x73 # CHECK: vssrlni.du.q $vr22, $vr10, 82 ++0x31 0x6b 0x50 0x73 # CHECK: vssrlrni.b.h $vr17, $vr25, 10 ++0xb5 0x83 0x50 0x73 # CHECK: vssrlrni.h.w $vr21, $vr29, 0 ++0xe9 0xfd 0x51 0x73 # CHECK: vssrlrni.w.d $vr9, $vr15, 63 ++0x24 0xd4 0x53 0x73 # CHECK: vssrlrni.d.q $vr4, $vr1, 117 ++0xb9 0x4d 0x54 0x73 # CHECK: vssrlrni.bu.h $vr25, $vr13, 3 ++0x9e 0x9f 0x54 0x73 # CHECK: vssrlrni.hu.w $vr30, $vr28, 7 ++0x70 0x2f 0x55 0x73 # CHECK: vssrlrni.wu.d $vr16, $vr27, 11 ++0xb4 0xfd 0x56 0x73 # CHECK: vssrlrni.du.q $vr20, $vr13, 63 ++0x23 0x53 0x58 0x73 # CHECK: vsrani.b.h $vr3, $vr25, 4 ++0xac 0xc5 0x58 0x73 # CHECK: vsrani.h.w $vr12, $vr13, 17 ++0xc2 0x64 0x59 0x73 # CHECK: vsrani.w.d $vr2, $vr6, 25 ++0x0c 0xa5 0x5b 0x73 # CHECK: vsrani.d.q $vr12, $vr8, 105 ++0xbb 0x4a 0x5c 0x73 # CHECK: vsrarni.b.h $vr27, $vr21, 2 ++0x6d 0x80 0x5c 0x73 # CHECK: vsrarni.h.w $vr13, $vr3, 0 ++0xe9 0xab 0x5d 0x73 # CHECK: vsrarni.w.d $vr9, $vr31, 42 ++0xb9 0xec 0x5e 0x73 # CHECK: vsrarni.d.q $vr25, $vr5, 59 ++0xe8 0x70 0x60 0x73 # CHECK: vssrani.b.h $vr8, $vr7, 12 ++0x55 0xfa 0x60 0x73 # CHECK: vssrani.h.w $vr21, $vr18, 30 ++0xf7 0xcc 0x61 0x73 # CHECK: vssrani.w.d $vr23, $vr7, 51 ++0xcc 0x21 0x62 0x73 # CHECK: vssrani.d.q $vr12, $vr14, 8 ++0xb3 0x70 0x64 0x73 # CHECK: vssrani.bu.h $vr19, $vr5, 12 ++0x3b 0xbf 0x64 0x73 # CHECK: vssrani.hu.w $vr27, $vr25, 15 ++0x98 0xab 0x65 0x73 # CHECK: vssrani.wu.d $vr24, $vr28, 42 ++0xe4 0xfe 0x66 0x73 # CHECK: vssrani.du.q $vr4, $vr23, 63 ++0x1a 0x41 0x68 0x73 # CHECK: vssrarni.b.h $vr26, $vr8, 0 ++0x64 0xe4 0x68 0x73 # CHECK: vssrarni.h.w $vr4, $vr3, 25 ++0x20 0x4f 0x69 0x73 # CHECK: vssrarni.w.d $vr0, $vr25, 19 ++0x74 0xa9 0x6b 0x73 # CHECK: vssrarni.d.q $vr20, $vr11, 106 ++0x99 0x67 0x6c 0x73 # CHECK: vssrarni.bu.h $vr25, $vr28, 9 ++0xf4 0xb2 0x6c 0x73 # CHECK: vssrarni.hu.w $vr20, $vr23, 12 ++0xfc 0xea 0x6d 0x73 # CHECK: vssrarni.wu.d $vr28, $vr23, 58 ++0xc1 0x75 0x6f 0x73 # CHECK: vssrarni.du.q $vr1, $vr14, 93 ++0x6f 0x1f 0x80 0x73 # CHECK: vextrins.d $vr15, $vr27, 7 ++0x13 0x4c 0x86 0x73 # CHECK: vextrins.w $vr19, $vr0, 147 ++0x3d 0x15 0x89 0x73 # CHECK: vextrins.h $vr29, $vr9, 69 ++0xa0 0x5e 0x8c 0x73 # CHECK: vextrins.b $vr0, $vr21, 23 ++0x53 0xf1 0x92 0x73 # CHECK: vshuf4i.b $vr19, $vr10, 188 ++0x2f 0x2c 0x96 0x73 # CHECK: vshuf4i.h $vr15, $vr1, 139 ++0xa3 0x08 0x9a 0x73 # CHECK: vshuf4i.w $vr3, $vr5, 130 ++0xa8 0x0f 0x9e 0x73 # CHECK: vshuf4i.d $vr8, $vr29, 131 ++0x30 0xa3 0xc6 0x73 # CHECK: vbitseli.b $vr16, $vr25, 168 ++0xe4 0xe6 0xd1 
0x73 # CHECK: vandi.b $vr4, $vr23, 121 ++0x47 0xf1 0xd6 0x73 # CHECK: vori.b $vr7, $vr10, 188 ++0x49 0x63 0xdb 0x73 # CHECK: vxori.b $vr9, $vr26, 216 ++0x84 0x6f 0xdf 0x73 # CHECK: vnori.b $vr4, $vr28, 219 ++0x56 0x2c 0xe2 0x73 # CHECK: vldi $vr22, -3742 ++0xae 0x43 0xe4 0x73 # CHECK: vpermi.w $vr14, $vr29, 16 ++0xeb 0x56 0x00 0x74 # CHECK: xvseq.b $xr11, $xr23, $xr21 ++0x46 0xed 0x00 0x74 # CHECK: xvseq.h $xr6, $xr10, $xr27 ++0x73 0x57 0x01 0x74 # CHECK: xvseq.w $xr19, $xr27, $xr21 ++0x92 0x88 0x01 0x74 # CHECK: xvseq.d $xr18, $xr4, $xr2 ++0x53 0x15 0x02 0x74 # CHECK: xvsle.b $xr19, $xr10, $xr5 ++0x2a 0xbb 0x02 0x74 # CHECK: xvsle.h $xr10, $xr25, $xr14 ++0xf1 0x4a 0x03 0x74 # CHECK: xvsle.w $xr17, $xr23, $xr18 ++0xef 0xa4 0x03 0x74 # CHECK: xvsle.d $xr15, $xr7, $xr9 ++0xc5 0x3d 0x04 0x74 # CHECK: xvsle.bu $xr5, $xr14, $xr15 ++0x29 0xe7 0x04 0x74 # CHECK: xvsle.hu $xr9, $xr25, $xr25 ++0xfc 0x43 0x05 0x74 # CHECK: xvsle.wu $xr28, $xr31, $xr16 ++0x11 0xe3 0x05 0x74 # CHECK: xvsle.du $xr17, $xr24, $xr24 ++0x92 0x67 0x06 0x74 # CHECK: xvslt.b $xr18, $xr28, $xr25 ++0xdd 0x88 0x06 0x74 # CHECK: xvslt.h $xr29, $xr6, $xr2 ++0x4e 0x15 0x07 0x74 # CHECK: xvslt.w $xr14, $xr10, $xr5 ++0xd3 0xbf 0x07 0x74 # CHECK: xvslt.d $xr19, $xr30, $xr15 ++0xce 0x6c 0x08 0x74 # CHECK: xvslt.bu $xr14, $xr6, $xr27 ++0x5b 0x97 0x08 0x74 # CHECK: xvslt.hu $xr27, $xr26, $xr5 ++0x26 0x29 0x09 0x74 # CHECK: xvslt.wu $xr6, $xr9, $xr10 ++0x8d 0xf1 0x09 0x74 # CHECK: xvslt.du $xr13, $xr12, $xr28 ++0xc0 0x0c 0x0a 0x74 # CHECK: xvadd.b $xr0, $xr6, $xr3 ++0x68 0xa9 0x0a 0x74 # CHECK: xvadd.h $xr8, $xr11, $xr10 ++0xc5 0x54 0x0b 0x74 # CHECK: xvadd.w $xr5, $xr6, $xr21 ++0xa4 0xaa 0x0b 0x74 # CHECK: xvadd.d $xr4, $xr21, $xr10 ++0x10 0x78 0x0c 0x74 # CHECK: xvsub.b $xr16, $xr0, $xr30 ++0x7c 0xc9 0x0c 0x74 # CHECK: xvsub.h $xr28, $xr11, $xr18 ++0x4d 0x34 0x0d 0x74 # CHECK: xvsub.w $xr13, $xr2, $xr13 ++0x20 0xd7 0x0d 0x74 # CHECK: xvsub.d $xr0, $xr25, $xr21 ++0xc8 0x2f 0x1e 0x74 # CHECK: xvaddwev.h.b $xr8, $xr30, $xr11 ++0xca 0x97 0x1e 0x74 # CHECK: xvaddwev.w.h $xr10, $xr30, $xr5 ++0x34 0x07 0x1f 0x74 # CHECK: xvaddwev.d.w $xr20, $xr25, $xr1 ++0x16 0xe3 0x1f 0x74 # CHECK: xvaddwev.q.d $xr22, $xr24, $xr24 ++0x21 0x07 0x20 0x74 # CHECK: xvsubwev.h.b $xr1, $xr25, $xr1 ++0xc4 0xaf 0x20 0x74 # CHECK: xvsubwev.w.h $xr4, $xr30, $xr11 ++0x46 0x48 0x21 0x74 # CHECK: xvsubwev.d.w $xr6, $xr2, $xr18 ++0x60 0xfd 0x21 0x74 # CHECK: xvsubwev.q.d $xr0, $xr11, $xr31 ++0x84 0x64 0x22 0x74 # CHECK: xvaddwod.h.b $xr4, $xr4, $xr25 ++0x2c 0xf7 0x22 0x74 # CHECK: xvaddwod.w.h $xr12, $xr25, $xr29 ++0xd0 0x4e 0x23 0x74 # CHECK: xvaddwod.d.w $xr16, $xr22, $xr19 ++0x37 0xbb 0x23 0x74 # CHECK: xvaddwod.q.d $xr23, $xr25, $xr14 ++0x01 0x22 0x24 0x74 # CHECK: xvsubwod.h.b $xr1, $xr16, $xr8 ++0x65 0xa1 0x24 0x74 # CHECK: xvsubwod.w.h $xr5, $xr11, $xr8 ++0xf4 0x00 0x25 0x74 # CHECK: xvsubwod.d.w $xr20, $xr7, $xr0 ++0xf1 0xd2 0x25 0x74 # CHECK: xvsubwod.q.d $xr17, $xr23, $xr20 ++0x4f 0x7d 0x2e 0x74 # CHECK: xvaddwev.h.bu $xr15, $xr10, $xr31 ++0x15 0xf3 0x2e 0x74 # CHECK: xvaddwev.w.hu $xr21, $xr24, $xr28 ++0xe9 0x3b 0x2f 0x74 # CHECK: xvaddwev.d.wu $xr9, $xr31, $xr14 ++0x39 0xa0 0x2f 0x74 # CHECK: xvaddwev.q.du $xr25, $xr1, $xr8 ++0xfe 0x37 0x30 0x74 # CHECK: xvsubwev.h.bu $xr30, $xr31, $xr13 ++0x81 0x87 0x30 0x74 # CHECK: xvsubwev.w.hu $xr1, $xr28, $xr1 ++0xfd 0x76 0x31 0x74 # CHECK: xvsubwev.d.wu $xr29, $xr23, $xr29 ++0x0d 0xee 0x31 0x74 # CHECK: xvsubwev.q.du $xr13, $xr16, $xr27 ++0xad 0x0b 0x32 0x74 # CHECK: xvaddwod.h.bu $xr13, $xr29, $xr2 ++0x4e 0xb5 0x32 0x74 
# CHECK: xvaddwod.w.hu $xr14, $xr10, $xr13 ++0x5e 0x2b 0x33 0x74 # CHECK: xvaddwod.d.wu $xr30, $xr26, $xr10 ++0xa2 0x81 0x33 0x74 # CHECK: xvaddwod.q.du $xr2, $xr13, $xr0 ++0xc6 0x16 0x34 0x74 # CHECK: xvsubwod.h.bu $xr6, $xr22, $xr5 ++0xb3 0xa2 0x34 0x74 # CHECK: xvsubwod.w.hu $xr19, $xr21, $xr8 ++0x70 0x79 0x35 0x74 # CHECK: xvsubwod.d.wu $xr16, $xr11, $xr30 ++0x41 0xa7 0x35 0x74 # CHECK: xvsubwod.q.du $xr1, $xr26, $xr9 ++0xa5 0x09 0x3e 0x74 # CHECK: xvaddwev.h.bu.b $xr5, $xr13, $xr2 ++0xb1 0xd2 0x3e 0x74 # CHECK: xvaddwev.w.hu.h $xr17, $xr21, $xr20 ++0x6b 0x4f 0x3f 0x74 # CHECK: xvaddwev.d.wu.w $xr11, $xr27, $xr19 ++0xb4 0xf6 0x3f 0x74 # CHECK: xvaddwev.q.du.d $xr20, $xr21, $xr29 ++0xc1 0x38 0x40 0x74 # CHECK: xvaddwod.h.bu.b $xr1, $xr6, $xr14 ++0xa7 0xaf 0x40 0x74 # CHECK: xvaddwod.w.hu.h $xr7, $xr29, $xr11 ++0x50 0x39 0x41 0x74 # CHECK: xvaddwod.d.wu.w $xr16, $xr10, $xr14 ++0x6a 0xdd 0x41 0x74 # CHECK: xvaddwod.q.du.d $xr10, $xr11, $xr23 ++0x58 0x71 0x46 0x74 # CHECK: xvsadd.b $xr24, $xr10, $xr28 ++0x53 0xc6 0x46 0x74 # CHECK: xvsadd.h $xr19, $xr18, $xr17 ++0xc2 0x30 0x47 0x74 # CHECK: xvsadd.w $xr2, $xr6, $xr12 ++0x4f 0xf6 0x47 0x74 # CHECK: xvsadd.d $xr15, $xr18, $xr29 ++0xaf 0x43 0x48 0x74 # CHECK: xvssub.b $xr15, $xr29, $xr16 ++0x7c 0xa4 0x48 0x74 # CHECK: xvssub.h $xr28, $xr3, $xr9 ++0x88 0x3e 0x49 0x74 # CHECK: xvssub.w $xr8, $xr20, $xr15 ++0x17 0xcd 0x49 0x74 # CHECK: xvssub.d $xr23, $xr8, $xr19 ++0x8c 0x40 0x4a 0x74 # CHECK: xvsadd.bu $xr12, $xr4, $xr16 ++0x49 0xd3 0x4a 0x74 # CHECK: xvsadd.hu $xr9, $xr26, $xr20 ++0xfe 0x71 0x4b 0x74 # CHECK: xvsadd.wu $xr30, $xr15, $xr28 ++0xaf 0xf1 0x4b 0x74 # CHECK: xvsadd.du $xr15, $xr13, $xr28 ++0x6a 0x3c 0x4c 0x74 # CHECK: xvssub.bu $xr10, $xr3, $xr15 ++0x80 0x89 0x4c 0x74 # CHECK: xvssub.hu $xr0, $xr12, $xr2 ++0x5e 0x5d 0x4d 0x74 # CHECK: xvssub.wu $xr30, $xr10, $xr23 ++0xc9 0xbb 0x4d 0x74 # CHECK: xvssub.du $xr9, $xr30, $xr14 ++0xb9 0x48 0x54 0x74 # CHECK: xvhaddw.h.b $xr25, $xr5, $xr18 ++0x87 0xce 0x54 0x74 # CHECK: xvhaddw.w.h $xr7, $xr20, $xr19 ++0xb7 0x10 0x55 0x74 # CHECK: xvhaddw.d.w $xr23, $xr5, $xr4 ++0xf1 0xe4 0x55 0x74 # CHECK: xvhaddw.q.d $xr17, $xr7, $xr25 ++0x5d 0x4e 0x56 0x74 # CHECK: xvhsubw.h.b $xr29, $xr18, $xr19 ++0x9e 0x8f 0x56 0x74 # CHECK: xvhsubw.w.h $xr30, $xr28, $xr3 ++0x25 0x35 0x57 0x74 # CHECK: xvhsubw.d.w $xr5, $xr9, $xr13 ++0x94 0xf5 0x57 0x74 # CHECK: xvhsubw.q.d $xr20, $xr12, $xr29 ++0x4b 0x1d 0x58 0x74 # CHECK: xvhaddw.hu.bu $xr11, $xr10, $xr7 ++0xb0 0xd6 0x58 0x74 # CHECK: xvhaddw.wu.hu $xr16, $xr21, $xr21 ++0xf1 0x23 0x59 0x74 # CHECK: xvhaddw.du.wu $xr17, $xr31, $xr8 ++0x82 0xac 0x59 0x74 # CHECK: xvhaddw.qu.du $xr2, $xr4, $xr11 ++0xd5 0x21 0x5a 0x74 # CHECK: xvhsubw.hu.bu $xr21, $xr14, $xr8 ++0x19 0xec 0x5a 0x74 # CHECK: xvhsubw.wu.hu $xr25, $xr0, $xr27 ++0x04 0x7a 0x5b 0x74 # CHECK: xvhsubw.du.wu $xr4, $xr16, $xr30 ++0x2b 0x99 0x5b 0x74 # CHECK: xvhsubw.qu.du $xr11, $xr9, $xr6 ++0xae 0x6a 0x5c 0x74 # CHECK: xvadda.b $xr14, $xr21, $xr26 ++0xd5 0xd7 0x5c 0x74 # CHECK: xvadda.h $xr21, $xr30, $xr21 ++0x7f 0x4e 0x5d 0x74 # CHECK: xvadda.w $xr31, $xr19, $xr19 ++0x89 0xfc 0x5d 0x74 # CHECK: xvadda.d $xr9, $xr4, $xr31 ++0x74 0x36 0x60 0x74 # CHECK: xvabsd.b $xr20, $xr19, $xr13 ++0xf4 0xa8 0x60 0x74 # CHECK: xvabsd.h $xr20, $xr7, $xr10 ++0xf7 0x03 0x61 0x74 # CHECK: xvabsd.w $xr23, $xr31, $xr0 ++0x27 0xba 0x61 0x74 # CHECK: xvabsd.d $xr7, $xr17, $xr14 ++0xec 0x1a 0x62 0x74 # CHECK: xvabsd.bu $xr12, $xr23, $xr6 ++0xd0 0xcf 0x62 0x74 # CHECK: xvabsd.hu $xr16, $xr30, $xr19 ++0xb3 0x68 0x63 0x74 # CHECK: 
xvabsd.wu $xr19, $xr5, $xr26 ++0x80 0x9d 0x63 0x74 # CHECK: xvabsd.du $xr0, $xr12, $xr7 ++0xf7 0x67 0x64 0x74 # CHECK: xvavg.b $xr23, $xr31, $xr25 ++0x5b 0xec 0x64 0x74 # CHECK: xvavg.h $xr27, $xr2, $xr27 ++0x14 0x40 0x65 0x74 # CHECK: xvavg.w $xr20, $xr0, $xr16 ++0x2d 0xa9 0x65 0x74 # CHECK: xvavg.d $xr13, $xr9, $xr10 ++0xdf 0x13 0x66 0x74 # CHECK: xvavg.bu $xr31, $xr30, $xr4 ++0x36 0x96 0x66 0x74 # CHECK: xvavg.hu $xr22, $xr17, $xr5 ++0xb5 0x47 0x67 0x74 # CHECK: xvavg.wu $xr21, $xr29, $xr17 ++0xab 0xf4 0x67 0x74 # CHECK: xvavg.du $xr11, $xr5, $xr29 ++0xb7 0x35 0x68 0x74 # CHECK: xvavgr.b $xr23, $xr13, $xr13 ++0x9e 0xfe 0x68 0x74 # CHECK: xvavgr.h $xr30, $xr20, $xr31 ++0x9d 0x27 0x69 0x74 # CHECK: xvavgr.w $xr29, $xr28, $xr9 ++0x95 0xa2 0x69 0x74 # CHECK: xvavgr.d $xr21, $xr20, $xr8 ++0x20 0x11 0x6a 0x74 # CHECK: xvavgr.bu $xr0, $xr9, $xr4 ++0x03 0xec 0x6a 0x74 # CHECK: xvavgr.hu $xr3, $xr0, $xr27 ++0xc2 0x57 0x6b 0x74 # CHECK: xvavgr.wu $xr2, $xr30, $xr21 ++0xb6 0xc6 0x6b 0x74 # CHECK: xvavgr.du $xr22, $xr21, $xr17 ++0x81 0x4e 0x70 0x74 # CHECK: xvmax.b $xr1, $xr20, $xr19 ++0x20 0xba 0x70 0x74 # CHECK: xvmax.h $xr0, $xr17, $xr14 ++0x00 0x41 0x71 0x74 # CHECK: xvmax.w $xr0, $xr8, $xr16 ++0xf0 0xc2 0x71 0x74 # CHECK: xvmax.d $xr16, $xr23, $xr16 ++0xd4 0x38 0x72 0x74 # CHECK: xvmin.b $xr20, $xr6, $xr14 ++0x64 0xe0 0x72 0x74 # CHECK: xvmin.h $xr4, $xr3, $xr24 ++0x45 0x5c 0x73 0x74 # CHECK: xvmin.w $xr5, $xr2, $xr23 ++0xff 0xea 0x73 0x74 # CHECK: xvmin.d $xr31, $xr23, $xr26 ++0xae 0x0d 0x74 0x74 # CHECK: xvmax.bu $xr14, $xr13, $xr3 ++0x36 0x92 0x74 0x74 # CHECK: xvmax.hu $xr22, $xr17, $xr4 ++0xb1 0x75 0x75 0x74 # CHECK: xvmax.wu $xr17, $xr13, $xr29 ++0x4d 0x80 0x75 0x74 # CHECK: xvmax.du $xr13, $xr2, $xr0 ++0xf2 0x6f 0x76 0x74 # CHECK: xvmin.bu $xr18, $xr31, $xr27 ++0x42 0xb9 0x76 0x74 # CHECK: xvmin.hu $xr2, $xr10, $xr14 ++0x1f 0x69 0x77 0x74 # CHECK: xvmin.wu $xr31, $xr8, $xr26 ++0x4c 0xa7 0x77 0x74 # CHECK: xvmin.du $xr12, $xr26, $xr9 ++0x5a 0x0c 0x84 0x74 # CHECK: xvmul.b $xr26, $xr2, $xr3 ++0xb0 0x97 0x84 0x74 # CHECK: xvmul.h $xr16, $xr29, $xr5 ++0x33 0x0c 0x85 0x74 # CHECK: xvmul.w $xr19, $xr1, $xr3 ++0xef 0x81 0x85 0x74 # CHECK: xvmul.d $xr15, $xr15, $xr0 ++0x89 0x25 0x86 0x74 # CHECK: xvmuh.b $xr9, $xr12, $xr9 ++0xe8 0xc2 0x86 0x74 # CHECK: xvmuh.h $xr8, $xr23, $xr16 ++0xdd 0x2c 0x87 0x74 # CHECK: xvmuh.w $xr29, $xr6, $xr11 ++0x43 0x9e 0x87 0x74 # CHECK: xvmuh.d $xr3, $xr18, $xr7 ++0xe3 0x4c 0x88 0x74 # CHECK: xvmuh.bu $xr3, $xr7, $xr19 ++0x2d 0xc8 0x88 0x74 # CHECK: xvmuh.hu $xr13, $xr1, $xr18 ++0xaf 0x42 0x89 0x74 # CHECK: xvmuh.wu $xr15, $xr21, $xr16 ++0x4b 0xcd 0x89 0x74 # CHECK: xvmuh.du $xr11, $xr10, $xr19 ++0x84 0x25 0x90 0x74 # CHECK: xvmulwev.h.b $xr4, $xr12, $xr9 ++0x6a 0xd0 0x90 0x74 # CHECK: xvmulwev.w.h $xr10, $xr3, $xr20 ++0xc4 0x4a 0x91 0x74 # CHECK: xvmulwev.d.w $xr4, $xr22, $xr18 ++0xb4 0xee 0x91 0x74 # CHECK: xvmulwev.q.d $xr20, $xr21, $xr27 ++0xe5 0x00 0x92 0x74 # CHECK: xvmulwod.h.b $xr5, $xr7, $xr0 ++0x93 0xaf 0x92 0x74 # CHECK: xvmulwod.w.h $xr19, $xr28, $xr11 ++0xf3 0x40 0x93 0x74 # CHECK: xvmulwod.d.w $xr19, $xr7, $xr16 ++0x8b 0xb5 0x93 0x74 # CHECK: xvmulwod.q.d $xr11, $xr12, $xr13 ++0x56 0x04 0x98 0x74 # CHECK: xvmulwev.h.bu $xr22, $xr2, $xr1 ++0x62 0x90 0x98 0x74 # CHECK: xvmulwev.w.hu $xr2, $xr3, $xr4 ++0x82 0x65 0x99 0x74 # CHECK: xvmulwev.d.wu $xr2, $xr12, $xr25 ++0xb6 0xc7 0x99 0x74 # CHECK: xvmulwev.q.du $xr22, $xr29, $xr17 ++0x29 0x01 0x9a 0x74 # CHECK: xvmulwod.h.bu $xr9, $xr9, $xr0 ++0x54 0xc0 0x9a 0x74 # CHECK: xvmulwod.w.hu $xr20, $xr2, $xr16 
++0x61 0x61 0x9b 0x74 # CHECK: xvmulwod.d.wu $xr1, $xr11, $xr24 ++0x53 0xd8 0x9b 0x74 # CHECK: xvmulwod.q.du $xr19, $xr2, $xr22 ++0xb6 0x63 0xa0 0x74 # CHECK: xvmulwev.h.bu.b $xr22, $xr29, $xr24 ++0xc1 0xae 0xa0 0x74 # CHECK: xvmulwev.w.hu.h $xr1, $xr22, $xr11 ++0x8c 0x31 0xa1 0x74 # CHECK: xvmulwev.d.wu.w $xr12, $xr12, $xr12 ++0x20 0xde 0xa1 0x74 # CHECK: xvmulwev.q.du.d $xr0, $xr17, $xr23 ++0x1a 0x5e 0xa2 0x74 # CHECK: xvmulwod.h.bu.b $xr26, $xr16, $xr23 ++0x9f 0xa5 0xa2 0x74 # CHECK: xvmulwod.w.hu.h $xr31, $xr12, $xr9 ++0x75 0x4f 0xa3 0x74 # CHECK: xvmulwod.d.wu.w $xr21, $xr27, $xr19 ++0xa7 0xac 0xa3 0x74 # CHECK: xvmulwod.q.du.d $xr7, $xr5, $xr11 ++0x76 0x3d 0xa8 0x74 # CHECK: xvmadd.b $xr22, $xr11, $xr15 ++0xc3 0xe7 0xa8 0x74 # CHECK: xvmadd.h $xr3, $xr30, $xr25 ++0x41 0x16 0xa9 0x74 # CHECK: xvmadd.w $xr1, $xr18, $xr5 ++0xb0 0xae 0xa9 0x74 # CHECK: xvmadd.d $xr16, $xr21, $xr11 ++0x8b 0x29 0xaa 0x74 # CHECK: xvmsub.b $xr11, $xr12, $xr10 ++0x70 0x85 0xaa 0x74 # CHECK: xvmsub.h $xr16, $xr11, $xr1 ++0xaf 0x56 0xab 0x74 # CHECK: xvmsub.w $xr15, $xr21, $xr21 ++0x6c 0x91 0xab 0x74 # CHECK: xvmsub.d $xr12, $xr11, $xr4 ++0xf5 0x18 0xac 0x74 # CHECK: xvmaddwev.h.b $xr21, $xr7, $xr6 ++0xb0 0xb7 0xac 0x74 # CHECK: xvmaddwev.w.h $xr16, $xr29, $xr13 ++0x27 0x7b 0xad 0x74 # CHECK: xvmaddwev.d.w $xr7, $xr25, $xr30 ++0x73 0xa0 0xad 0x74 # CHECK: xvmaddwev.q.d $xr19, $xr3, $xr8 ++0x74 0x33 0xae 0x74 # CHECK: xvmaddwod.h.b $xr20, $xr27, $xr12 ++0xa0 0xb6 0xae 0x74 # CHECK: xvmaddwod.w.h $xr0, $xr21, $xr13 ++0xb9 0x7d 0xaf 0x74 # CHECK: xvmaddwod.d.w $xr25, $xr13, $xr31 ++0x5a 0xc3 0xaf 0x74 # CHECK: xvmaddwod.q.d $xr26, $xr26, $xr16 ++0x52 0x57 0xb4 0x74 # CHECK: xvmaddwev.h.bu $xr18, $xr26, $xr21 ++0x0e 0x96 0xb4 0x74 # CHECK: xvmaddwev.w.hu $xr14, $xr16, $xr5 ++0xb3 0x53 0xb5 0x74 # CHECK: xvmaddwev.d.wu $xr19, $xr29, $xr20 ++0xaf 0xc7 0xb5 0x74 # CHECK: xvmaddwev.q.du $xr15, $xr29, $xr17 ++0x4d 0x07 0xb6 0x74 # CHECK: xvmaddwod.h.bu $xr13, $xr26, $xr1 ++0x2f 0xc3 0xb6 0x74 # CHECK: xvmaddwod.w.hu $xr15, $xr25, $xr16 ++0x97 0x24 0xb7 0x74 # CHECK: xvmaddwod.d.wu $xr23, $xr4, $xr9 ++0xdd 0xc6 0xb7 0x74 # CHECK: xvmaddwod.q.du $xr29, $xr22, $xr17 ++0x37 0x18 0xbc 0x74 # CHECK: xvmaddwev.h.bu.b $xr23, $xr1, $xr6 ++0x64 0xb3 0xbc 0x74 # CHECK: xvmaddwev.w.hu.h $xr4, $xr27, $xr12 ++0x40 0x14 0xbd 0x74 # CHECK: xvmaddwev.d.wu.w $xr0, $xr2, $xr5 ++0xe9 0x87 0xbd 0x74 # CHECK: xvmaddwev.q.du.d $xr9, $xr31, $xr1 ++0x69 0x52 0xbe 0x74 # CHECK: xvmaddwod.h.bu.b $xr9, $xr19, $xr20 ++0xa7 0xb4 0xbe 0x74 # CHECK: xvmaddwod.w.hu.h $xr7, $xr5, $xr13 ++0x6a 0x07 0xbf 0x74 # CHECK: xvmaddwod.d.wu.w $xr10, $xr27, $xr1 ++0x79 0x82 0xbf 0x74 # CHECK: xvmaddwod.q.du.d $xr25, $xr19, $xr0 ++0xe3 0x0b 0xe0 0x74 # CHECK: xvdiv.b $xr3, $xr31, $xr2 ++0x81 0xc5 0xe0 0x74 # CHECK: xvdiv.h $xr1, $xr12, $xr17 ++0x0d 0x30 0xe1 0x74 # CHECK: xvdiv.w $xr13, $xr0, $xr12 ++0xb1 0xac 0xe1 0x74 # CHECK: xvdiv.d $xr17, $xr5, $xr11 ++0x36 0x06 0xe2 0x74 # CHECK: xvmod.b $xr22, $xr17, $xr1 ++0xbc 0xb0 0xe2 0x74 # CHECK: xvmod.h $xr28, $xr5, $xr12 ++0x7d 0x3a 0xe3 0x74 # CHECK: xvmod.w $xr29, $xr19, $xr14 ++0x11 0x99 0xe3 0x74 # CHECK: xvmod.d $xr17, $xr8, $xr6 ++0xd7 0x08 0xe4 0x74 # CHECK: xvdiv.bu $xr23, $xr6, $xr2 ++0xe9 0x83 0xe4 0x74 # CHECK: xvdiv.hu $xr9, $xr31, $xr0 ++0x2f 0x10 0xe5 0x74 # CHECK: xvdiv.wu $xr15, $xr1, $xr4 ++0xae 0xaf 0xe5 0x74 # CHECK: xvdiv.du $xr14, $xr29, $xr11 ++0x84 0x7d 0xe6 0x74 # CHECK: xvmod.bu $xr4, $xr12, $xr31 ++0x96 0xad 0xe6 0x74 # CHECK: xvmod.hu $xr22, $xr12, $xr11 ++0xf5 0x2a 0xe7 0x74 # CHECK: 
xvmod.wu $xr21, $xr23, $xr10 ++0xb5 0xfe 0xe7 0x74 # CHECK: xvmod.du $xr21, $xr21, $xr31 ++0x50 0x2d 0xe8 0x74 # CHECK: xvsll.b $xr16, $xr10, $xr11 ++0x4c 0xed 0xe8 0x74 # CHECK: xvsll.h $xr12, $xr10, $xr27 ++0x5e 0x68 0xe9 0x74 # CHECK: xvsll.w $xr30, $xr2, $xr26 ++0xa8 0xc6 0xe9 0x74 # CHECK: xvsll.d $xr8, $xr21, $xr17 ++0x1b 0x4b 0xea 0x74 # CHECK: xvsrl.b $xr27, $xr24, $xr18 ++0xf1 0xe3 0xea 0x74 # CHECK: xvsrl.h $xr17, $xr31, $xr24 ++0x65 0x10 0xeb 0x74 # CHECK: xvsrl.w $xr5, $xr3, $xr4 ++0xd5 0xa0 0xeb 0x74 # CHECK: xvsrl.d $xr21, $xr6, $xr8 ++0x9c 0x57 0xec 0x74 # CHECK: xvsra.b $xr28, $xr28, $xr21 ++0x93 0xe8 0xec 0x74 # CHECK: xvsra.h $xr19, $xr4, $xr26 ++0x8d 0x06 0xed 0x74 # CHECK: xvsra.w $xr13, $xr20, $xr1 ++0x00 0xc9 0xed 0x74 # CHECK: xvsra.d $xr0, $xr8, $xr18 ++0xc8 0x73 0xee 0x74 # CHECK: xvrotr.b $xr8, $xr30, $xr28 ++0x71 0x82 0xee 0x74 # CHECK: xvrotr.h $xr17, $xr19, $xr0 ++0x8f 0x5f 0xef 0x74 # CHECK: xvrotr.w $xr15, $xr28, $xr23 ++0x5f 0xd4 0xef 0x74 # CHECK: xvrotr.d $xr31, $xr2, $xr21 ++0x54 0x2f 0xf0 0x74 # CHECK: xvsrlr.b $xr20, $xr26, $xr11 ++0x4d 0x9e 0xf0 0x74 # CHECK: xvsrlr.h $xr13, $xr18, $xr7 ++0x3c 0x0c 0xf1 0x74 # CHECK: xvsrlr.w $xr28, $xr1, $xr3 ++0x66 0xb8 0xf1 0x74 # CHECK: xvsrlr.d $xr6, $xr3, $xr14 ++0x0a 0x45 0xf2 0x74 # CHECK: xvsrar.b $xr10, $xr8, $xr17 ++0x5f 0xac 0xf2 0x74 # CHECK: xvsrar.h $xr31, $xr2, $xr11 ++0x0d 0x15 0xf3 0x74 # CHECK: xvsrar.w $xr13, $xr8, $xr5 ++0x4c 0x82 0xf3 0x74 # CHECK: xvsrar.d $xr12, $xr18, $xr0 ++0xcf 0xbc 0xf4 0x74 # CHECK: xvsrln.b.h $xr15, $xr6, $xr15 ++0x76 0x46 0xf5 0x74 # CHECK: xvsrln.h.w $xr22, $xr19, $xr17 ++0xe4 0x94 0xf5 0x74 # CHECK: xvsrln.w.d $xr4, $xr7, $xr5 ++0x63 0xde 0xf6 0x74 # CHECK: xvsran.b.h $xr3, $xr19, $xr23 ++0xd0 0x04 0xf7 0x74 # CHECK: xvsran.h.w $xr16, $xr6, $xr1 ++0x1b 0x82 0xf7 0x74 # CHECK: xvsran.w.d $xr27, $xr16, $xr0 ++0x22 0xa5 0xf8 0x74 # CHECK: xvsrlrn.b.h $xr2, $xr9, $xr9 ++0x70 0x4d 0xf9 0x74 # CHECK: xvsrlrn.h.w $xr16, $xr11, $xr19 ++0x3d 0xbf 0xf9 0x74 # CHECK: xvsrlrn.w.d $xr29, $xr25, $xr15 ++0x8d 0xb6 0xfa 0x74 # CHECK: xvsrarn.b.h $xr13, $xr20, $xr13 ++0xcd 0x06 0xfb 0x74 # CHECK: xvsrarn.h.w $xr13, $xr22, $xr1 ++0x8d 0x89 0xfb 0x74 # CHECK: xvsrarn.w.d $xr13, $xr12, $xr2 ++0x73 0xaa 0xfc 0x74 # CHECK: xvssrln.b.h $xr19, $xr19, $xr10 ++0x0c 0x47 0xfd 0x74 # CHECK: xvssrln.h.w $xr12, $xr24, $xr17 ++0xc7 0xbb 0xfd 0x74 # CHECK: xvssrln.w.d $xr7, $xr30, $xr14 ++0x26 0xdd 0xfe 0x74 # CHECK: xvssran.b.h $xr6, $xr9, $xr23 ++0x2d 0x09 0xff 0x74 # CHECK: xvssran.h.w $xr13, $xr9, $xr2 ++0x52 0x87 0xff 0x74 # CHECK: xvssran.w.d $xr18, $xr26, $xr1 ++0x38 0xde 0x00 0x75 # CHECK: xvssrlrn.b.h $xr24, $xr17, $xr23 ++0x8a 0x21 0x01 0x75 # CHECK: xvssrlrn.h.w $xr10, $xr12, $xr8 ++0x7e 0x9b 0x01 0x75 # CHECK: xvssrlrn.w.d $xr30, $xr27, $xr6 ++0x74 0xff 0x02 0x75 # CHECK: xvssrarn.b.h $xr20, $xr27, $xr31 ++0xf8 0x5e 0x03 0x75 # CHECK: xvssrarn.h.w $xr24, $xr23, $xr23 ++0xa8 0xe7 0x03 0x75 # CHECK: xvssrarn.w.d $xr8, $xr29, $xr25 ++0x8e 0xc4 0x04 0x75 # CHECK: xvssrln.bu.h $xr14, $xr4, $xr17 ++0x9c 0x2a 0x05 0x75 # CHECK: xvssrln.hu.w $xr28, $xr20, $xr10 ++0x0a 0xd1 0x05 0x75 # CHECK: xvssrln.wu.d $xr10, $xr8, $xr20 ++0x92 0xdf 0x06 0x75 # CHECK: xvssran.bu.h $xr18, $xr28, $xr23 ++0x79 0x62 0x07 0x75 # CHECK: xvssran.hu.w $xr25, $xr19, $xr24 ++0xb0 0xcb 0x07 0x75 # CHECK: xvssran.wu.d $xr16, $xr29, $xr18 ++0x62 0xba 0x08 0x75 # CHECK: xvssrlrn.bu.h $xr2, $xr19, $xr14 ++0x06 0x48 0x09 0x75 # CHECK: xvssrlrn.hu.w $xr6, $xr0, $xr18 ++0x9e 0xfc 0x09 0x75 # CHECK: xvssrlrn.wu.d $xr30, $xr4, 
$xr31 ++0x90 0xa3 0x0a 0x75 # CHECK: xvssrarn.bu.h $xr16, $xr28, $xr8 ++0x4b 0x18 0x0b 0x75 # CHECK: xvssrarn.hu.w $xr11, $xr2, $xr6 ++0xd6 0xb0 0x0b 0x75 # CHECK: xvssrarn.wu.d $xr22, $xr6, $xr12 ++0x04 0x42 0x0c 0x75 # CHECK: xvbitclr.b $xr4, $xr16, $xr16 ++0xf0 0xeb 0x0c 0x75 # CHECK: xvbitclr.h $xr16, $xr31, $xr26 ++0x58 0x50 0x0d 0x75 # CHECK: xvbitclr.w $xr24, $xr2, $xr20 ++0x92 0xf9 0x0d 0x75 # CHECK: xvbitclr.d $xr18, $xr12, $xr30 ++0x7a 0x5f 0x0e 0x75 # CHECK: xvbitset.b $xr26, $xr27, $xr23 ++0x73 0xae 0x0e 0x75 # CHECK: xvbitset.h $xr19, $xr19, $xr11 ++0x27 0x49 0x0f 0x75 # CHECK: xvbitset.w $xr7, $xr9, $xr18 ++0xc6 0x8f 0x0f 0x75 # CHECK: xvbitset.d $xr6, $xr30, $xr3 ++0xbe 0x1d 0x10 0x75 # CHECK: xvbitrev.b $xr30, $xr13, $xr7 ++0x6c 0xa0 0x10 0x75 # CHECK: xvbitrev.h $xr12, $xr3, $xr8 ++0x88 0x52 0x11 0x75 # CHECK: xvbitrev.w $xr8, $xr20, $xr20 ++0xfc 0xc4 0x11 0x75 # CHECK: xvbitrev.d $xr28, $xr7, $xr17 ++0x5d 0x32 0x16 0x75 # CHECK: xvpackev.b $xr29, $xr18, $xr12 ++0x66 0xc5 0x16 0x75 # CHECK: xvpackev.h $xr6, $xr11, $xr17 ++0x42 0x78 0x17 0x75 # CHECK: xvpackev.w $xr2, $xr2, $xr30 ++0xfa 0xd5 0x17 0x75 # CHECK: xvpackev.d $xr26, $xr15, $xr21 ++0x33 0x46 0x18 0x75 # CHECK: xvpackod.b $xr19, $xr17, $xr17 ++0x0f 0x8d 0x18 0x75 # CHECK: xvpackod.h $xr15, $xr8, $xr3 ++0xed 0x31 0x19 0x75 # CHECK: xvpackod.w $xr13, $xr15, $xr12 ++0x65 0xe8 0x19 0x75 # CHECK: xvpackod.d $xr5, $xr3, $xr26 ++0x3b 0x05 0x1a 0x75 # CHECK: xvilvl.b $xr27, $xr9, $xr1 ++0x1d 0x85 0x1a 0x75 # CHECK: xvilvl.h $xr29, $xr8, $xr1 ++0x09 0x1d 0x1b 0x75 # CHECK: xvilvl.w $xr9, $xr8, $xr7 ++0xf9 0xc8 0x1b 0x75 # CHECK: xvilvl.d $xr25, $xr7, $xr18 ++0x07 0x6b 0x1c 0x75 # CHECK: xvilvh.b $xr7, $xr24, $xr26 ++0x86 0xf2 0x1c 0x75 # CHECK: xvilvh.h $xr6, $xr20, $xr28 ++0xad 0x30 0x1d 0x75 # CHECK: xvilvh.w $xr13, $xr5, $xr12 ++0xa1 0xfe 0x1d 0x75 # CHECK: xvilvh.d $xr1, $xr21, $xr31 ++0xb1 0x7d 0x1e 0x75 # CHECK: xvpickev.b $xr17, $xr13, $xr31 ++0x04 0xb9 0x1e 0x75 # CHECK: xvpickev.h $xr4, $xr8, $xr14 ++0x0a 0x2d 0x1f 0x75 # CHECK: xvpickev.w $xr10, $xr8, $xr11 ++0x9a 0xa2 0x1f 0x75 # CHECK: xvpickev.d $xr26, $xr20, $xr8 ++0xb3 0x6e 0x20 0x75 # CHECK: xvpickod.b $xr19, $xr21, $xr27 ++0xbc 0xcc 0x20 0x75 # CHECK: xvpickod.h $xr28, $xr5, $xr19 ++0x55 0x5a 0x21 0x75 # CHECK: xvpickod.w $xr21, $xr18, $xr22 ++0xfc 0xc8 0x21 0x75 # CHECK: xvpickod.d $xr28, $xr7, $xr18 ++0x86 0x66 0x22 0x75 # CHECK: xvreplve.b $xr6, $xr20, $r25 ++0xfb 0xb8 0x22 0x75 # CHECK: xvreplve.h $xr27, $xr7, $r14 ++0x81 0x3c 0x23 0x75 # CHECK: xvreplve.w $xr1, $xr4, $r15 ++0x8c 0xc1 0x23 0x75 # CHECK: xvreplve.d $xr12, $xr12, $r16 ++0x61 0x74 0x26 0x75 # CHECK: xvand.v $xr1, $xr3, $xr29 ++0x77 0xd1 0x26 0x75 # CHECK: xvor.v $xr23, $xr11, $xr20 ++0x3f 0x78 0x27 0x75 # CHECK: xvxor.v $xr31, $xr1, $xr30 ++0x5d 0xb7 0x27 0x75 # CHECK: xvnor.v $xr29, $xr26, $xr13 ++0xc9 0x01 0x28 0x75 # CHECK: xvandn.v $xr9, $xr14, $xr0 ++0x19 0xb1 0x28 0x75 # CHECK: xvorn.v $xr25, $xr8, $xr12 ++0x55 0x6b 0x2b 0x75 # CHECK: xvfrstp.b $xr21, $xr26, $xr26 ++0x24 0x8a 0x2b 0x75 # CHECK: xvfrstp.h $xr4, $xr17, $xr2 ++0x9d 0x47 0x2d 0x75 # CHECK: xvadd.q $xr29, $xr28, $xr17 ++0x5d 0xec 0x2d 0x75 # CHECK: xvsub.q $xr29, $xr2, $xr27 ++0x92 0x1f 0x2e 0x75 # CHECK: xvsigncov.b $xr18, $xr28, $xr7 ++0x92 0xc5 0x2e 0x75 # CHECK: xvsigncov.h $xr18, $xr12, $xr17 ++0x3a 0x00 0x2f 0x75 # CHECK: xvsigncov.w $xr26, $xr1, $xr0 ++0x6a 0xbb 0x2f 0x75 # CHECK: xvsigncov.d $xr10, $xr27, $xr14 ++0x2f 0xa3 0x30 0x75 # CHECK: xvfadd.s $xr15, $xr25, $xr8 ++0xd3 0x54 0x31 0x75 # CHECK: xvfadd.d 
$xr19, $xr6, $xr21 ++0xda 0x98 0x32 0x75 # CHECK: xvfsub.s $xr26, $xr6, $xr6 ++0x09 0x54 0x33 0x75 # CHECK: xvfsub.d $xr9, $xr0, $xr21 ++0x06 0xb9 0x38 0x75 # CHECK: xvfmul.s $xr6, $xr8, $xr14 ++0xab 0x6a 0x39 0x75 # CHECK: xvfmul.d $xr11, $xr21, $xr26 ++0xeb 0x98 0x3a 0x75 # CHECK: xvfdiv.s $xr11, $xr7, $xr6 ++0x40 0x13 0x3b 0x75 # CHECK: xvfdiv.d $xr0, $xr26, $xr4 ++0x27 0x91 0x3c 0x75 # CHECK: xvfmax.s $xr7, $xr9, $xr4 ++0x40 0x53 0x3d 0x75 # CHECK: xvfmax.d $xr0, $xr26, $xr20 ++0x48 0xe9 0x3e 0x75 # CHECK: xvfmin.s $xr8, $xr10, $xr26 ++0xc2 0x66 0x3f 0x75 # CHECK: xvfmin.d $xr2, $xr22, $xr25 ++0x91 0x84 0x40 0x75 # CHECK: xvfmaxa.s $xr17, $xr4, $xr1 ++0xfb 0x26 0x41 0x75 # CHECK: xvfmaxa.d $xr27, $xr23, $xr9 ++0x75 0xec 0x42 0x75 # CHECK: xvfmina.s $xr21, $xr3, $xr27 ++0xc7 0x10 0x43 0x75 # CHECK: xvfmina.d $xr7, $xr6, $xr4 ++0x49 0x51 0x46 0x75 # CHECK: xvfcvt.h.s $xr9, $xr10, $xr20 ++0xe5 0xd6 0x46 0x75 # CHECK: xvfcvt.s.d $xr5, $xr23, $xr21 ++0x1c 0x2b 0x48 0x75 # CHECK: xvffint.s.l $xr28, $xr24, $xr10 ++0x06 0x87 0x49 0x75 # CHECK: xvftint.w.d $xr6, $xr24, $xr1 ++0x5b 0x7b 0x4a 0x75 # CHECK: xvftintrm.w.d $xr27, $xr26, $xr30 ++0x9f 0x85 0x4a 0x75 # CHECK: xvftintrp.w.d $xr31, $xr12, $xr1 ++0xab 0x56 0x4b 0x75 # CHECK: xvftintrz.w.d $xr11, $xr21, $xr21 ++0x0f 0xf1 0x4b 0x75 # CHECK: xvftintrne.w.d $xr15, $xr8, $xr28 ++0xb4 0x8e 0x7a 0x75 # CHECK: xvshuf.h $xr20, $xr21, $xr3 ++0x56 0x7c 0x7b 0x75 # CHECK: xvshuf.w $xr22, $xr2, $xr31 ++0x6f 0xe8 0x7b 0x75 # CHECK: xvshuf.d $xr15, $xr3, $xr26 ++0xf5 0x62 0x7d 0x75 # CHECK: xvperm.w $xr21, $xr23, $xr24 ++0xbc 0x04 0x80 0x76 # CHECK: xvseqi.b $xr28, $xr5, 1 ++0x33 0xed 0x80 0x76 # CHECK: xvseqi.h $xr19, $xr9, -5 ++0x48 0x7a 0x81 0x76 # CHECK: xvseqi.w $xr8, $xr18, -2 ++0xc2 0xf2 0x81 0x76 # CHECK: xvseqi.d $xr2, $xr22, -4 ++0xa4 0x5a 0x82 0x76 # CHECK: xvslei.b $xr4, $xr21, -10 ++0x91 0xd2 0x82 0x76 # CHECK: xvslei.h $xr17, $xr20, -12 ++0x89 0x66 0x83 0x76 # CHECK: xvslei.w $xr9, $xr20, -7 ++0xd3 0xab 0x83 0x76 # CHECK: xvslei.d $xr19, $xr30, 10 ++0x44 0x07 0x84 0x76 # CHECK: xvslei.bu $xr4, $xr26, 1 ++0x0b 0x91 0x84 0x76 # CHECK: xvslei.hu $xr11, $xr8, 4 ++0x92 0x7d 0x85 0x76 # CHECK: xvslei.wu $xr18, $xr12, 31 ++0xfe 0xe8 0x85 0x76 # CHECK: xvslei.du $xr30, $xr7, 26 ++0xab 0x0b 0x86 0x76 # CHECK: xvslti.b $xr11, $xr29, 2 ++0x66 0xa3 0x86 0x76 # CHECK: xvslti.h $xr6, $xr27, 8 ++0xf5 0x06 0x87 0x76 # CHECK: xvslti.w $xr21, $xr23, 1 ++0xf2 0xef 0x87 0x76 # CHECK: xvslti.d $xr18, $xr31, -5 ++0x9b 0x45 0x88 0x76 # CHECK: xvslti.bu $xr27, $xr12, 17 ++0xd2 0xb1 0x88 0x76 # CHECK: xvslti.hu $xr18, $xr14, 12 ++0x84 0x39 0x89 0x76 # CHECK: xvslti.wu $xr4, $xr12, 14 ++0x1a 0xe0 0x89 0x76 # CHECK: xvslti.du $xr26, $xr0, 24 ++0x5e 0x14 0x8a 0x76 # CHECK: xvaddi.bu $xr30, $xr2, 5 ++0x36 0xa6 0x8a 0x76 # CHECK: xvaddi.hu $xr22, $xr17, 9 ++0x43 0x77 0x8b 0x76 # CHECK: xvaddi.wu $xr3, $xr26, 29 ++0x80 0xfa 0x8b 0x76 # CHECK: xvaddi.du $xr0, $xr20, 30 ++0x80 0x1e 0x8c 0x76 # CHECK: xvsubi.bu $xr0, $xr20, 7 ++0x04 0xcb 0x8c 0x76 # CHECK: xvsubi.hu $xr4, $xr24, 18 ++0x41 0x6b 0x8d 0x76 # CHECK: xvsubi.wu $xr1, $xr26, 26 ++0x89 0xa3 0x8d 0x76 # CHECK: xvsubi.du $xr9, $xr28, 8 ++0xa0 0x22 0x8e 0x76 # CHECK: xvbsll.v $xr0, $xr21, 8 ++0x04 0xf1 0x8e 0x76 # CHECK: xvbsrl.v $xr4, $xr8, 28 ++0x28 0x48 0x90 0x76 # CHECK: xvmaxi.b $xr8, $xr1, -14 ++0x93 0xc1 0x90 0x76 # CHECK: xvmaxi.h $xr19, $xr12, -16 ++0x3b 0x14 0x91 0x76 # CHECK: xvmaxi.w $xr27, $xr1, 5 ++0xe6 0x8c 0x91 0x76 # CHECK: xvmaxi.d $xr6, $xr7, 3 ++0xca 0x14 0x92 0x76 # CHECK: xvmini.b $xr10, $xr6, 5 
++0x48 0xd2 0x92 0x76 # CHECK: xvmini.h $xr8, $xr18, -12 ++0xbf 0x65 0x93 0x76 # CHECK: xvmini.w $xr31, $xr13, -7 ++0x6f 0xa7 0x93 0x76 # CHECK: xvmini.d $xr15, $xr27, 9 ++0x25 0x5a 0x94 0x76 # CHECK: xvmaxi.bu $xr5, $xr17, 22 ++0x66 0x90 0x94 0x76 # CHECK: xvmaxi.hu $xr6, $xr3, 4 ++0x9a 0x45 0x95 0x76 # CHECK: xvmaxi.wu $xr26, $xr12, 17 ++0x7e 0xf9 0x95 0x76 # CHECK: xvmaxi.du $xr30, $xr11, 30 ++0x0f 0x1d 0x96 0x76 # CHECK: xvmini.bu $xr15, $xr8, 7 ++0x32 0x87 0x96 0x76 # CHECK: xvmini.hu $xr18, $xr25, 1 ++0x90 0x03 0x97 0x76 # CHECK: xvmini.wu $xr16, $xr28, 0 ++0x6a 0xf6 0x97 0x76 # CHECK: xvmini.du $xr10, $xr19, 29 ++0x28 0x0b 0x9a 0x76 # CHECK: xvfrstpi.b $xr8, $xr25, 2 ++0x7c 0xea 0x9a 0x76 # CHECK: xvfrstpi.h $xr28, $xr19, 26 ++0x02 0x01 0x9c 0x76 # CHECK: xvclo.b $xr2, $xr8 ++0x2a 0x05 0x9c 0x76 # CHECK: xvclo.h $xr10, $xr9 ++0xe2 0x0b 0x9c 0x76 # CHECK: xvclo.w $xr2, $xr31 ++0x15 0x0f 0x9c 0x76 # CHECK: xvclo.d $xr21, $xr24 ++0x0d 0x13 0x9c 0x76 # CHECK: xvclz.b $xr13, $xr24 ++0xe4 0x17 0x9c 0x76 # CHECK: xvclz.h $xr4, $xr31 ++0x27 0x18 0x9c 0x76 # CHECK: xvclz.w $xr7, $xr1 ++0xcd 0x1e 0x9c 0x76 # CHECK: xvclz.d $xr13, $xr22 ++0x49 0x23 0x9c 0x76 # CHECK: xvpcnt.b $xr9, $xr26 ++0x6a 0x24 0x9c 0x76 # CHECK: xvpcnt.h $xr10, $xr3 ++0xf8 0x28 0x9c 0x76 # CHECK: xvpcnt.w $xr24, $xr7 ++0x05 0x2d 0x9c 0x76 # CHECK: xvpcnt.d $xr5, $xr8 ++0x73 0x31 0x9c 0x76 # CHECK: xvneg.b $xr19, $xr11 ++0xb5 0x36 0x9c 0x76 # CHECK: xvneg.h $xr21, $xr21 ++0x33 0x3a 0x9c 0x76 # CHECK: xvneg.w $xr19, $xr17 ++0xbf 0x3f 0x9c 0x76 # CHECK: xvneg.d $xr31, $xr29 ++0x76 0x43 0x9c 0x76 # CHECK: xvmskltz.b $xr22, $xr27 ++0x05 0x44 0x9c 0x76 # CHECK: xvmskltz.h $xr5, $xr0 ++0x98 0x4b 0x9c 0x76 # CHECK: xvmskltz.w $xr24, $xr28 ++0x59 0x4c 0x9c 0x76 # CHECK: xvmskltz.d $xr25, $xr2 ++0xde 0x53 0x9c 0x76 # CHECK: xvmskgez.b $xr30, $xr30 ++0x85 0x62 0x9c 0x76 # CHECK: xvmsknz.b $xr5, $xr20 ++0x21 0x9b 0x9c 0x76 # CHECK: xvseteqz.v $fcc1, $xr25 ++0xa5 0x9d 0x9c 0x76 # CHECK: xvsetnez.v $fcc5, $xr13 ++0x80 0xa0 0x9c 0x76 # CHECK: xvsetanyeqz.b $fcc0, $xr4 ++0xe0 0xa7 0x9c 0x76 # CHECK: xvsetanyeqz.h $fcc0, $xr31 ++0xc2 0xab 0x9c 0x76 # CHECK: xvsetanyeqz.w $fcc2, $xr30 ++0xe3 0xaf 0x9c 0x76 # CHECK: xvsetanyeqz.d $fcc3, $xr31 ++0xa1 0xb2 0x9c 0x76 # CHECK: xvsetallnez.b $fcc1, $xr21 ++0xa0 0xb6 0x9c 0x76 # CHECK: xvsetallnez.h $fcc0, $xr21 ++0x00 0xb8 0x9c 0x76 # CHECK: xvsetallnez.w $fcc0, $xr0 ++0xe1 0xbf 0x9c 0x76 # CHECK: xvsetallnez.d $fcc1, $xr31 ++0x95 0xc4 0x9c 0x76 # CHECK: xvflogb.s $xr21, $xr4 ++0x88 0xca 0x9c 0x76 # CHECK: xvflogb.d $xr8, $xr20 ++0xaf 0xd7 0x9c 0x76 # CHECK: xvfclass.s $xr15, $xr29 ++0xc7 0xd9 0x9c 0x76 # CHECK: xvfclass.d $xr7, $xr14 ++0x7c 0xe6 0x9c 0x76 # CHECK: xvfsqrt.s $xr28, $xr19 ++0xeb 0xeb 0x9c 0x76 # CHECK: xvfsqrt.d $xr11, $xr31 ++0xe6 0xf6 0x9c 0x76 # CHECK: xvfrecip.s $xr6, $xr23 ++0x00 0xfb 0x9c 0x76 # CHECK: xvfrecip.d $xr0, $xr24 ++0x08 0x06 0x9d 0x76 # CHECK: xvfrsqrt.s $xr8, $xr16 ++0x2f 0x0a 0x9d 0x76 # CHECK: xvfrsqrt.d $xr15, $xr17 ++0x24 0x37 0x9d 0x76 # CHECK: xvfrint.s $xr4, $xr25 ++0x81 0x3a 0x9d 0x76 # CHECK: xvfrint.d $xr1, $xr20 ++0x1d 0x46 0x9d 0x76 # CHECK: xvfrintrm.s $xr29, $xr16 ++0x44 0x49 0x9d 0x76 # CHECK: xvfrintrm.d $xr4, $xr10 ++0xed 0x57 0x9d 0x76 # CHECK: xvfrintrp.s $xr13, $xr31 ++0x74 0x59 0x9d 0x76 # CHECK: xvfrintrp.d $xr20, $xr11 ++0xbb 0x65 0x9d 0x76 # CHECK: xvfrintrz.s $xr27, $xr13 ++0x31 0x6b 0x9d 0x76 # CHECK: xvfrintrz.d $xr17, $xr25 ++0x0e 0x75 0x9d 0x76 # CHECK: xvfrintrne.s $xr14, $xr8 ++0x57 0x7b 0x9d 0x76 # CHECK: xvfrintrne.d $xr23, 
$xr26 ++0xe4 0xea 0x9d 0x76 # CHECK: xvfcvtl.s.h $xr4, $xr23 ++0x6e 0xed 0x9d 0x76 # CHECK: xvfcvth.s.h $xr14, $xr11 ++0xfa 0xf3 0x9d 0x76 # CHECK: xvfcvtl.d.s $xr26, $xr31 ++0x8d 0xf7 0x9d 0x76 # CHECK: xvfcvth.d.s $xr13, $xr28 ++0x8e 0x03 0x9e 0x76 # CHECK: xvffint.s.w $xr14, $xr28 ++0x00 0x05 0x9e 0x76 # CHECK: xvffint.s.wu $xr0, $xr8 ++0x65 0x0b 0x9e 0x76 # CHECK: xvffint.d.l $xr5, $xr27 ++0x5d 0x0e 0x9e 0x76 # CHECK: xvffint.d.lu $xr29, $xr18 ++0x89 0x12 0x9e 0x76 # CHECK: xvffintl.d.w $xr9, $xr20 ++0xab 0x15 0x9e 0x76 # CHECK: xvffinth.d.w $xr11, $xr13 ++0x86 0x30 0x9e 0x76 # CHECK: xvftint.w.s $xr6, $xr4 ++0xcb 0x36 0x9e 0x76 # CHECK: xvftint.l.d $xr11, $xr22 ++0xb4 0x3a 0x9e 0x76 # CHECK: xvftintrm.w.s $xr20, $xr21 ++0x7c 0x3f 0x9e 0x76 # CHECK: xvftintrm.l.d $xr28, $xr27 ++0x0e 0x42 0x9e 0x76 # CHECK: xvftintrp.w.s $xr14, $xr16 ++0x2e 0x47 0x9e 0x76 # CHECK: xvftintrp.l.d $xr14, $xr25 ++0xc5 0x4b 0x9e 0x76 # CHECK: xvftintrz.w.s $xr5, $xr30 ++0x6b 0x4e 0x9e 0x76 # CHECK: xvftintrz.l.d $xr11, $xr19 ++0xfb 0x52 0x9e 0x76 # CHECK: xvftintrne.w.s $xr27, $xr23 ++0xbb 0x55 0x9e 0x76 # CHECK: xvftintrne.l.d $xr27, $xr13 ++0x5c 0x58 0x9e 0x76 # CHECK: xvftint.wu.s $xr28, $xr2 ++0x9b 0x5d 0x9e 0x76 # CHECK: xvftint.lu.d $xr27, $xr12 ++0xb5 0x73 0x9e 0x76 # CHECK: xvftintrz.wu.s $xr21, $xr29 ++0x53 0x74 0x9e 0x76 # CHECK: xvftintrz.lu.d $xr19, $xr2 ++0x42 0x82 0x9e 0x76 # CHECK: xvftintl.l.s $xr2, $xr18 ++0xc8 0x87 0x9e 0x76 # CHECK: xvftinth.l.s $xr8, $xr30 ++0x2d 0x8a 0x9e 0x76 # CHECK: xvftintrml.l.s $xr13, $xr17 ++0x5e 0x8f 0x9e 0x76 # CHECK: xvftintrmh.l.s $xr30, $xr26 ++0x4b 0x93 0x9e 0x76 # CHECK: xvftintrpl.l.s $xr11, $xr26 ++0x7e 0x95 0x9e 0x76 # CHECK: xvftintrph.l.s $xr30, $xr11 ++0xf9 0x98 0x9e 0x76 # CHECK: xvftintrzl.l.s $xr25, $xr7 ++0xac 0x9c 0x9e 0x76 # CHECK: xvftintrzh.l.s $xr12, $xr5 ++0x08 0xa3 0x9e 0x76 # CHECK: xvftintrnel.l.s $xr8, $xr24 ++0x19 0xa7 0x9e 0x76 # CHECK: xvftintrneh.l.s $xr25, $xr24 ++0xb7 0xe0 0x9e 0x76 # CHECK: xvexth.h.b $xr23, $xr5 ++0xd9 0xe4 0x9e 0x76 # CHECK: xvexth.w.h $xr25, $xr6 ++0x67 0xeb 0x9e 0x76 # CHECK: xvexth.d.w $xr7, $xr27 ++0x4e 0xed 0x9e 0x76 # CHECK: xvexth.q.d $xr14, $xr10 ++0xa0 0xf2 0x9e 0x76 # CHECK: xvexth.hu.bu $xr0, $xr21 ++0xcf 0xf6 0x9e 0x76 # CHECK: xvexth.wu.hu $xr15, $xr22 ++0xf8 0xf9 0x9e 0x76 # CHECK: xvexth.du.wu $xr24, $xr15 ++0x44 0xfc 0x9e 0x76 # CHECK: xvexth.qu.du $xr4, $xr2 ++0xd5 0x00 0x9f 0x76 # CHECK: xvreplgr2vr.b $xr21, $r6 ++0x2b 0x04 0x9f 0x76 # CHECK: xvreplgr2vr.h $xr11, $ra ++0xcd 0x0a 0x9f 0x76 # CHECK: xvreplgr2vr.w $xr13, $r22 ++0x29 0x0e 0x9f 0x76 # CHECK: xvreplgr2vr.d $xr9, $r17 ++0x12 0x12 0x9f 0x76 # CHECK: vext2xv.h.b $xr18, $xr16 ++0xe3 0x16 0x9f 0x76 # CHECK: vext2xv.w.b $xr3, $xr23 ++0x1e 0x1a 0x9f 0x76 # CHECK: vext2xv.d.b $xr30, $xr16 ++0xfc 0x1e 0x9f 0x76 # CHECK: vext2xv.w.h $xr28, $xr23 ++0x24 0x20 0x9f 0x76 # CHECK: vext2xv.d.h $xr4, $xr1 ++0x97 0x25 0x9f 0x76 # CHECK: vext2xv.d.w $xr23, $xr12 ++0xa0 0x28 0x9f 0x76 # CHECK: vext2xv.hu.bu $xr0, $xr5 ++0x81 0x2c 0x9f 0x76 # CHECK: vext2xv.wu.bu $xr1, $xr4 ++0x71 0x31 0x9f 0x76 # CHECK: vext2xv.du.bu $xr17, $xr11 ++0x1c 0x34 0x9f 0x76 # CHECK: vext2xv.wu.hu $xr28, $xr0 ++0x3a 0x3b 0x9f 0x76 # CHECK: vext2xv.du.hu $xr26, $xr25 ++0xdd 0x3d 0x9f 0x76 # CHECK: vext2xv.du.wu $xr29, $xr14 ++0xc3 0xb6 0x9f 0x76 # CHECK: xvhseli.d $xr3, $xr22, 13 ++0xc0 0x29 0xa0 0x76 # CHECK: xvrotri.b $xr0, $xr14, 2 ++0xe0 0x6c 0xa0 0x76 # CHECK: xvrotri.h $xr0, $xr7, 11 ++0x38 0x8c 0xa0 0x76 # CHECK: xvrotri.w $xr24, $xr1, 3 ++0xff 0x40 0xa1 0x76 # CHECK: 
xvrotri.d $xr31, $xr7, 16 ++0x74 0x26 0xa4 0x76 # CHECK: xvsrlri.b $xr20, $xr19, 1 ++0x3c 0x6c 0xa4 0x76 # CHECK: xvsrlri.h $xr28, $xr1, 11 ++0x59 0xec 0xa4 0x76 # CHECK: xvsrlri.w $xr25, $xr2, 27 ++0x3d 0x19 0xa5 0x76 # CHECK: xvsrlri.d $xr29, $xr9, 6 ++0xa7 0x28 0xa8 0x76 # CHECK: xvsrari.b $xr7, $xr5, 2 ++0x40 0x65 0xa8 0x76 # CHECK: xvsrari.h $xr0, $xr10, 9 ++0x11 0xab 0xa8 0x76 # CHECK: xvsrari.w $xr17, $xr24, 10 ++0xc7 0x99 0xa9 0x76 # CHECK: xvsrari.d $xr7, $xr14, 38 ++0xe5 0xc7 0xeb 0x76 # CHECK: xvinsgr2vr.w $xr5, $r31, 1 ++0x45 0xe7 0xeb 0x76 # CHECK: xvinsgr2vr.d $xr5, $r26, 1 ++0x92 0xcb 0xef 0x76 # CHECK: xvpickve2gr.w $r18, $xr28, 2 ++0x54 0xe5 0xef 0x76 # CHECK: xvpickve2gr.d $r20, $xr10, 1 ++0x89 0xd9 0xf3 0x76 # CHECK: xvpickve2gr.wu $r9, $xr12, 6 ++0xa9 0xe9 0xf3 0x76 # CHECK: xvpickve2gr.du $r9, $xr13, 2 ++0xc1 0x97 0xf7 0x76 # CHECK: xvrepl128vei.b $xr1, $xr30, 5 ++0xad 0xdd 0xf7 0x76 # CHECK: xvrepl128vei.h $xr13, $xr13, 7 ++0xa7 0xe9 0xf7 0x76 # CHECK: xvrepl128vei.w $xr7, $xr13, 2 ++0xe2 0xf7 0xf7 0x76 # CHECK: xvrepl128vei.d $xr2, $xr31, 1 ++0xa4 0xcd 0xff 0x76 # CHECK: xvinsve0.w $xr4, $xr13, 3 ++0x3b 0xe3 0xff 0x76 # CHECK: xvinsve0.d $xr27, $xr25, 0 ++0x7d 0xde 0x03 0x77 # CHECK: xvpickve.w $xr29, $xr19, 7 ++0x13 0xee 0x03 0x77 # CHECK: xvpickve.d $xr19, $xr16, 3 ++0xa5 0x00 0x07 0x77 # CHECK: xvreplve0.b $xr5, $xr5 ++0x0e 0x83 0x07 0x77 # CHECK: xvreplve0.h $xr14, $xr24 ++0xaf 0xc1 0x07 0x77 # CHECK: xvreplve0.w $xr15, $xr13 ++0x94 0xe2 0x07 0x77 # CHECK: xvreplve0.d $xr20, $xr20 ++0x45 0xf1 0x07 0x77 # CHECK: xvreplve0.q $xr5, $xr10 ++0x1f 0x2c 0x08 0x77 # CHECK: xvsllwil.h.b $xr31, $xr0, 3 ++0x15 0x5f 0x08 0x77 # CHECK: xvsllwil.w.h $xr21, $xr24, 7 ++0x1a 0xcb 0x08 0x77 # CHECK: xvsllwil.d.w $xr26, $xr24, 18 ++0xc5 0x00 0x09 0x77 # CHECK: xvextl.q.d $xr5, $xr6 ++0xed 0x3b 0x0c 0x77 # CHECK: xvsllwil.hu.bu $xr13, $xr31, 6 ++0x93 0x62 0x0c 0x77 # CHECK: xvsllwil.wu.hu $xr19, $xr20, 8 ++0xae 0x89 0x0c 0x77 # CHECK: xvsllwil.du.wu $xr14, $xr13, 2 ++0xea 0x00 0x0d 0x77 # CHECK: xvextl.qu.du $xr10, $xr7 ++0xbf 0x36 0x10 0x77 # CHECK: xvbitclri.b $xr31, $xr21, 5 ++0x9a 0x48 0x10 0x77 # CHECK: xvbitclri.h $xr26, $xr4, 2 ++0x35 0xbf 0x10 0x77 # CHECK: xvbitclri.w $xr21, $xr25, 15 ++0x0e 0xfc 0x11 0x77 # CHECK: xvbitclri.d $xr14, $xr0, 63 ++0x30 0x34 0x14 0x77 # CHECK: xvbitseti.b $xr16, $xr1, 5 ++0xd3 0x4f 0x14 0x77 # CHECK: xvbitseti.h $xr19, $xr30, 3 ++0xd2 0xee 0x14 0x77 # CHECK: xvbitseti.w $xr18, $xr22, 27 ++0x2f 0xa0 0x15 0x77 # CHECK: xvbitseti.d $xr15, $xr1, 40 ++0xb7 0x20 0x18 0x77 # CHECK: xvbitrevi.b $xr23, $xr5, 0 ++0x45 0x5c 0x18 0x77 # CHECK: xvbitrevi.h $xr5, $xr2, 7 ++0xd7 0xb0 0x18 0x77 # CHECK: xvbitrevi.w $xr23, $xr6, 12 ++0xd2 0x85 0x19 0x77 # CHECK: xvbitrevi.d $xr18, $xr14, 33 ++0x5b 0x33 0x24 0x77 # CHECK: xvsat.b $xr27, $xr26, 4 ++0xa4 0x56 0x24 0x77 # CHECK: xvsat.h $xr4, $xr21, 5 ++0x7d 0xab 0x24 0x77 # CHECK: xvsat.w $xr29, $xr27, 10 ++0x0e 0xf0 0x25 0x77 # CHECK: xvsat.d $xr14, $xr0, 60 ++0x3f 0x2f 0x28 0x77 # CHECK: xvsat.bu $xr31, $xr25, 3 ++0x91 0x78 0x28 0x77 # CHECK: xvsat.hu $xr17, $xr4, 14 ++0x31 0x92 0x28 0x77 # CHECK: xvsat.wu $xr17, $xr17, 4 ++0x0b 0xac 0x29 0x77 # CHECK: xvsat.du $xr11, $xr0, 43 ++0x18 0x2b 0x2c 0x77 # CHECK: xvslli.b $xr24, $xr24, 2 ++0x37 0x5d 0x2c 0x77 # CHECK: xvslli.h $xr23, $xr9, 7 ++0x8d 0xc1 0x2c 0x77 # CHECK: xvslli.w $xr13, $xr12, 16 ++0xcb 0x46 0x2d 0x77 # CHECK: xvslli.d $xr11, $xr22, 17 ++0xc9 0x25 0x30 0x77 # CHECK: xvsrli.b $xr9, $xr14, 1 ++0x96 0x7e 0x30 0x77 # CHECK: xvsrli.h $xr22, $xr20, 15 
++0xc5 0xd3 0x30 0x77 # CHECK: xvsrli.w $xr5, $xr30, 20 ++0x01 0xea 0x31 0x77 # CHECK: xvsrli.d $xr1, $xr16, 58 ++0xd2 0x28 0x34 0x77 # CHECK: xvsrai.b $xr18, $xr6, 2 ++0x15 0x72 0x34 0x77 # CHECK: xvsrai.h $xr21, $xr16, 12 ++0x2d 0xc6 0x34 0x77 # CHECK: xvsrai.w $xr13, $xr17, 17 ++0x83 0xcd 0x35 0x77 # CHECK: xvsrai.d $xr3, $xr12, 51 ++0xe1 0x50 0x40 0x77 # CHECK: xvsrlni.b.h $xr1, $xr7, 4 ++0xb0 0xe6 0x40 0x77 # CHECK: xvsrlni.h.w $xr16, $xr21, 25 ++0x4d 0xc1 0x41 0x77 # CHECK: xvsrlni.w.d $xr13, $xr10, 48 ++0x91 0xf9 0x43 0x77 # CHECK: xvsrlni.d.q $xr17, $xr12, 126 ++0x71 0x7e 0x44 0x77 # CHECK: xvsrlrni.b.h $xr17, $xr19, 15 ++0x15 0xbb 0x44 0x77 # CHECK: xvsrlrni.h.w $xr21, $xr24, 14 ++0xf4 0x0f 0x45 0x77 # CHECK: xvsrlrni.w.d $xr20, $xr31, 3 ++0x1c 0x33 0x47 0x77 # CHECK: xvsrlrni.d.q $xr28, $xr24, 76 ++0xfa 0x5c 0x48 0x77 # CHECK: xvssrlni.b.h $xr26, $xr7, 7 ++0x9b 0xe7 0x48 0x77 # CHECK: xvssrlni.h.w $xr27, $xr28, 25 ++0x04 0x41 0x49 0x77 # CHECK: xvssrlni.w.d $xr4, $xr8, 16 ++0x2e 0x52 0x4b 0x77 # CHECK: xvssrlni.d.q $xr14, $xr17, 84 ++0xd1 0x48 0x4c 0x77 # CHECK: xvssrlni.bu.h $xr17, $xr6, 2 ++0x46 0x8f 0x4c 0x77 # CHECK: xvssrlni.hu.w $xr6, $xr26, 3 ++0x4a 0xda 0x4d 0x77 # CHECK: xvssrlni.wu.d $xr10, $xr18, 54 ++0x5d 0x1b 0x4f 0x77 # CHECK: xvssrlni.du.q $xr29, $xr26, 70 ++0x26 0x59 0x50 0x77 # CHECK: xvssrlrni.b.h $xr6, $xr9, 6 ++0x16 0x85 0x50 0x77 # CHECK: xvssrlrni.h.w $xr22, $xr8, 1 ++0x3c 0x71 0x51 0x77 # CHECK: xvssrlrni.w.d $xr28, $xr9, 28 ++0x74 0xa3 0x53 0x77 # CHECK: xvssrlrni.d.q $xr20, $xr27, 104 ++0x99 0x70 0x54 0x77 # CHECK: xvssrlrni.bu.h $xr25, $xr4, 12 ++0xb5 0x97 0x54 0x77 # CHECK: xvssrlrni.hu.w $xr21, $xr29, 5 ++0x01 0xda 0x55 0x77 # CHECK: xvssrlrni.wu.d $xr1, $xr16, 54 ++0xfd 0x64 0x56 0x77 # CHECK: xvssrlrni.du.q $xr29, $xr7, 25 ++0x30 0x53 0x58 0x77 # CHECK: xvsrani.b.h $xr16, $xr25, 4 ++0x4d 0x99 0x58 0x77 # CHECK: xvsrani.h.w $xr13, $xr10, 6 ++0xa7 0xd6 0x59 0x77 # CHECK: xvsrani.w.d $xr7, $xr21, 53 ++0x5a 0xde 0x5a 0x77 # CHECK: xvsrani.d.q $xr26, $xr18, 55 ++0xb1 0x6e 0x5c 0x77 # CHECK: xvsrarni.b.h $xr17, $xr21, 11 ++0xcf 0x8b 0x5c 0x77 # CHECK: xvsrarni.h.w $xr15, $xr30, 2 ++0x77 0x7d 0x5d 0x77 # CHECK: xvsrarni.w.d $xr23, $xr11, 31 ++0x36 0x43 0x5e 0x77 # CHECK: xvsrarni.d.q $xr22, $xr25, 16 ++0x93 0x6a 0x60 0x77 # CHECK: xvssrani.b.h $xr19, $xr20, 10 ++0x39 0xd9 0x60 0x77 # CHECK: xvssrani.h.w $xr25, $xr9, 22 ++0x57 0x1c 0x61 0x77 # CHECK: xvssrani.w.d $xr23, $xr2, 7 ++0x06 0xfd 0x63 0x77 # CHECK: xvssrani.d.q $xr6, $xr8, 127 ++0xdb 0x55 0x64 0x77 # CHECK: xvssrani.bu.h $xr27, $xr14, 5 ++0x2e 0xd0 0x64 0x77 # CHECK: xvssrani.hu.w $xr14, $xr1, 20 ++0x8a 0xec 0x65 0x77 # CHECK: xvssrani.wu.d $xr10, $xr4, 59 ++0x31 0x48 0x67 0x77 # CHECK: xvssrani.du.q $xr17, $xr1, 82 ++0x5b 0x7e 0x68 0x77 # CHECK: xvssrarni.b.h $xr27, $xr18, 15 ++0x70 0xbc 0x68 0x77 # CHECK: xvssrarni.h.w $xr16, $xr3, 15 ++0x3a 0x4b 0x69 0x77 # CHECK: xvssrarni.w.d $xr26, $xr25, 18 ++0x3c 0x03 0x6a 0x77 # CHECK: xvssrarni.d.q $xr28, $xr25, 0 ++0x81 0x61 0x6c 0x77 # CHECK: xvssrarni.bu.h $xr1, $xr12, 8 ++0x63 0xff 0x6c 0x77 # CHECK: xvssrarni.hu.w $xr3, $xr27, 31 ++0x78 0xd3 0x6d 0x77 # CHECK: xvssrarni.wu.d $xr24, $xr27, 52 ++0x65 0xc0 0x6f 0x77 # CHECK: xvssrarni.du.q $xr5, $xr3, 112 ++0x35 0x8f 0x82 0x77 # CHECK: xvextrins.d $xr21, $xr25, 163 ++0x33 0x72 0x84 0x77 # CHECK: xvextrins.w $xr19, $xr17, 28 ++0xfe 0x3c 0x89 0x77 # CHECK: xvextrins.h $xr30, $xr7, 79 ++0xe1 0x4b 0x8f 0x77 # CHECK: xvextrins.b $xr1, $xr31, 210 ++0xc3 0x52 0x92 0x77 # CHECK: xvshuf4i.b $xr3, $xr22, 148 
++0xc2 0x8a 0x94 0x77 # CHECK: xvshuf4i.h $xr2, $xr22, 34 ++0x7f 0x96 0x9a 0x77 # CHECK: xvshuf4i.w $xr31, $xr19, 165 ++0x3f 0x3a 0x9c 0x77 # CHECK: xvshuf4i.d $xr31, $xr17, 14 ++0x1b 0x40 0xc5 0x77 # CHECK: xvbitseli.b $xr27, $xr0, 80 ++0x57 0x64 0xd2 0x77 # CHECK: xvandi.b $xr23, $xr2, 153 ++0x9b 0xf3 0xd6 0x77 # CHECK: xvori.b $xr27, $xr28, 188 ++0x3c 0xf8 0xdb 0x77 # CHECK: xvxori.b $xr28, $xr1, 254 ++0x44 0x90 0xdc 0x77 # CHECK: xvnori.b $xr4, $xr2, 36 ++0x1a 0xc2 0xe2 0x77 # CHECK: xvldi $xr26, -2544 ++0x16 0xa3 0xe6 0x77 # CHECK: xvpermi.w $xr22, $xr24, 168 ++0xee 0x23 0xea 0x77 # CHECK: xvpermi.d $xr14, $xr31, 136 ++0xdc 0x4d 0xef 0x77 # CHECK: xvpermi.q $xr28, $xr14, 211 ++0xe0 0x7f 0x1e 0x70 # CHECK: vaddwev.h.b $vr0, $vr31, $vr31 ++0x83 0xdc 0x1e 0x70 # CHECK: vaddwev.w.h $vr3, $vr4, $vr23 ++0x5e 0x2f 0x1f 0x70 # CHECK: vaddwev.d.w $vr30, $vr26, $vr11 ++0xb9 0xb7 0x1f 0x70 # CHECK: vaddwev.q.d $vr25, $vr29, $vr13 ++0x8b 0x07 0x20 0x70 # CHECK: vsubwev.h.b $vr11, $vr28, $vr1 ++0xe9 0x95 0x20 0x70 # CHECK: vsubwev.w.h $vr9, $vr15, $vr5 ++0x31 0x29 0x21 0x70 # CHECK: vsubwev.d.w $vr17, $vr9, $vr10 ++0x5a 0xae 0x21 0x70 # CHECK: vsubwev.q.d $vr26, $vr18, $vr11 ++0x67 0x49 0x22 0x70 # CHECK: vaddwod.h.b $vr7, $vr11, $vr18 ++0xe0 0xb0 0x22 0x70 # CHECK: vaddwod.w.h $vr0, $vr7, $vr12 ++0x7e 0x43 0x23 0x70 # CHECK: vaddwod.d.w $vr30, $vr27, $vr16 ++0x82 0xf6 0x23 0x70 # CHECK: vaddwod.q.d $vr2, $vr20, $vr29 ++0xfa 0x4c 0x24 0x70 # CHECK: vsubwod.h.b $vr26, $vr7, $vr19 ++0x73 0xac 0x24 0x70 # CHECK: vsubwod.w.h $vr19, $vr3, $vr11 ++0x9f 0x33 0x25 0x70 # CHECK: vsubwod.d.w $vr31, $vr28, $vr12 ++0x01 0xc3 0x25 0x70 # CHECK: vsubwod.q.d $vr1, $vr24, $vr16 ++0xa3 0x77 0x2e 0x70 # CHECK: vaddwev.h.bu $vr3, $vr29, $vr29 ++0xea 0xa9 0x2e 0x70 # CHECK: vaddwev.w.hu $vr10, $vr15, $vr10 ++0xb8 0x13 0x2f 0x70 # CHECK: vaddwev.d.wu $vr24, $vr29, $vr4 ++0xf1 0x82 0x2f 0x70 # CHECK: vaddwev.q.du $vr17, $vr23, $vr0 ++0x79 0x51 0x30 0x70 # CHECK: vsubwev.h.bu $vr25, $vr11, $vr20 ++0xf1 0xd1 0x30 0x70 # CHECK: vsubwev.w.hu $vr17, $vr15, $vr20 ++0x2a 0x17 0x31 0x70 # CHECK: vsubwev.d.wu $vr10, $vr25, $vr5 ++0x7d 0xa0 0x31 0x70 # CHECK: vsubwev.q.du $vr29, $vr3, $vr8 ++0x0a 0x64 0x32 0x70 # CHECK: vaddwod.h.bu $vr10, $vr0, $vr25 ++0x62 0xdf 0x32 0x70 # CHECK: vaddwod.w.hu $vr2, $vr27, $vr23 ++0x02 0x58 0x33 0x70 # CHECK: vaddwod.d.wu $vr2, $vr0, $vr22 ++0x40 0x8c 0x33 0x70 # CHECK: vaddwod.q.du $vr0, $vr2, $vr3 ++0xee 0x0f 0x34 0x70 # CHECK: vsubwod.h.bu $vr14, $vr31, $vr3 ++0x55 0x9c 0x34 0x70 # CHECK: vsubwod.w.hu $vr21, $vr2, $vr7 ++0x0b 0x49 0x35 0x70 # CHECK: vsubwod.d.wu $vr11, $vr8, $vr18 ++0x9e 0x82 0x35 0x70 # CHECK: vsubwod.q.du $vr30, $vr20, $vr0 ++0x93 0x47 0x3e 0x70 # CHECK: vaddwev.h.bu.b $vr19, $vr28, $vr17 ++0xee 0xf9 0x3e 0x70 # CHECK: vaddwev.w.hu.h $vr14, $vr15, $vr30 ++0xef 0x28 0x3f 0x70 # CHECK: vaddwev.d.wu.w $vr15, $vr7, $vr10 ++0xd3 0xf9 0x3f 0x70 # CHECK: vaddwev.q.du.d $vr19, $vr14, $vr30 ++0x4f 0x22 0x40 0x70 # CHECK: vaddwod.h.bu.b $vr15, $vr18, $vr8 ++0x73 0x9b 0x40 0x70 # CHECK: vaddwod.w.hu.h $vr19, $vr27, $vr6 ++0x67 0x3d 0x41 0x70 # CHECK: vaddwod.d.wu.w $vr7, $vr11, $vr15 ++0x00 0xe8 0x41 0x70 # CHECK: vaddwod.q.du.d $vr0, $vr0, $vr26 ++0x78 0x56 0x90 0x70 # CHECK: vmulwev.h.b $vr24, $vr19, $vr21 ++0xcd 0xca 0x90 0x70 # CHECK: vmulwev.w.h $vr13, $vr22, $vr18 ++0xd8 0x36 0x91 0x70 # CHECK: vmulwev.d.w $vr24, $vr22, $vr13 ++0xc4 0xfa 0x91 0x70 # CHECK: vmulwev.q.d $vr4, $vr22, $vr30 ++0x56 0x63 0x92 0x70 # CHECK: vmulwod.h.b $vr22, $vr26, $vr24 ++0x91 0x91 0x92 0x70 # 
CHECK: vmulwod.w.h $vr17, $vr12, $vr4 ++0xf0 0x69 0x93 0x70 # CHECK: vmulwod.d.w $vr16, $vr15, $vr26 ++0x03 0x96 0x93 0x70 # CHECK: vmulwod.q.d $vr3, $vr16, $vr5 ++0x7f 0x4e 0x98 0x70 # CHECK: vmulwev.h.bu $vr31, $vr19, $vr19 ++0xf6 0x97 0x98 0x70 # CHECK: vmulwev.w.hu $vr22, $vr31, $vr5 ++0x80 0x78 0x99 0x70 # CHECK: vmulwev.d.wu $vr0, $vr4, $vr30 ++0x7f 0xd0 0x99 0x70 # CHECK: vmulwev.q.du $vr31, $vr3, $vr20 ++0xf9 0x34 0x9a 0x70 # CHECK: vmulwod.h.bu $vr25, $vr7, $vr13 ++0x81 0xb1 0x9a 0x70 # CHECK: vmulwod.w.hu $vr1, $vr12, $vr12 ++0xef 0x79 0x9b 0x70 # CHECK: vmulwod.d.wu $vr15, $vr15, $vr30 ++0x8d 0x9b 0x9b 0x70 # CHECK: vmulwod.q.du $vr13, $vr28, $vr6 ++0x48 0x0f 0xa0 0x70 # CHECK: vmulwev.h.bu.b $vr8, $vr26, $vr3 ++0x2a 0x87 0xa0 0x70 # CHECK: vmulwev.w.hu.h $vr10, $vr25, $vr1 ++0x09 0x4c 0xa1 0x70 # CHECK: vmulwev.d.wu.w $vr9, $vr0, $vr19 ++0x0d 0xdf 0xa1 0x70 # CHECK: vmulwev.q.du.d $vr13, $vr24, $vr23 ++0x14 0x38 0xa2 0x70 # CHECK: vmulwod.h.bu.b $vr20, $vr0, $vr14 ++0x90 0x8e 0xa2 0x70 # CHECK: vmulwod.w.hu.h $vr16, $vr20, $vr3 ++0xe5 0x6e 0xa3 0x70 # CHECK: vmulwod.d.wu.w $vr5, $vr23, $vr27 ++0xde 0xf7 0xa3 0x70 # CHECK: vmulwod.q.du.d $vr30, $vr30, $vr29 ++0x12 0x20 0xac 0x70 # CHECK: vmaddwev.h.b $vr18, $vr0, $vr8 ++0xdd 0x9e 0xac 0x70 # CHECK: vmaddwev.w.h $vr29, $vr22, $vr7 ++0xbc 0x7d 0xad 0x70 # CHECK: vmaddwev.d.w $vr28, $vr13, $vr31 ++0x65 0xb4 0xad 0x70 # CHECK: vmaddwev.q.d $vr5, $vr3, $vr13 ++0x24 0x24 0xae 0x70 # CHECK: vmaddwod.h.b $vr4, $vr1, $vr9 ++0x3a 0xe1 0xae 0x70 # CHECK: vmaddwod.w.h $vr26, $vr9, $vr24 ++0x7e 0x34 0xaf 0x70 # CHECK: vmaddwod.d.w $vr30, $vr3, $vr13 ++0xaf 0xf5 0xaf 0x70 # CHECK: vmaddwod.q.d $vr15, $vr13, $vr29 ++0x98 0x16 0xb4 0x70 # CHECK: vmaddwev.h.bu $vr24, $vr20, $vr5 ++0x83 0xa0 0xb4 0x70 # CHECK: vmaddwev.w.hu $vr3, $vr4, $vr8 ++0x7b 0x12 0xb5 0x70 # CHECK: vmaddwev.d.wu $vr27, $vr19, $vr4 ++0x7c 0xf7 0xb5 0x70 # CHECK: vmaddwev.q.du $vr28, $vr27, $vr29 ++0x85 0x6a 0xb6 0x70 # CHECK: vmaddwod.h.bu $vr5, $vr20, $vr26 ++0xd5 0xab 0xb6 0x70 # CHECK: vmaddwod.w.hu $vr21, $vr30, $vr10 ++0x67 0x51 0xb7 0x70 # CHECK: vmaddwod.d.wu $vr7, $vr11, $vr20 ++0x5e 0xe2 0xb7 0x70 # CHECK: vmaddwod.q.du $vr30, $vr18, $vr24 ++0x24 0x10 0xbc 0x70 # CHECK: vmaddwev.h.bu.b $vr4, $vr1, $vr4 ++0x79 0xbd 0xbc 0x70 # CHECK: vmaddwev.w.hu.h $vr25, $vr11, $vr15 ++0x0a 0x52 0xbd 0x70 # CHECK: vmaddwev.d.wu.w $vr10, $vr16, $vr20 ++0x96 0xde 0xbd 0x70 # CHECK: vmaddwev.q.du.d $vr22, $vr20, $vr23 ++0x3f 0x6f 0xbe 0x70 # CHECK: vmaddwod.h.bu.b $vr31, $vr25, $vr27 ++0x48 0xe2 0xbe 0x70 # CHECK: vmaddwod.w.hu.h $vr8, $vr18, $vr24 ++0xb2 0x29 0xbf 0x70 # CHECK: vmaddwod.d.wu.w $vr18, $vr13, $vr10 ++0xaa 0xbc 0xbf 0x70 # CHECK: vmaddwod.q.du.d $vr10, $vr5, $vr15 +diff --git a/llvm/test/Object/LoongArch/elf-flags.yaml b/llvm/test/Object/LoongArch/elf-flags.yaml +new file mode 100644 +index 000000000..b313d3b2b +--- /dev/null ++++ b/llvm/test/Object/LoongArch/elf-flags.yaml +@@ -0,0 +1,22 @@ ++# RUN: yaml2obj %s > %t ++# RUN: llvm-readobj --file-headers %t | FileCheck --check-prefix=OBJ %s ++# RUN: obj2yaml %t | FileCheck --check-prefix=YAML %s ++ ++# OBJ: Flags [ (0x3) ++# OBJ-NEXT: EF_LARCH_ABI_LP64 (0x3) ++# OBJ-NEXT: ] ++ ++# YAML: FileHeader: ++# YAML-NEXT: Class: ELFCLASS64 ++# YAML-NEXT: Data: ELFDATA2LSB ++# YAML-NEXT: Type: ET_EXEC ++# YAML-NEXT: Machine: EM_LOONGARCH ++# YAML-NEXT: Flags: [ EF_LARCH_ABI_LP64 ] ++ ++--- !ELF ++FileHeader: ++ Class: ELFCLASS64 ++ Data: ELFDATA2LSB ++ Type: ET_EXEC ++ Machine: EM_LOONGARCH ++ Flags: [ EF_LARCH_ABI_LP64 ] +diff 
--git a/llvm/test/Object/LoongArch/elf-loongarch64-rel.yaml b/llvm/test/Object/LoongArch/elf-loongarch64-rel.yaml +new file mode 100644 +index 000000000..0bf2bedc5 +--- /dev/null ++++ b/llvm/test/Object/LoongArch/elf-loongarch64-rel.yaml +@@ -0,0 +1,193 @@ ++# RUN: yaml2obj %s > %t ++# RUN: llvm-readobj -r %t | FileCheck --check-prefix=OBJ %s ++# RUN: obj2yaml %t | FileCheck --check-prefix=YAML %s ++ ++# OBJ: Relocations [ ++# OBJ-NEXT: Section (2) .rela.text { ++# OBJ-NEXT: 0x40 R_LARCH_SOP_PUSH_PLT_PCREL foo 0x0 ++# OBJ-NEXT: 0x40 R_LARCH_SOP_POP_32_S_0_10_10_16_S2 - 0x0 ++# OBJ-NEXT: 0x44 R_LARCH_SOP_PUSH_PCREL _GLOBAL_OFFSET_TABLE_ 0x800 ++# OBJ-NEXT: 0x44 R_LARCH_SOP_PUSH_GPREL shared 0x0 ++# OBJ-NEXT: 0x44 R_LARCH_SOP_ADD - 0x0 ++# OBJ-NEXT: 0x44 R_LARCH_SOP_PUSH_ABSOLUTE - 0xC ++# OBJ-NEXT: 0x44 R_LARCH_SOP_SR - 0x0 ++# OBJ-NEXT: 0x44 R_LARCH_SOP_POP_32_S_5_20 - 0x0 ++# OBJ-NEXT: 0x48 R_LARCH_SOP_PUSH_PCREL _GLOBAL_OFFSET_TABLE_ 0x4 ++# OBJ-NEXT: 0x48 R_LARCH_SOP_PUSH_GPREL shared 0x0 ++# OBJ-NEXT: 0x48 R_LARCH_SOP_ADD - 0x0 ++# OBJ-NEXT: 0x48 R_LARCH_SOP_PUSH_PCREL _GLOBAL_OFFSET_TABLE_ 0x804 ++# OBJ-NEXT: 0x48 R_LARCH_SOP_PUSH_GPREL shared 0x0 ++# OBJ-NEXT: 0x48 R_LARCH_SOP_ADD - 0x0 ++# OBJ-NEXT: 0x48 R_LARCH_SOP_PUSH_ABSOLUTE - 0xC ++# OBJ-NEXT: 0x48 R_LARCH_SOP_SR - 0x0 ++# OBJ-NEXT: 0x48 R_LARCH_SOP_PUSH_ABSOLUTE - 0xC ++# OBJ-NEXT: 0x48 R_LARCH_SOP_SL - 0x0 ++# OBJ-NEXT: 0x48 R_LARCH_SOP_SUB - 0x0 ++# OBJ-NEXT: 0x48 R_LARCH_SOP_POP_32_S_10_12 - 0x0 ++# OBJ-NEXT: 0x50 R_LARCH_SOP_PUSH_PLT_PCREL swap 0x0 ++# OBJ-NEXT: 0x50 R_LARCH_SOP_POP_32_S_0_10_10_16_S2 - 0x0 ++# OBJ-NEXT: } ++# OBJ-NEXT: ] ++ ++# YAML: Relocations: ++# YAML-NEXT: - Offset: 0x40 ++# YAML-NEXT: Symbol: foo ++# YAML-NEXT: Type: R_LARCH_SOP_PUSH_PLT_PCREL ++# YAML-NEXT: - Offset: 0x40 ++# YAML-NEXT: Type: R_LARCH_SOP_POP_32_S_0_10_10_16_S2 ++# YAML-NEXT: - Offset: 0x44 ++# YAML-NEXT: Symbol: _GLOBAL_OFFSET_TABLE_ ++# YAML-NEXT: Type: R_LARCH_SOP_PUSH_PCREL ++# YAML-NEXT: Addend: 2048 ++# YAML-NEXT: - Offset: 0x44 ++# YAML-NEXT: Symbol: shared ++# YAML-NEXT: Type: R_LARCH_SOP_PUSH_GPREL ++# YAML-NEXT: - Offset: 0x44 ++# YAML-NEXT: Type: R_LARCH_SOP_ADD ++# YAML-NEXT: - Offset: 0x44 ++# YAML-NEXT: Type: R_LARCH_SOP_PUSH_ABSOLUTE ++# YAML-NEXT: Addend: 12 ++# YAML-NEXT: - Offset: 0x44 ++# YAML-NEXT: Type: R_LARCH_SOP_SR ++# YAML-NEXT: - Offset: 0x44 ++# YAML-NEXT: Type: R_LARCH_SOP_POP_32_S_5_20 ++# YAML-NEXT: - Offset: 0x48 ++# YAML-NEXT: Symbol: _GLOBAL_OFFSET_TABLE_ ++# YAML-NEXT: Type: R_LARCH_SOP_PUSH_PCREL ++# YAML-NEXT: Addend: 4 ++# YAML-NEXT: - Offset: 0x48 ++# YAML-NEXT: Symbol: shared ++# YAML-NEXT: Type: R_LARCH_SOP_PUSH_GPREL ++# YAML-NEXT: - Offset: 0x48 ++# YAML-NEXT: Type: R_LARCH_SOP_ADD ++# YAML-NEXT: - Offset: 0x48 ++# YAML-NEXT: Symbol: _GLOBAL_OFFSET_TABLE_ ++# YAML-NEXT: Type: R_LARCH_SOP_PUSH_PCREL ++# YAML-NEXT: Addend: 2052 ++# YAML-NEXT: - Offset: 0x48 ++# YAML-NEXT: Symbol: shared ++# YAML-NEXT: Type: R_LARCH_SOP_PUSH_GPREL ++# YAML-NEXT: - Offset: 0x48 ++# YAML-NEXT: Type: R_LARCH_SOP_ADD ++# YAML-NEXT: - Offset: 0x48 ++# YAML-NEXT: Type: R_LARCH_SOP_PUSH_ABSOLUTE ++# YAML-NEXT: Addend: 12 ++# YAML-NEXT: - Offset: 0x48 ++# YAML-NEXT: Type: R_LARCH_SOP_SR ++# YAML-NEXT: - Offset: 0x48 ++# YAML-NEXT: Type: R_LARCH_SOP_PUSH_ABSOLUTE ++# YAML-NEXT: Addend: 12 ++# YAML-NEXT: - Offset: 0x48 ++# YAML-NEXT: Type: R_LARCH_SOP_SL ++# YAML-NEXT: - Offset: 0x48 ++# YAML-NEXT: Type: R_LARCH_SOP_SUB ++# YAML-NEXT: - Offset: 0x48 ++# YAML-NEXT: Type: R_LARCH_SOP_POP_32_S_10_12 ++# YAML-NEXT: - 
Offset: 0x50 ++# YAML-NEXT: Symbol: swap ++# YAML-NEXT: Type: R_LARCH_SOP_PUSH_PLT_PCREL ++# YAML-NEXT: - Offset: 0x50 ++# YAML-NEXT: Type: R_LARCH_SOP_POP_32_S_0_10_10_16_S2 ++ ++--- !ELF ++FileHeader: ++ Class: ELFCLASS64 ++ Data: ELFDATA2LSB ++ Type: ET_REL ++ Machine: EM_LOONGARCH ++ Flags: [ EF_LARCH_ABI_LP64 ] ++Sections: ++ - Name: .text ++ Type: SHT_PROGBITS ++ Flags: [ SHF_ALLOC, SHF_EXECINSTR ] ++ AddressAlign: 0x10 ++ - Name: .rela.text ++ Type: SHT_RELA ++ Flags: [ SHF_INFO_LINK ] ++ AddressAlign: 0x8 ++ Info: .text ++ Relocations: ++ - Offset: 0x40 ++ Symbol: foo ++ Type: R_LARCH_SOP_PUSH_PLT_PCREL ++ - Offset: 0x40 ++ Type: R_LARCH_SOP_POP_32_S_0_10_10_16_S2 ++ - Offset: 0x44 ++ Symbol: _GLOBAL_OFFSET_TABLE_ ++ Type: R_LARCH_SOP_PUSH_PCREL ++ Addend: 2048 ++ - Offset: 0x44 ++ Symbol: shared ++ Type: R_LARCH_SOP_PUSH_GPREL ++ - Offset: 0x44 ++ Type: R_LARCH_SOP_ADD ++ - Offset: 0x44 ++ Type: R_LARCH_SOP_PUSH_ABSOLUTE ++ Addend: 12 ++ - Offset: 0x44 ++ Type: R_LARCH_SOP_SR ++ - Offset: 0x44 ++ Type: R_LARCH_SOP_POP_32_S_5_20 ++ - Offset: 0x48 ++ Symbol: _GLOBAL_OFFSET_TABLE_ ++ Type: R_LARCH_SOP_PUSH_PCREL ++ Addend: 4 ++ - Offset: 0x48 ++ Symbol: shared ++ Type: R_LARCH_SOP_PUSH_GPREL ++ - Offset: 0x48 ++ Type: R_LARCH_SOP_ADD ++ - Offset: 0x48 ++ Symbol: _GLOBAL_OFFSET_TABLE_ ++ Type: R_LARCH_SOP_PUSH_PCREL ++ Addend: 2052 ++ - Offset: 0x48 ++ Symbol: shared ++ Type: R_LARCH_SOP_PUSH_GPREL ++ - Offset: 0x48 ++ Type: R_LARCH_SOP_ADD ++ - Offset: 0x48 ++ Type: R_LARCH_SOP_PUSH_ABSOLUTE ++ Addend: 12 ++ - Offset: 0x48 ++ Type: R_LARCH_SOP_SR ++ - Offset: 0x48 ++ Type: R_LARCH_SOP_PUSH_ABSOLUTE ++ Addend: 12 ++ - Offset: 0x48 ++ Type: R_LARCH_SOP_SL ++ - Offset: 0x48 ++ Type: R_LARCH_SOP_SUB ++ - Offset: 0x48 ++ Type: R_LARCH_SOP_POP_32_S_10_12 ++ - Offset: 0x50 ++ Symbol: swap ++ Type: R_LARCH_SOP_PUSH_PLT_PCREL ++ - Offset: 0x50 ++ Type: R_LARCH_SOP_POP_32_S_0_10_10_16_S2 ++ - Name: .data ++ Type: SHT_PROGBITS ++ Flags: [ SHF_WRITE, SHF_ALLOC ] ++ AddressAlign: 0x10 ++ Content: '' ++ - Name: .bss ++ Type: SHT_NOBITS ++ Flags: [ SHF_WRITE, SHF_ALLOC ] ++ AddressAlign: 0x10 ++ ++Symbols: ++ - Name: a.c ++ Type: STT_FILE ++ - Name: _GLOBAL_OFFSET_TABLE_ ++ - Name: foo ++ Type: STT_FUNC ++ Section: .text ++ Size: 0x24 ++ - Name: main ++ Type: STT_FUNC ++ Section: .text ++ Value: 0x28 ++ Size: 0x4C ++ - Name: shared ++ - Name: swap ++... +diff --git a/llvm/test/Object/LoongArch/lit.local.cfg b/llvm/test/Object/LoongArch/lit.local.cfg +new file mode 100644 +index 000000000..2b5a4893e +--- /dev/null ++++ b/llvm/test/Object/LoongArch/lit.local.cfg +@@ -0,0 +1,2 @@ ++if not 'LoongArch' in config.root.targets: ++ config.unsupported = True +diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp +index ae2dec5d1..c0440abf5 100644 +--- a/llvm/tools/llvm-readobj/ELFDumper.cpp ++++ b/llvm/tools/llvm-readobj/ELFDumper.cpp +@@ -1648,6 +1648,12 @@ const EnumEntry ElfHeaderAVRFlags[] = { + ENUM_ENT(EF_AVR_LINKRELAX_PREPARED, "relaxable"), + }; + ++static const EnumEntry ElfHeaderLoongArchFlags[] = { ++ ENUM_ENT(EF_LARCH_ABI_LP64, "LP64") ++ // FIXME: Change these and add more flags in future when all ABIs definition were finalized. 
++ // See current definitions: ++ // https://loongson.github.io/LoongArch-Documentation/LoongArch-ELF-ABI-EN.html#_e_flags_identifies_abi_type_and_version ++}; + + const EnumEntry ElfSymOtherFlags[] = { + LLVM_READOBJ_ENUM_ENT(ELF, STV_INTERNAL), +@@ -3357,6 +3363,8 @@ template void GNUELFDumper::printFileHeaders() { + else if (e.e_machine == EM_AVR) + ElfFlags = printFlags(e.e_flags, makeArrayRef(ElfHeaderAVRFlags), + unsigned(ELF::EF_AVR_ARCH_MASK)); ++ else if (e.e_machine == EM_LOONGARCH) ++ ElfFlags = printFlags(e.e_flags, makeArrayRef(ElfHeaderLoongArchFlags)); + Str = "0x" + utohexstr(e.e_flags); + if (!ElfFlags.empty()) + Str = Str + ", " + ElfFlags; +@@ -6507,6 +6515,8 @@ template void LLVMELFDumper::printFileHeaders() { + else if (E.e_machine == EM_AVR) + W.printFlags("Flags", E.e_flags, makeArrayRef(ElfHeaderAVRFlags), + unsigned(ELF::EF_AVR_ARCH_MASK)); ++ else if (E.e_machine == EM_LOONGARCH) ++ W.printFlags("Flags", E.e_flags, makeArrayRef(ElfHeaderLoongArchFlags)); + else + W.printFlags("Flags", E.e_flags); + W.printNumber("HeaderSize", E.e_ehsize); +diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/LoongArch/AsmParser/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/LoongArch/AsmParser/BUILD.gn +new file mode 100644 +index 000000000..cc3bb49a6 +--- /dev/null ++++ b/llvm/utils/gn/secondary/llvm/lib/Target/LoongArch/AsmParser/BUILD.gn +@@ -0,0 +1,24 @@ ++import("//llvm/utils/TableGen/tablegen.gni") ++ ++tablegen("LoongArchGenAsmMatcher") { ++ visibility = [ ":AsmParser" ] ++ args = [ "-gen-asm-matcher" ] ++ td_file = "../LoongArch.td" ++} ++ ++static_library("AsmParser") { ++ output_name = "LLVMLoongArchAsmParser" ++ deps = [ ++ ":LoongArchGenAsmMatcher", ++ "//llvm/lib/MC", ++ "//llvm/lib/MC/MCParser", ++ "//llvm/lib/Support", ++ "//llvm/lib/Target/LoongArch/MCTargetDesc", ++ "//llvm/lib/Target/LoongArch/TargetInfo", ++ ] ++ include_dirs = [ ".." ] ++ sources = [ ++ # Make `gn format` not collapse this, for sync_source_lists_from_cmake.py. 
++ "LoongArchAsmParser.cpp", ++ ] ++} +diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/LoongArch/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/LoongArch/BUILD.gn +new file mode 100644 +index 000000000..e89db5200 +--- /dev/null ++++ b/llvm/utils/gn/secondary/llvm/lib/Target/LoongArch/BUILD.gn +@@ -0,0 +1,102 @@ ++import("//llvm/utils/TableGen/tablegen.gni") ++ ++tablegen("LoongArchGenCallingConv") { ++ visibility = [ ":LLVMLoongArchCodeGen" ] ++ args = [ "-gen-callingconv" ] ++ td_file = "LoongArch.td" ++} ++ ++tablegen("LoongArchGenDAGISel") { ++ visibility = [ ":LLVMLoongArchCodeGen" ] ++ args = [ "-gen-dag-isel" ] ++ td_file = "LoongArch.td" ++} ++ ++tablegen("LoongArchGenFastISel") { ++ visibility = [ ":LLVMLoongArchCodeGen" ] ++ args = [ "-gen-fast-isel" ] ++ td_file = "LoongArch.td" ++} ++ ++tablegen("LoongArchGenGlobalISel") { ++ visibility = [ ":LLVMLoongArchCodeGen" ] ++ args = [ "-gen-global-isel" ] ++ td_file = "LoongArch.td" ++} ++ ++tablegen("LoongArchGenMCPseudoLowering") { ++ visibility = [ ":LLVMLoongArchCodeGen" ] ++ args = [ "-gen-pseudo-lowering" ] ++ td_file = "LoongArch.td" ++} ++ ++tablegen("LoongArchGenRegisterBank") { ++ visibility = [ ":LLVMLoongArchCodeGen" ] ++ args = [ "-gen-register-bank" ] ++ td_file = "LoongArch.td" ++} ++ ++static_library("LLVMLoongArchCodeGen") { ++ deps = [ ++ ":LoongArchGenCallingConv", ++ ":LoongArchGenDAGISel", ++ ":LoongArchGenFastISel", ++ ":LoongArchGenGlobalISel", ++ ":LoongArchGenMCPseudoLowering", ++ ":LoongArchGenRegisterBank", ++ "MCTargetDesc", ++ "TargetInfo", ++ "//llvm/include/llvm/Config:llvm-config", ++ "//llvm/lib/Analysis", ++ "//llvm/lib/CodeGen", ++ "//llvm/lib/CodeGen/AsmPrinter", ++ "//llvm/lib/CodeGen/GlobalISel", ++ "//llvm/lib/CodeGen/SelectionDAG", ++ "//llvm/lib/IR", ++ "//llvm/lib/MC", ++ "//llvm/lib/Support", ++ "//llvm/lib/Target", ++ ] ++ include_dirs = [ "." ] ++ sources = [ ++ "LoongArchAnalyzeImmediate.cpp", ++ "LoongArchAsmPrinter.cpp", ++ "LoongArchCCState.cpp", ++ "LoongArchCallLowering.cpp", ++ "LoongArchConstantIslandPass.cpp", ++ "LoongArchDelaySlotFiller.cpp", ++ "LoongArchExpandPseudo.cpp", ++ "LoongArchFrameLowering.cpp", ++ "LoongArchISelDAGToDAG.cpp", ++ "LoongArchISelLowering.cpp", ++ "LoongArchInstrInfo.cpp", ++ "LoongArchInstructionSelector.cpp", ++ "LoongArchLegalizerInfo.cpp", ++ "LoongArchMCInstLower.cpp", ++ "LoongArchMachineFunction.cpp", ++ "LoongArchModuleISelDAGToDAG.cpp", ++ "LoongArchOptimizePICCall.cpp", ++ "LoongArchPreLegalizerCombiner.cpp", ++ "LoongArchRegisterBankInfo.cpp", ++ "LoongArchRegisterInfo.cpp", ++ "LoongArchSubtarget.cpp", ++ "LoongArchTargetMachine.cpp", ++ "LoongArchTargetObjectFile.cpp", ++ ] ++} ++ ++# This is a bit different from most build files: Due to this group ++# having the directory's name, "//llvm/lib/Target/LoongArch" will refer to this ++# target, which pulls in the code in this directory *and all subdirectories*. ++# For most other directories, "//llvm/lib/Foo" only pulls in the code directly ++# in "llvm/lib/Foo". The forwarding targets in //llvm/lib/Target expect this ++# different behavior. 
++group("LoongArch") { ++ deps = [ ++ ":LLVMLoongArchCodeGen", ++ "AsmParser", ++ "Disassembler", ++ "MCTargetDesc", ++ "TargetInfo", ++ ] ++} +diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/LoongArch/Disassembler/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/LoongArch/Disassembler/BUILD.gn +new file mode 100644 +index 000000000..0a9b4cf59 +--- /dev/null ++++ b/llvm/utils/gn/secondary/llvm/lib/Target/LoongArch/Disassembler/BUILD.gn +@@ -0,0 +1,23 @@ ++import("//llvm/utils/TableGen/tablegen.gni") ++ ++tablegen("LoongArchGenDisassemblerTables") { ++ visibility = [ ":Disassembler" ] ++ args = [ "-gen-disassembler" ] ++ td_file = "../LoongArch.td" ++} ++ ++static_library("Disassembler") { ++ output_name = "LLVMLoongArchDisassembler" ++ deps = [ ++ ":LoongArchGenDisassemblerTables", ++ "//llvm/lib/MC/MCDisassembler", ++ "//llvm/lib/Support", ++ "//llvm/lib/Target/LoongArch/MCTargetDesc", ++ "//llvm/lib/Target/LoongArch/TargetInfo", ++ ] ++ include_dirs = [ ".." ] ++ sources = [ ++ # Make `gn format` not collapse this, for sync_source_lists_from_cmake.py. ++ "LoongArchDisassembler.cpp", ++ ] ++} +diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/LoongArch/MCTargetDesc/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/LoongArch/MCTargetDesc/BUILD.gn +new file mode 100644 +index 000000000..f0b96c965 +--- /dev/null ++++ b/llvm/utils/gn/secondary/llvm/lib/Target/LoongArch/MCTargetDesc/BUILD.gn +@@ -0,0 +1,74 @@ ++import("//llvm/utils/TableGen/tablegen.gni") ++ ++tablegen("LoongArchGenAsmWriter") { ++ visibility = [ ":MCTargetDesc" ] ++ args = [ "-gen-asm-writer" ] ++ td_file = "../LoongArch.td" ++} ++ ++tablegen("LoongArchGenInstrInfo") { ++ visibility = [ ":tablegen" ] ++ args = [ "-gen-instr-info" ] ++ td_file = "../LoongArch.td" ++} ++ ++tablegen("LoongArchGenMCCodeEmitter") { ++ visibility = [ ":MCTargetDesc" ] ++ args = [ "-gen-emitter" ] ++ td_file = "../LoongArch.td" ++} ++ ++tablegen("LoongArchGenRegisterInfo") { ++ visibility = [ ":tablegen" ] ++ args = [ "-gen-register-info" ] ++ td_file = "../LoongArch.td" ++} ++ ++tablegen("LoongArchGenSubtargetInfo") { ++ visibility = [ ":tablegen" ] ++ args = [ "-gen-subtarget" ] ++ td_file = "../LoongArch.td" ++} ++ ++# This should contain tablegen targets generating .inc files included ++# by other targets. .inc files only used by .cpp files in this directory ++# should be in deps on the static_library instead. ++group("tablegen") { ++ visibility = [ ++ ":MCTargetDesc", ++ "../TargetInfo", ++ ] ++ public_deps = [ ++ ":LoongArchGenInstrInfo", ++ ":LoongArchGenRegisterInfo", ++ ":LoongArchGenSubtargetInfo", ++ ] ++} ++ ++static_library("MCTargetDesc") { ++ output_name = "LLVMLoongArchDesc" ++ public_deps = [ ":tablegen" ] ++ deps = [ ++ ":LoongArchGenAsmWriter", ++ ":LoongArchGenMCCodeEmitter", ++ "//llvm/lib/MC", ++ "//llvm/lib/Support", ++ "//llvm/lib/Target/LoongArch/TargetInfo", ++ ] ++ include_dirs = [ ".." 
] ++ sources = [ ++ "LoongArchABIFlagsSection.cpp", ++ "LoongArchABIInfo.cpp", ++ "LoongArchAsmBackend.cpp", ++ "LoongArchELFObjectWriter.cpp", ++ "LoongArchELFStreamer.cpp", ++ "LoongArchInstPrinter.cpp", ++ "LoongArchMCAsmInfo.cpp", ++ "LoongArchMCCodeEmitter.cpp", ++ "LoongArchMCExpr.cpp", ++ "LoongArchMCTargetDesc.cpp", ++ "LoongArchNaClELFStreamer.cpp", ++ "LoongArchOptionRecord.cpp", ++ "LoongArchTargetStreamer.cpp", ++ ] ++} +diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/LoongArch/TargetInfo/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/LoongArch/TargetInfo/BUILD.gn +new file mode 100644 +index 000000000..a476bdd5f +--- /dev/null ++++ b/llvm/utils/gn/secondary/llvm/lib/Target/LoongArch/TargetInfo/BUILD.gn +@@ -0,0 +1,9 @@ ++static_library("TargetInfo") { ++ output_name = "LLVMLoongArchInfo" ++ deps = [ "//llvm/lib/Support" ] ++ include_dirs = [ ".." ] ++ sources = [ ++ # Make `gn format` not collapse this, for sync_source_lists_from_cmake.py. ++ "LoongArchTargetInfo.cpp", ++ ] ++} diff --git a/0003-LoongArch-support-compiler-rt-and-fix-some-issues.patch b/0003-LoongArch-support-compiler-rt-and-fix-some-issues.patch new file mode 100644 index 0000000..5da32f6 --- /dev/null +++ b/0003-LoongArch-support-compiler-rt-and-fix-some-issues.patch @@ -0,0 +1,20199 @@ +diff --git a/llvm/include/llvm/Demangle/ItaniumDemangle.h b/llvm/include/llvm/Demangle/ItaniumDemangle.h +index 6d4f6222a..226601b20 100644 +--- a/llvm/include/llvm/Demangle/ItaniumDemangle.h ++++ b/llvm/include/llvm/Demangle/ItaniumDemangle.h +@@ -5099,7 +5099,7 @@ template <> + struct FloatData + { + #if defined(__mips__) && defined(__mips_n64) || defined(__aarch64__) || \ +- defined(__wasm__) || defined(__riscv) ++ defined(__wasm__) || defined(__riscv) || defined(__loongarch__) + static const size_t mangled_size = 32; + #elif defined(__arm__) || defined(__mips__) || defined(__hexagon__) + static const size_t mangled_size = 16; +diff --git a/llvm/lib/Support/Unix/Memory.inc b/llvm/lib/Support/Unix/Memory.inc +index 5e008069d..6735b74c5 100644 +--- a/llvm/lib/Support/Unix/Memory.inc ++++ b/llvm/lib/Support/Unix/Memory.inc +@@ -241,8 +241,9 @@ void Memory::InvalidateInstructionCache(const void *Addr, + for (intptr_t Line = StartLine; Line < EndLine; Line += LineSize) + asm volatile("icbi 0, %0" : : "r"(Line)); + asm volatile("isync"); +-# elif (defined(__arm__) || defined(__aarch64__) || defined(__mips__)) && \ +- defined(__GNUC__) ++#elif (defined(__arm__) || defined(__aarch64__) || defined(__loongarch__) || \ ++ defined(__mips__)) && \ ++ defined(__GNUC__) + // FIXME: Can we safely always call this for __GNUC__ everywhere? 
+ const char *Start = static_cast(Addr); + const char *End = Start + Len; +diff --git a/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp b/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp +index 2d35dfd0c..61d455518 100644 +--- a/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp ++++ b/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp +@@ -239,6 +239,7 @@ public: + MCAsmParserExtension::Initialize(parser); + + parser.addAliasForDirective(".asciiz", ".asciz"); ++ parser.addAliasForDirective(".half", ".2byte"); + parser.addAliasForDirective(".hword", ".2byte"); + parser.addAliasForDirective(".word", ".4byte"); + parser.addAliasForDirective(".dword", ".8byte"); +diff --git a/llvm/lib/Target/LoongArch/LoongArch32InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArch32InstrInfo.td +index e85fce2fd..aa297c837 100644 +--- a/llvm/lib/Target/LoongArch/LoongArch32InstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArch32InstrInfo.td +@@ -363,6 +363,10 @@ def : LoongArchPat<(brcond RC:$cond, bb:$dst), + + defm : BrcondPats, GPR_64; + ++defm atomic_cmp_swap_8 : ternary_atomic_op_failure_ord; ++defm atomic_cmp_swap_16 : ternary_atomic_op_failure_ord; ++defm atomic_cmp_swap_32 : ternary_atomic_op_failure_ord; ++ + let usesCustomInserter = 1 in { + def ATOMIC_LOAD_ADD_I8 : Atomic2Ops; + def ATOMIC_LOAD_ADD_I16 : Atomic2Ops; +@@ -387,9 +391,9 @@ let usesCustomInserter = 1 in { + def ATOMIC_SWAP_I16 : Atomic2Ops; + def ATOMIC_SWAP_I32 : Atomic2Ops; + +- def ATOMIC_CMP_SWAP_I8 : AtomicCmpSwap; +- def ATOMIC_CMP_SWAP_I16 : AtomicCmpSwap; +- def ATOMIC_CMP_SWAP_I32 : AtomicCmpSwap; ++ defm I8_ : AtomicCmpSwapInstrs<"8", GPR32>; ++ defm I16_ : AtomicCmpSwapInstrs<"16", GPR32>; ++ defm I32_ : AtomicCmpSwapInstrs<"32", GPR32>; + + def ATOMIC_LOAD_MAX_I8 : Atomic2Ops; + def ATOMIC_LOAD_MAX_I16 : Atomic2Ops; +@@ -465,8 +469,29 @@ def : LoongArchPat<(atomic_store_32 addrimm14lsl2:$a, GPR32:$v), + def : LoongArchPat<(atomic_store_32 addr:$a, GPR32:$v), + (ST_W32 GPR32:$v, addr:$a)>; + +-def : LoongArchPat<(LoongArchDBAR (i32 immz)), +- (DBAR 0)>; ++// DBAR hint encoding for LA664 and later micro-architectures, paraphrased from ++// the Linux patch revealing it [1]: ++// ++// - Bit 4: kind of constraint (0: completion, 1: ordering) ++// - Bit 3: barrier for previous read (0: true, 1: false) ++// - Bit 2: barrier for previous write (0: true, 1: false) ++// - Bit 1: barrier for succeeding read (0: true, 1: false) ++// - Bit 0: barrier for succeeding write (0: true, 1: false) ++// ++// Hint 0x700: barrier for "read after read" from the same address, which is ++// e.g. needed by LL-SC loops on older models. (DBAR 0x700 behaves the same as ++// nop if such reordering is disabled on supporting newer models.) ++// ++// [1]: https://lore.kernel.org/loongarch/20230516124536.535343-1-chenhuacai@loongson.cn/ ++// ++// Implementations without support for the finer-granularity hints simply treat ++// all as the full barrier (DBAR 0), so we can unconditionally start emiting the ++// more precise hints right away. 
++ ++def : Pat<(atomic_fence 4, timm), (DBAR 0b10100)>; // acquire ++def : Pat<(atomic_fence 5, timm), (DBAR 0b10010)>; // release ++def : Pat<(atomic_fence 6, timm), (DBAR 0b10000)>; // acqrel ++def : Pat<(atomic_fence 7, timm), (DBAR 0b10000)>; // seqcst + + def : LoongArchPat<(i32 (extloadi1 addr:$src)), (LD_BU32 addr:$src)>; + def : LoongArchPat<(i32 (extloadi8 addr:$src)), (LD_BU32 addr:$src)>; +diff --git a/llvm/lib/Target/LoongArch/LoongArchExpandPseudo.cpp b/llvm/lib/Target/LoongArch/LoongArchExpandPseudo.cpp +index 0f33e1db6..2e13e5b83 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchExpandPseudo.cpp ++++ b/llvm/lib/Target/LoongArch/LoongArchExpandPseudo.cpp +@@ -93,18 +93,6 @@ namespace { + char LoongArchExpandPseudo::ID = 0; + } + +-static bool hasDbar(MachineBasicBlock *MBB) { +- +- for (MachineBasicBlock::iterator MBBb = MBB->begin(), MBBe = MBB->end(); +- MBBb != MBBe; ++MBBb) { +- if (MBBb->getOpcode() == LoongArch::DBAR) +- return true; +- if (MBBb->mayLoad() || MBBb->mayStore()) +- break; +- } +- return false; +-} +- + bool LoongArchExpandPseudo::expandAtomicCmpSwapSubword( + MachineBasicBlock &BB, MachineBasicBlock::iterator I, + MachineBasicBlock::iterator &NMBBI) { +@@ -129,8 +117,8 @@ bool LoongArchExpandPseudo::expandAtomicCmpSwapSubword( + unsigned Mask2 = I->getOperand(4).getReg(); + unsigned ShiftNewVal = I->getOperand(5).getReg(); + unsigned ShiftAmnt = I->getOperand(6).getReg(); +- unsigned Scratch = I->getOperand(7).getReg(); +- unsigned Scratch2 = I->getOperand(8).getReg(); ++ unsigned Scratch = I->getOperand(8).getReg(); ++ unsigned Scratch2 = I->getOperand(9).getReg(); + + // insert new blocks after the current block + const BasicBlock *LLVM_BB = BB.getBasicBlock(); +@@ -201,10 +189,21 @@ bool LoongArchExpandPseudo::expandAtomicCmpSwapSubword( + + BuildMI(sinkMBB, DL, TII->get(SEOp), Dest).addReg(Dest); + +- if (!hasDbar(sinkMBB)) { +- MachineBasicBlock::iterator Pos = sinkMBB->begin(); +- BuildMI(*sinkMBB, Pos, DL, TII->get(LoongArch::DBAR)).addImm(DBAR_HINT); ++ AtomicOrdering Ordering = ++ static_cast(I->getOperand(7).getImm()); ++ int hint; ++ switch (Ordering) { ++ case AtomicOrdering::Acquire: ++ case AtomicOrdering::AcquireRelease: ++ case AtomicOrdering::SequentiallyConsistent: ++ // acquire ++ hint = 0b10100; ++ break; ++ default: ++ hint = 0x700; + } ++ MachineBasicBlock::iterator Pos = sinkMBB->begin(); ++ BuildMI(*sinkMBB, Pos, DL, TII->get(LoongArch::DBAR)).addImm(hint); + + LivePhysRegs LiveRegs; + computeAndAddLiveIns(LiveRegs, *loop1MBB); +@@ -250,7 +249,7 @@ bool LoongArchExpandPseudo::expandAtomicCmpSwap(MachineBasicBlock &BB, + unsigned Ptr = I->getOperand(1).getReg(); + unsigned OldVal = I->getOperand(2).getReg(); + unsigned NewVal = I->getOperand(3).getReg(); +- unsigned Scratch = I->getOperand(4).getReg(); ++ unsigned Scratch = I->getOperand(5).getReg(); + + // insert new blocks after the current block + const BasicBlock *LLVM_BB = BB.getBasicBlock(); +@@ -295,10 +294,21 @@ bool LoongArchExpandPseudo::expandAtomicCmpSwap(MachineBasicBlock &BB, + BuildMI(loop2MBB, DL, TII->get(BEQ)) + .addReg(Scratch, RegState::Kill).addReg(ZERO).addMBB(loop1MBB); + +- if (!hasDbar(exitMBB)) { +- MachineBasicBlock::iterator Pos = exitMBB->begin(); +- BuildMI(*exitMBB, Pos, DL, TII->get(LoongArch::DBAR)).addImm(DBAR_HINT); ++ AtomicOrdering Ordering = ++ static_cast(I->getOperand(4).getImm()); ++ int hint; ++ switch (Ordering) { ++ case AtomicOrdering::Acquire: ++ case AtomicOrdering::AcquireRelease: ++ case AtomicOrdering::SequentiallyConsistent: ++ // 
TODO: acquire ++ hint = 0; ++ break; ++ default: ++ hint = 0x700; + } ++ MachineBasicBlock::iterator Pos = exitMBB->begin(); ++ BuildMI(*exitMBB, Pos, DL, TII->get(LoongArch::DBAR)).addImm(hint); + + LivePhysRegs LiveRegs; + computeAndAddLiveIns(LiveRegs, *loop1MBB); +@@ -1938,7 +1948,8 @@ bool LoongArchExpandPseudo::expandAtomicBinOp(MachineBasicBlock &BB, + + BB.addSuccessor(loopMBB, BranchProbability::getOne()); + loopMBB->addSuccessor(exitMBB); +- loopMBB->addSuccessor(loopMBB); ++ if (!Opcode && IsNand) ++ loopMBB->addSuccessor(loopMBB); + loopMBB->normalizeSuccProbs(); + + assert((OldVal != Ptr) && "Clobbered the wrong ptr reg!"); +diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +index 4c5f3ffd8..4dcac7a09 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp ++++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +@@ -157,7 +157,6 @@ const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const { + case LoongArchISD::CMovFP_T: return "LoongArchISD::CMovFP_T"; + case LoongArchISD::CMovFP_F: return "LoongArchISD::CMovFP_F"; + case LoongArchISD::TruncIntFP: return "LoongArchISD::TruncIntFP"; +- case LoongArchISD::DBAR: return "LoongArchISD::DBAR"; + case LoongArchISD::BSTRPICK: return "LoongArchISD::BSTRPICK"; + case LoongArchISD::BSTRINS: return "LoongArchISD::BSTRINS"; + case LoongArchISD::VALL_ZERO: +@@ -3619,13 +3618,28 @@ LoongArchTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, + case LoongArch::ATOMIC_LOAD_UMIN_I64: + return emitAtomicBinary(MI, BB); + +- case LoongArch::ATOMIC_CMP_SWAP_I8: ++ case LoongArch::I8_ATOMIC_CMP_SWAP_ACQUIRE: ++ case LoongArch::I8_ATOMIC_CMP_SWAP_ACQ_REL: ++ case LoongArch::I8_ATOMIC_CMP_SWAP_MONOTONIC: ++ case LoongArch::I8_ATOMIC_CMP_SWAP_RELEASE: ++ case LoongArch::I8_ATOMIC_CMP_SWAP_SEQ_CST: + return emitAtomicCmpSwapPartword(MI, BB, 1); +- case LoongArch::ATOMIC_CMP_SWAP_I16: ++ case LoongArch::I16_ATOMIC_CMP_SWAP_ACQUIRE: ++ case LoongArch::I16_ATOMIC_CMP_SWAP_ACQ_REL: ++ case LoongArch::I16_ATOMIC_CMP_SWAP_MONOTONIC: ++ case LoongArch::I16_ATOMIC_CMP_SWAP_RELEASE: ++ case LoongArch::I16_ATOMIC_CMP_SWAP_SEQ_CST: + return emitAtomicCmpSwapPartword(MI, BB, 2); +- case LoongArch::ATOMIC_CMP_SWAP_I32: +- return emitAtomicCmpSwap(MI, BB); +- case LoongArch::ATOMIC_CMP_SWAP_I64: ++ case LoongArch::I32_ATOMIC_CMP_SWAP_ACQUIRE: ++ case LoongArch::I32_ATOMIC_CMP_SWAP_ACQ_REL: ++ case LoongArch::I32_ATOMIC_CMP_SWAP_MONOTONIC: ++ case LoongArch::I32_ATOMIC_CMP_SWAP_RELEASE: ++ case LoongArch::I32_ATOMIC_CMP_SWAP_SEQ_CST: ++ case LoongArch::I64_ATOMIC_CMP_SWAP_ACQUIRE: ++ case LoongArch::I64_ATOMIC_CMP_SWAP_ACQ_REL: ++ case LoongArch::I64_ATOMIC_CMP_SWAP_MONOTONIC: ++ case LoongArch::I64_ATOMIC_CMP_SWAP_RELEASE: ++ case LoongArch::I64_ATOMIC_CMP_SWAP_SEQ_CST: + return emitAtomicCmpSwap(MI, BB); + + case LoongArch::PseudoSELECT_I: +@@ -4024,11 +4038,6 @@ LoongArchTargetLowering::emitAtomicBinary(MachineInstr &MI, + .addReg(Scratch, RegState::Define | RegState::EarlyClobber | + RegState::Implicit | RegState::Dead); + +- if(MI.getOpcode() == LoongArch::ATOMIC_LOAD_NAND_I32 +- || MI.getOpcode() == LoongArch::ATOMIC_LOAD_NAND_I64){ +- BuildMI(*BB, II, DL, TII->get(LoongArch::DBAR)).addImm(DBAR_HINT); +- } +- + MI.eraseFromParent(); + + return BB; +@@ -4220,7 +4229,6 @@ MachineBasicBlock *LoongArchTargetLowering::emitAtomicBinaryPartword( + // emitAtomicBinary. 
In summary, we need a scratch register which is going to + // be undef, that is unique among registers chosen for the instruction. + +- BuildMI(BB, DL, TII->get(LoongArch::DBAR)).addImm(0); + BuildMI(BB, DL, TII->get(AtomicOp)) + .addReg(Dest, RegState::Define | RegState::EarlyClobber) + .addReg(AlignedAddr) +@@ -4252,11 +4260,26 @@ MachineBasicBlock *LoongArchTargetLowering::emitAtomicBinaryPartword( + MachineBasicBlock * + LoongArchTargetLowering::emitAtomicCmpSwap(MachineInstr &MI, + MachineBasicBlock *BB) const { +- assert((MI.getOpcode() == LoongArch::ATOMIC_CMP_SWAP_I32 || +- MI.getOpcode() == LoongArch::ATOMIC_CMP_SWAP_I64) && ++ unsigned Op = MI.getOpcode(); ++ assert((Op == LoongArch::I32_ATOMIC_CMP_SWAP_ACQUIRE || ++ Op == LoongArch::I32_ATOMIC_CMP_SWAP_ACQ_REL || ++ Op == LoongArch::I32_ATOMIC_CMP_SWAP_MONOTONIC || ++ Op == LoongArch::I32_ATOMIC_CMP_SWAP_RELEASE || ++ Op == LoongArch::I32_ATOMIC_CMP_SWAP_SEQ_CST || ++ Op == LoongArch::I64_ATOMIC_CMP_SWAP_ACQUIRE || ++ Op == LoongArch::I64_ATOMIC_CMP_SWAP_ACQ_REL || ++ Op == LoongArch::I64_ATOMIC_CMP_SWAP_MONOTONIC || ++ Op == LoongArch::I64_ATOMIC_CMP_SWAP_RELEASE || ++ Op == LoongArch::I64_ATOMIC_CMP_SWAP_SEQ_CST) && + "Unsupported atomic psseudo for EmitAtomicCmpSwap."); + +- const unsigned Size = MI.getOpcode() == LoongArch::ATOMIC_CMP_SWAP_I32 ? 4 : 8; ++ const unsigned Size = (Op == LoongArch::I32_ATOMIC_CMP_SWAP_ACQUIRE || ++ Op == LoongArch::I32_ATOMIC_CMP_SWAP_ACQ_REL || ++ Op == LoongArch::I32_ATOMIC_CMP_SWAP_MONOTONIC || ++ Op == LoongArch::I32_ATOMIC_CMP_SWAP_RELEASE || ++ Op == LoongArch::I32_ATOMIC_CMP_SWAP_SEQ_CST) ++ ? 4 ++ : 8; + + MachineFunction *MF = BB->getParent(); + MachineRegisterInfo &MRI = MF->getRegInfo(); +@@ -4264,9 +4287,8 @@ LoongArchTargetLowering::emitAtomicCmpSwap(MachineInstr &MI, + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + DebugLoc DL = MI.getDebugLoc(); + +- unsigned AtomicOp = MI.getOpcode() == LoongArch::ATOMIC_CMP_SWAP_I32 +- ? LoongArch::ATOMIC_CMP_SWAP_I32_POSTRA +- : LoongArch::ATOMIC_CMP_SWAP_I64_POSTRA; ++ unsigned AtomicOp = Size == 4 ? LoongArch::ATOMIC_CMP_SWAP_I32_POSTRA ++ : LoongArch::ATOMIC_CMP_SWAP_I64_POSTRA; + unsigned Dest = MI.getOperand(0).getReg(); + unsigned Ptr = MI.getOperand(1).getReg(); + unsigned OldVal = MI.getOperand(2).getReg(); +@@ -4288,21 +4310,43 @@ LoongArchTargetLowering::emitAtomicCmpSwap(MachineInstr &MI, + BuildMI(*BB, II, DL, TII->get(LoongArch::COPY), OldValCopy).addReg(OldVal); + BuildMI(*BB, II, DL, TII->get(LoongArch::COPY), NewValCopy).addReg(NewVal); + ++ AtomicOrdering Ordering; ++ switch (Op) { ++ case LoongArch::I32_ATOMIC_CMP_SWAP_ACQUIRE: ++ case LoongArch::I64_ATOMIC_CMP_SWAP_ACQUIRE: ++ Ordering = AtomicOrdering::Acquire; ++ break; ++ case LoongArch::I32_ATOMIC_CMP_SWAP_ACQ_REL: ++ case LoongArch::I64_ATOMIC_CMP_SWAP_ACQ_REL: ++ Ordering = AtomicOrdering::AcquireRelease; ++ break; ++ case LoongArch::I32_ATOMIC_CMP_SWAP_SEQ_CST: ++ case LoongArch::I64_ATOMIC_CMP_SWAP_SEQ_CST: ++ Ordering = AtomicOrdering::SequentiallyConsistent; ++ break; ++ case LoongArch::I32_ATOMIC_CMP_SWAP_RELEASE: ++ case LoongArch::I64_ATOMIC_CMP_SWAP_RELEASE: ++ Ordering = AtomicOrdering::Release; ++ break; ++ case LoongArch::I32_ATOMIC_CMP_SWAP_MONOTONIC: ++ case LoongArch::I64_ATOMIC_CMP_SWAP_MONOTONIC: ++ Ordering = AtomicOrdering::Monotonic; ++ break; ++ } ++ + // The purposes of the flags on the scratch registers is explained in + // emitAtomicBinary. 
In summary, we need a scratch register which is going to + // be undef, that is unique among registers chosen for the instruction. + +- BuildMI(*BB, II, DL, TII->get(LoongArch::DBAR)).addImm(0); + BuildMI(*BB, II, DL, TII->get(AtomicOp)) + .addReg(Dest, RegState::Define | RegState::EarlyClobber) + .addReg(PtrCopy, RegState::Kill) + .addReg(OldValCopy, RegState::Kill) + .addReg(NewValCopy, RegState::Kill) ++ .addImm(static_cast(Ordering)) + .addReg(Scratch, RegState::EarlyClobber | RegState::Define | + RegState::Dead | RegState::Implicit); + +- BuildMI(*BB, II, DL, TII->get(LoongArch::DBAR)).addImm(DBAR_HINT); +- + MI.eraseFromParent(); // The instruction is gone now. + + return BB; +@@ -4312,6 +4356,18 @@ MachineBasicBlock *LoongArchTargetLowering::emitAtomicCmpSwapPartword( + MachineInstr &MI, MachineBasicBlock *BB, unsigned Size) const { + assert((Size == 1 || Size == 2) && + "Unsupported size for EmitAtomicCmpSwapPartial."); ++ unsigned Op = MI.getOpcode(); ++ assert((Op == LoongArch::I8_ATOMIC_CMP_SWAP_ACQUIRE || ++ Op == LoongArch::I8_ATOMIC_CMP_SWAP_ACQ_REL || ++ Op == LoongArch::I8_ATOMIC_CMP_SWAP_MONOTONIC || ++ Op == LoongArch::I8_ATOMIC_CMP_SWAP_RELEASE || ++ Op == LoongArch::I8_ATOMIC_CMP_SWAP_SEQ_CST || ++ Op == LoongArch::I16_ATOMIC_CMP_SWAP_ACQUIRE || ++ Op == LoongArch::I16_ATOMIC_CMP_SWAP_ACQ_REL || ++ Op == LoongArch::I16_ATOMIC_CMP_SWAP_MONOTONIC || ++ Op == LoongArch::I16_ATOMIC_CMP_SWAP_RELEASE || ++ Op == LoongArch::I16_ATOMIC_CMP_SWAP_SEQ_CST) && ++ "Unsupported atomic psseudo for EmitAtomicCmpSwapPartword."); + + MachineFunction *MF = BB->getParent(); + MachineRegisterInfo &RegInfo = MF->getRegInfo(); +@@ -4340,9 +4396,8 @@ MachineBasicBlock *LoongArchTargetLowering::emitAtomicCmpSwapPartword( + unsigned Mask3 = RegInfo.createVirtualRegister(RC); + unsigned MaskedCmpVal = RegInfo.createVirtualRegister(RC); + unsigned MaskedNewVal = RegInfo.createVirtualRegister(RC); +- unsigned AtomicOp = MI.getOpcode() == LoongArch::ATOMIC_CMP_SWAP_I8 +- ? LoongArch::ATOMIC_CMP_SWAP_I8_POSTRA +- : LoongArch::ATOMIC_CMP_SWAP_I16_POSTRA; ++ unsigned AtomicOp = Size == 1 ? LoongArch::ATOMIC_CMP_SWAP_I8_POSTRA ++ : LoongArch::ATOMIC_CMP_SWAP_I16_POSTRA; + + // The scratch registers here with the EarlyClobber | Define | Dead | Implicit + // flags are used to coerce the register allocator and the machine verifier to +@@ -4427,11 +4482,33 @@ MachineBasicBlock *LoongArchTargetLowering::emitAtomicCmpSwapPartword( + BuildMI(BB, DL, TII->get(LoongArch::SLL_W), ShiftedNewVal) + .addReg(MaskedNewVal).addReg(ShiftAmt); + ++ AtomicOrdering Ordering; ++ switch (Op) { ++ case LoongArch::I8_ATOMIC_CMP_SWAP_ACQUIRE: ++ case LoongArch::I16_ATOMIC_CMP_SWAP_ACQUIRE: ++ Ordering = AtomicOrdering::Acquire; ++ break; ++ case LoongArch::I8_ATOMIC_CMP_SWAP_ACQ_REL: ++ case LoongArch::I16_ATOMIC_CMP_SWAP_ACQ_REL: ++ Ordering = AtomicOrdering::AcquireRelease; ++ break; ++ case LoongArch::I8_ATOMIC_CMP_SWAP_SEQ_CST: ++ case LoongArch::I16_ATOMIC_CMP_SWAP_SEQ_CST: ++ Ordering = AtomicOrdering::SequentiallyConsistent; ++ break; ++ case LoongArch::I8_ATOMIC_CMP_SWAP_RELEASE: ++ case LoongArch::I16_ATOMIC_CMP_SWAP_RELEASE: ++ Ordering = AtomicOrdering::Release; ++ break; ++ case LoongArch::I8_ATOMIC_CMP_SWAP_MONOTONIC: ++ case LoongArch::I16_ATOMIC_CMP_SWAP_MONOTONIC: ++ Ordering = AtomicOrdering::Monotonic; ++ break; ++ } + // The purposes of the flags on the scratch registers are explained in + // emitAtomicBinary. 
In summary, we need a scratch register which is going to + // be undef, that is unique among the register chosen for the instruction. + +- BuildMI(BB, DL, TII->get(LoongArch::DBAR)).addImm(0); + BuildMI(BB, DL, TII->get(AtomicOp)) + .addReg(Dest, RegState::Define | RegState::EarlyClobber) + .addReg(AlignedAddr) +@@ -4440,6 +4517,7 @@ MachineBasicBlock *LoongArchTargetLowering::emitAtomicCmpSwapPartword( + .addReg(Mask2) + .addReg(ShiftedNewVal) + .addReg(ShiftAmt) ++ .addImm(static_cast(Ordering)) + .addReg(Scratch, RegState::EarlyClobber | RegState::Define | + RegState::Dead | RegState::Implicit) + .addReg(Scratch2, RegState::EarlyClobber | RegState::Define | +@@ -4735,13 +4813,9 @@ SDValue LoongArchTargetLowering::lowerEH_RETURN(SDValue Op, SelectionDAG &DAG) + } + + SDValue LoongArchTargetLowering::lowerATOMIC_FENCE(SDValue Op, +- SelectionDAG &DAG) const { +- // FIXME: Need pseudo-fence for 'singlethread' fences +- // FIXME: Set SType for weaker fences where supported/appropriate. +- unsigned SType = 0; +- SDLoc DL(Op); +- return DAG.getNode(LoongArchISD::DBAR, DL, MVT::Other, Op.getOperand(0), +- DAG.getConstant(SType, DL, MVT::i32)); ++ SelectionDAG &DAG) const { ++ // TODO: handle SyncScope::SingleThread. ++ return Op; + } + + SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op, +diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h +index 64e06b53f..ea23b6350 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h ++++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h +@@ -93,8 +93,6 @@ class TargetRegisterClass; + // Software Exception Return. + EH_RETURN, + +- DBAR, +- + BSTRPICK, + BSTRINS, + +diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h +index 53191a94d..272e1e25e 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h ++++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h +@@ -17,8 +17,6 @@ + #ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCHINSTRINFO_H + #define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHINSTRINFO_H + +-#define DBAR_HINT 0x700 +- + #include "MCTargetDesc/LoongArchMCTargetDesc.h" + #include "LoongArch.h" + #include "LoongArchRegisterInfo.h" +diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td +index 2d505ee25..96eb554c9 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td +@@ -17,13 +17,13 @@ def SDT_Bstrins : SDTypeProfile<1, 4, [SDTCisInt<0>, SDTCisSameAs<0, 1>, + SDTCisVT<2, i32>, SDTCisSameAs<2, 3>, + SDTCisSameAs<0, 4>]>; + ++def SDT_REVBD : SDTypeProfile<1, 1, [SDTCisInt<0>]>; ++def LoongArchREVBD : SDNode<"LoongArchISD::REVBD", SDT_REVBD>; ++ + def LoongArchBstrpick : SDNode<"LoongArchISD::BSTRPICK", SDT_Bstrpick>; + + def LoongArchBstrins : SDNode<"LoongArchISD::BSTRINS", SDT_Bstrins>; + +-def SDT_DBAR : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>; +-def LoongArchDBAR : SDNode<"LoongArchISD::DBAR", SDT_DBAR, [SDNPHasChain,SDNPSideEffect]>; +- + def SDT_LoongArchEHRET : SDTypeProfile<0, 2, [SDTCisInt<0>, SDTCisPtrTy<1>]>; + + def LoongArchehret : SDNode<"LoongArchISD::EH_RETURN", SDT_LoongArchEHRET, +@@ -1032,15 +1032,73 @@ class AtomicCmpSwap : + LoongArchPseudo<(outs DRC:$dst), (ins PtrRC:$ptr, DRC:$cmp, DRC:$swap), + [(set DRC:$dst, (Op iPTR:$ptr, DRC:$cmp, DRC:$swap))]>; + ++// These atomic cmpxchg PatFrags only care about the failure ordering. 
++// In llvm <= 13, the PatFrags defined by multiclass `ternary_atomic_op_ord` ++// in TargetSelectionDAG.td only care about the success ordering while llvm > 13 ++// care about the `merged` ordering which is the stronger one of success and ++// failure. See https://reviews.llvm.org/D106729. But for LoongArch LL-SC we ++// only need to care about the failure ordering as explained in ++// https://github.com/llvm/llvm-project/pull/67391. So we defined these ++// PatFrags. ++multiclass ternary_atomic_op_failure_ord { ++ def NAME#_failure_monotonic : PatFrag<(ops node:$ptr, node:$cmp, node:$val), ++ (!cast(NAME) node:$ptr, node:$cmp, node:$val), [{ ++ AtomicOrdering Ordering = cast(N)->getFailureOrdering(); ++ return Ordering == AtomicOrdering::Monotonic; ++ }]>; ++ def NAME#_failure_acquire : PatFrag<(ops node:$ptr, node:$cmp, node:$val), ++ (!cast(NAME) node:$ptr, node:$cmp, node:$val), [{ ++ AtomicOrdering Ordering = cast(N)->getFailureOrdering(); ++ return Ordering == AtomicOrdering::Acquire; ++ }]>; ++ def NAME#_failure_release : PatFrag<(ops node:$ptr, node:$cmp, node:$val), ++ (!cast(NAME) node:$ptr, node:$cmp, node:$val), [{ ++ AtomicOrdering Ordering = cast(N)->getFailureOrdering(); ++ return Ordering == AtomicOrdering::Release; ++ }]>; ++ def NAME#_failure_acq_rel : PatFrag<(ops node:$ptr, node:$cmp, node:$val), ++ (!cast(NAME) node:$ptr, node:$cmp, node:$val), [{ ++ AtomicOrdering Ordering = cast(N)->getFailureOrdering(); ++ return Ordering == AtomicOrdering::AcquireRelease; ++ }]>; ++ def NAME#_failure_seq_cst : PatFrag<(ops node:$ptr, node:$cmp, node:$val), ++ (!cast(NAME) node:$ptr, node:$cmp, node:$val), [{ ++ AtomicOrdering Ordering = cast(N)->getFailureOrdering(); ++ return Ordering == AtomicOrdering::SequentiallyConsistent; ++ }]>; ++} ++ ++defm atomic_cmp_swap_64 : ternary_atomic_op_failure_ord; ++ ++multiclass AtomicCmpSwapInstrs { ++ def ATOMIC_CMP_SWAP_MONOTONIC : ++ AtomicCmpSwap("atomic_cmp_swap_"#Bits#"_failure_monotonic"), ++ RC>; ++ def ATOMIC_CMP_SWAP_ACQUIRE : ++ AtomicCmpSwap("atomic_cmp_swap_"#Bits#"_failure_acquire"), ++ RC>; ++ def ATOMIC_CMP_SWAP_RELEASE : ++ AtomicCmpSwap("atomic_cmp_swap_"#Bits#"_failure_release"), ++ RC>; ++ def ATOMIC_CMP_SWAP_ACQ_REL : ++ AtomicCmpSwap("atomic_cmp_swap_"#Bits#"_failure_acq_rel"), ++ RC>; ++ def ATOMIC_CMP_SWAP_SEQ_CST : ++ AtomicCmpSwap("atomic_cmp_swap_"#Bits#"_failure_seq_cst"), ++ RC>; ++} ++ + class AtomicCmpSwapPostRA : +- LoongArchPseudo<(outs RC:$dst), (ins PtrRC:$ptr, RC:$cmp, RC:$swap), []> { ++ LoongArchPseudo<(outs RC:$dst), ++ (ins PtrRC:$ptr, RC:$cmp, RC:$swap, i32imm:$ordering), []> { + let mayLoad = 1; + let mayStore = 1; + } + + class AtomicCmpSwapSubwordPostRA : +- LoongArchPseudo<(outs RC:$dst), (ins PtrRC:$ptr, RC:$mask, RC:$ShiftCmpVal, +- RC:$mask2, RC:$ShiftNewVal, RC:$ShiftAmt), []> { ++ LoongArchPseudo<(outs RC:$dst), ++ (ins PtrRC:$ptr, RC:$mask, RC:$ShiftCmpVal, RC:$mask2, ++ RC:$ShiftNewVal, RC:$ShiftAmt, i32imm:$ordering), []> { + let mayLoad = 1; + let mayStore = 1; + } +@@ -1062,7 +1120,7 @@ def CTZ_D : Int_Reg2<"ctz.d", GPR64Opnd, cttz>, R2I<0b01011>; + + def REVB_4H : Int_Reg2<"revb.4h", GPR64Opnd>, R2I<0b01101>; //[] + def REVB_2W : Int_Reg2<"revb.2w", GPR64Opnd>, R2I<0b01110>; +-def REVB_D : Int_Reg2<"revb.d", GPR64Opnd>, R2I<0b01111>; ++def REVB_D : Int_Reg2<"revb.d", GPR64Opnd, LoongArchREVBD>, R2I<0b01111>; + def REVH_2W : Int_Reg2<"revh.2w", GPR64Opnd>, R2I<0b10000>; + def REVH_D : Int_Reg2<"revh.d", GPR64Opnd>, R2I<0b10001>; //[] + +@@ -1450,7 +1508,8 @@ let usesCustomInserter = 1 in 
{ + def ATOMIC_LOAD_XOR_I64 : Atomic2Ops; + def ATOMIC_LOAD_NAND_I64 : Atomic2Ops; + def ATOMIC_SWAP_I64 : Atomic2Ops; +- def ATOMIC_CMP_SWAP_I64 : AtomicCmpSwap; ++ ++ defm I64_ : AtomicCmpSwapInstrs<"64", GPR64>; + + def ATOMIC_LOAD_MAX_I64 : Atomic2Ops; + def ATOMIC_LOAD_MIN_I64 : Atomic2Ops; +diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +index 2677a79fa..7ba77a88a 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +@@ -4734,213 +4734,8 @@ def : LASXPat<(srl + + + +-def : LASXPat<(sra +- (v32i8 (add +- (v32i8 (add (v32i8 ( +- build_vector (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1) +- )), +- (v32i8 (add LASX256B:$a, LASX256B:$b)) +- )), +- (v32i8 (srl +- (v32i8 ( add (v32i8( +- build_vector (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1) +- )), +- (v32i8 (add LASX256B:$a, LASX256B:$b)) +- )), +- (v32i8 (build_vector (i32 7),(i32 7),(i32 7),(i32 7), +- (i32 7),(i32 7),(i32 7),(i32 7), +- (i32 7),(i32 7),(i32 7),(i32 7), +- (i32 7),(i32 7),(i32 7),(i32 7), +- (i32 7),(i32 7),(i32 7),(i32 7), +- (i32 7),(i32 7),(i32 7),(i32 7), +- (i32 7),(i32 7),(i32 7),(i32 7), +- (i32 7),(i32 7),(i32 7),(i32 7)) +- ) +- ) +- ) +- ) +- ), +- (v32i8 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1) +- ))), +- (XVAVGR_B (v32i8 LASX256B:$a), (v32i8 LASX256B:$b))>; +- +- +-def : LASXPat<(sra +- (v16i16 (add +- (v16i16 (add (v16i16 ( +- build_vector (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1) +- )), +- (v16i16 (add LASX256H:$a, LASX256H:$b)) +- )), +- (v16i16 (srl +- (v16i16 (add (v16i16 ( +- build_vector (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1) +- )), +- (v16i16 (add LASX256H:$a, LASX256H:$b)) +- )), +- (v16i16 (build_vector +- (i32 15),(i32 15),(i32 15),(i32 15), +- (i32 15),(i32 15),(i32 15),(i32 15), +- (i32 15),(i32 15),(i32 15),(i32 15), +- (i32 15),(i32 15),(i32 15),(i32 15)) +- ) +- ) +- ) +- ) +- ), +- (v16i16 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1) +- ))), +- (XVAVGR_H (v16i16 LASX256H:$a), (v16i16 LASX256H:$b))>; +- +- +-def : LASXPat<(sra +- (v8i32 (add +- (v8i32 (add (v8i32 ( +- build_vector (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1) +- )), +- (v8i32 (add LASX256W:$a, LASX256W:$b)) +- )), +- (v8i32 (srl +- (v8i32 (add (v8i32 ( +- build_vector (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1) +- )), +- (v8i32 (add LASX256W:$a, LASX256W:$b)) +- )), +- (v8i32 (build_vector +- (i32 31),(i32 31),(i32 
31),(i32 31), +- (i32 31),(i32 31),(i32 31),(i32 31) +- ) +- ) +- ) +- ) +- ) +- ), +- (v8i32 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1)))), +- (XVAVGR_W (v8i32 LASX256W:$a), (v8i32 LASX256W:$b))>; +- +-def : LASXPat<(sra +- (v4i64 (add +- (v4i64 (add (v4i64 ( +- build_vector (i64 1),(i64 1),(i64 1),(i64 1) +- )), +- (v4i64 (add LASX256D:$a, LASX256D:$b)) +- )), +- (v4i64 (srl +- (v4i64 (add (v4i64 ( +- build_vector (i64 1),(i64 1),(i64 1),(i64 1) +- )), +- (v4i64 (add LASX256D:$a, LASX256D:$b)) +- )), +- (v4i64 (build_vector +- (i64 63),(i64 63),(i64 63),(i64 63))) +- ) +- ) +- ) +- ), +- (v4i64 (build_vector (i64 1),(i64 1),(i64 1),(i64 1)))), +- (XVAVGR_D (v4i64 LASX256D:$a), (v4i64 LASX256D:$b))>; +- + + +-def : LASXPat<(srl +- (v32i8 (add (v32i8 ( +- build_vector (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1) +- )), +- (v32i8 (add LASX256B:$a, LASX256B:$b)) +- )), +- (v32i8 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1)) +- ) +- ), +- (XVAVGR_BU (v32i8 LASX256B:$a), (v32i8 LASX256B:$b))>; +- +-def : LASXPat<(srl +- (v16i16 (add (v16i16 ( +- build_vector (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1) +- )), +- (v16i16 (add LASX256H:$a, LASX256H:$b)) +- )), +- (v16i16 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1)) +- ) +- ), +- (XVAVGR_HU (v16i16 LASX256H:$a), (v16i16 LASX256H:$b))>; +- +-def : LASXPat<(srl +- (v8i32 (add (v8i32 ( +- build_vector (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1) +- )), +- (v8i32 (add LASX256W:$a, LASX256W:$b)) +- )), +- (v8i32 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1)) +- ) +- ), +- (XVAVGR_WU (v8i32 LASX256W:$a), (v8i32 LASX256W:$b))>; +- +-def : LASXPat<(srl +- (v4i64 (add (v4i64 ( +- build_vector (i64 1),(i64 1),(i64 1),(i64 1) +- )), +- (v4i64 (add LASX256D:$a, LASX256D:$b)) +- )), +- (v4i64 (build_vector (i64 1),(i64 1),(i64 1),(i64 1)) +- ) +- ), +- (XVAVGR_DU (v4i64 LASX256D:$a), (v4i64 LASX256D:$b))>; +- + + def : LASXPat<(mulhs LASX256D:$a, LASX256D:$b), + (XVMUH_D LASX256D:$a, LASX256D:$b)>; +@@ -5651,6 +5446,13 @@ def : LASXPat<(and v4i64:$xj, (xor (shl xvsplat_imm_eq_1, v4i64:$xk), + (XVBITCLR_D v4i64:$xj, v4i64:$xk)>; + + ++def : LASXPat<(insert_subvector (v32i8 LASX256B:$dst), ++ (v16i8 LSX128B:$src), (i64 0)), ++ (XVPERMI_Q (v32i8 LASX256B:$dst), ++ (v32i8 (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), ++ LSX128B:$src, sub_128)), ++ (i32 48))>; ++ + def : LASXPat<(insert_subvector (v16i16 LASX256H:$dst), + (v8i16 LSX128H:$src), (i64 0)), + (XVPERMI_QH (v16i16 LASX256H:$dst), +@@ -5671,3 +5473,27 @@ def : LASXPat<(insert_subvector (v4i64 LASX256D:$dst), + (v4i64 (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), + LSX128D:$src, sub_128)), + (i32 48))>; ++ ++def : LASXPat<(insert_subvector (v4i64 LASX256D:$dst), ++ (v2i64 LSX128D:$src), (i64 2)), ++ (XVPERMI_QD (v4i64 LASX256D:$dst), ++ 
(v4i64 (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), ++ LSX128D:$src, sub_128)), (i32 32))>; ++ ++def : LASXPat<(insert_subvector (v8i32 LASX256W:$dst), ++ (v4i32 LSX128W:$src), (i64 4)), ++ (XVPERMI_QW (v8i32 LASX256W:$dst), ++ (v8i32 (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), ++ LSX128W:$src, sub_128)), (i32 32))>; ++ ++def : LASXPat<(insert_subvector (v16i16 LASX256H:$dst), ++ (v8i16 LSX128H:$src), (i64 8)), ++ (XVPERMI_QH (v16i16 LASX256H:$dst), ++ (v16i16 (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), ++ LSX128H:$src, sub_128)), (i32 32))>; ++ ++def : LASXPat<(insert_subvector (v32i8 LASX256B:$dst), ++ (v16i8 LSX128B:$src), (i64 16)), ++ (XVPERMI_Q (v32i8 LASX256B:$dst), ++ (v32i8 (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), ++ LSX128B:$src, sub_128)), (i32 32))>; +diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +index 3d0ea3901..37a0d9066 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +@@ -5090,165 +5090,6 @@ def : LSXPat<(srl + + + +-def : LSXPat<(sra +- (v16i8 (add +- (v16i8 (add (v16i8 ( +- build_vector (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1) +- )), +- (v16i8 (add LSX128B:$a, LSX128B:$b)) +- )), +- (v16i8 (srl +- (v16i8 ( add (v16i8( +- build_vector (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1) +- )), +- (v16i8 (add LSX128B:$a, LSX128B:$b)) +- )), +- (v16i8 (build_vector (i32 7),(i32 7),(i32 7),(i32 7), +- (i32 7),(i32 7),(i32 7),(i32 7), +- (i32 7),(i32 7),(i32 7),(i32 7), +- (i32 7),(i32 7),(i32 7),(i32 7)) +- ) +- ) +- ) +- ) +- ), +- (v16i8 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1) +- ))), +- (VAVGR_B (v16i8 LSX128B:$a), (v16i8 LSX128B:$b))>; +- +-def : LSXPat<(sra +- (v8i16 (add +- (v8i16 (add (v8i16 ( +- build_vector (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1) +- )), +- (v8i16 (add LSX128H:$a, LSX128H:$b)) +- )), +- (v8i16 (srl +- (v8i16 (add (v8i16 ( +- build_vector (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1) +- )), +- (v8i16 (add LSX128H:$a, LSX128H:$b)) +- )), +- (v8i16 (build_vector (i32 15),(i32 15),(i32 15),(i32 15), +- (i32 15),(i32 15),(i32 15),(i32 15)) +- ) +- ) +- ) +- ) +- ), +- (v8i16 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1) +- ))), +- (VAVGR_H (v8i16 LSX128H:$a), (v8i16 LSX128H:$b))>; +- +-def : LSXPat<(sra +- (v4i32 (add +- (v4i32 (add (v4i32 ( +- build_vector (i32 1),(i32 1),(i32 1),(i32 1) +- )), +- (v4i32 (add LSX128W:$a, LSX128W:$b)) +- )), +- (v4i32 (srl +- (v4i32 (add (v4i32 ( +- build_vector (i32 1),(i32 1),(i32 1),(i32 1) +- )), +- (v4i32 (add LSX128W:$a, LSX128W:$b)) +- )), +- (v4i32 (build_vector (i32 31),(i32 31),(i32 31),(i32 31)) +- ) +- ) +- ) +- ) +- ), +- (v4i32 (build_vector (i32 1),(i32 1),(i32 1),(i32 1)))), +- (VAVGR_W (v4i32 LSX128W:$a), (v4i32 LSX128W:$b))>; +- +-def : LSXPat<(sra +- (v2i64 (add +- (v2i64 (add (v2i64 ( +- build_vector (i64 1),(i64 1) +- )), +- (v2i64 (add LSX128D:$a, LSX128D:$b)) +- )), +- (v2i64 (srl +- (v2i64 (add (v2i64 ( +- build_vector (i64 1),(i64 1) +- )), +- (v2i64 (add LSX128D:$a, LSX128D:$b)) +- )), +- (v2i64 (build_vector (i64 63),(i64 63))) +- ) +- ) +- ) +- ), +- (v2i64 (build_vector (i64 1),(i64 1)))), 
+- (VAVGR_D (v2i64 LSX128D:$a), (v2i64 LSX128D:$b))>; +- +- +- +- +-def : LSXPat<(srl +- (v16i8 (add (v16i8 ( +- build_vector (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1) +- )), +- (v16i8 (add LSX128B:$a, LSX128B:$b)) +- )), +- (v16i8 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1)) +- ) +- ), +- (VAVGR_BU (v16i8 LSX128B:$a), (v16i8 LSX128B:$b))>; +- +-def : LSXPat<(srl +- (v8i16 (add (v8i16 ( +- build_vector (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1) +- )), +- (v8i16 (add LSX128H:$a, LSX128H:$b)) +- )), +- (v8i16 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1)) +- ) +- ), +- (VAVGR_HU (v8i16 LSX128H:$a), (v8i16 LSX128H:$b))>; +- +-def : LSXPat<(srl +- (v4i32 (add (v4i32 ( +- build_vector (i32 1),(i32 1),(i32 1),(i32 1) +- )), +- (v4i32 (add LSX128W:$a, LSX128W:$b)) +- )), +- (v4i32 (build_vector (i32 1),(i32 1),(i32 1),(i32 1)) +- ) +- ), +- (VAVGR_WU (v4i32 LSX128W:$a), (v4i32 LSX128W:$b))>; +- +-def : LSXPat<(srl +- (v2i64 (add (v2i64 ( +- build_vector (i64 1),(i64 1) +- )), +- (v2i64 (add LSX128D:$a, LSX128D:$b)) +- )), +- (v2i64 (build_vector (i64 1),(i64 1)) +- ) +- ), +- (VAVGR_DU (v2i64 LSX128D:$a), (v2i64 LSX128D:$b))>; + + + def : LSXPat<(mulhs LSX128D:$a, LSX128D:$b), +diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp +index b9ba9e536..75b7838bf 100644 +--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp ++++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp +@@ -77,11 +77,19 @@ void LoongArchAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixu + } + + Optional LoongArchAsmBackend::getFixupKind(StringRef Name) const { +- return StringSwitch>(Name) +- .Case("R_LARCH_NONE", (MCFixupKind)LoongArch::fixup_LARCH_NONE) +- .Case("R_LARCH_32", FK_Data_4) +- .Case("R_LARCH_64", FK_Data_8) +- .Default(MCAsmBackend::getFixupKind(Name)); ++ if (STI.getTargetTriple().isOSBinFormatELF()) { ++ unsigned Type = llvm::StringSwitch(Name) ++#define ELF_RELOC(X, Y) .Case(#X, Y) ++#include "llvm/BinaryFormat/ELFRelocs/LoongArch.def" ++#undef ELF_RELOC ++ .Case("BFD_RELOC_NONE", ELF::R_LARCH_NONE) ++ .Case("BFD_RELOC_32", ELF::R_LARCH_32) ++ .Case("BFD_RELOC_64", ELF::R_LARCH_64) ++ .Default(-1u); ++ if (Type != -1u) ++ return static_cast(FirstLiteralRelocationKind + Type); ++ } ++ return None; + } + + const MCFixupKindInfo &LoongArchAsmBackend:: +@@ -143,6 +151,11 @@ getFixupKindInfo(MCFixupKind Kind) const { + { "fixup_LARCH_SUB64", 0, 0, 0}, + }; + ++ // Fixup kinds from .reloc directive are like R_LARCH_NONE. They do not ++ // require any extra processing. ++ if (Kind >= FirstLiteralRelocationKind) ++ return MCAsmBackend::getFixupKindInfo(FK_NONE); ++ + if (Kind < FirstTargetFixupKind) + return MCAsmBackend::getFixupKindInfo(Kind); + +@@ -173,6 +186,10 @@ bool LoongArchAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count, + bool LoongArchAsmBackend::shouldForceRelocation(const MCAssembler &Asm, + const MCFixup &Fixup, + const MCValue &Target) { ++ // .reloc directive should force relocation. 
++ if (Fixup.getKind() >= FirstLiteralRelocationKind) ++ return true; ++ + const unsigned FixupKind = Fixup.getKind(); + switch (FixupKind) { + default: +@@ -209,6 +226,6 @@ MCAsmBackend *llvm::createLoongArchAsmBackend(const Target &T, + const MCTargetOptions &Options) { + LoongArchABIInfo ABI = LoongArchABIInfo::computeTargetABI( + STI.getTargetTriple(), STI.getCPU(), Options); +- return new LoongArchAsmBackend(T, MRI, STI.getTargetTriple(), STI.getCPU(), +- ABI.IsLPX32()); ++ return new LoongArchAsmBackend(STI, T, MRI, STI.getTargetTriple(), ++ STI.getCPU(), ABI.IsLPX32()); + } +diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h +index 45ae6af44..d96791f7d 100644 +--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h ++++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h +@@ -28,14 +28,15 @@ class MCSymbolELF; + class Target; + + class LoongArchAsmBackend : public MCAsmBackend { ++ const MCSubtargetInfo &STI; + Triple TheTriple; + bool IsLPX32; + + public: +- LoongArchAsmBackend(const Target &T, const MCRegisterInfo &MRI, const Triple &TT, +- StringRef CPU, bool LPX32) +- : MCAsmBackend(support::little), +- TheTriple(TT), IsLPX32(LPX32) { ++ LoongArchAsmBackend(const MCSubtargetInfo &STI, const Target &T, ++ const MCRegisterInfo &MRI, const Triple &TT, ++ StringRef CPU, bool LPX32) ++ : MCAsmBackend(support::little), STI(STI), TheTriple(TT), IsLPX32(LPX32) { + assert(TT.isLittleEndian()); + } + +diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp +index e00b9af9d..c08f3ba0c 100644 +--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp ++++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp +@@ -62,6 +62,9 @@ unsigned LoongArchELFObjectWriter::getRelocType(MCContext &Ctx, + ///XXX:Reloc + unsigned Kind = (unsigned)Fixup.getKind(); + ++ if (Kind >= FirstLiteralRelocationKind) ++ return Kind - FirstLiteralRelocationKind; ++ + switch (Kind) { + default: + return ELF::R_LARCH_NONE; +diff --git a/llvm/lib/Target/Mips/MipsExpandPseudo.cpp b/llvm/lib/Target/Mips/MipsExpandPseudo.cpp +index d242083f9..1a2d89fbb 100644 +--- a/llvm/lib/Target/Mips/MipsExpandPseudo.cpp ++++ b/llvm/lib/Target/Mips/MipsExpandPseudo.cpp +@@ -27,6 +27,12 @@ + + using namespace llvm; + ++static cl::opt ++EnableLoongson3FixLLSC("mips-fix-loongson3-llsc", cl::Hidden, ++ cl::desc("Work around loongson3 llsc erratum"), ++ cl::init(true)); ++ ++ + #define DEBUG_TYPE "mips-pseudo" + + namespace { +@@ -188,6 +194,21 @@ bool MipsExpandPseudo::expandAtomicCmpSwapSubword( + .addImm(ShiftImm); + } + ++ if (EnableLoongson3FixLLSC) { ++ bool Has_sync = false; ++ for (MachineBasicBlock::iterator MBBb = sinkMBB->begin(), MBBe = sinkMBB->end(); ++ MBBb != MBBe; ++MBBb) { ++ Has_sync |= MBBb->getOpcode() == Mips::SYNC ? 
true : false; ++ if (MBBb->mayLoad() || MBBb->mayStore()) ++ break; ++ } ++ ++ if (!Has_sync) { ++ MachineBasicBlock::iterator Pos = sinkMBB->begin(); ++ BuildMI(*sinkMBB, Pos, DL, TII->get(Mips::SYNC)).addImm(0); ++ } ++ } ++ + LivePhysRegs LiveRegs; + computeAndAddLiveIns(LiveRegs, *loop1MBB); + computeAndAddLiveIns(LiveRegs, *loop2MBB); +@@ -289,6 +310,20 @@ bool MipsExpandPseudo::expandAtomicCmpSwap(MachineBasicBlock &BB, + BuildMI(loop2MBB, DL, TII->get(BEQ)) + .addReg(Scratch, RegState::Kill).addReg(ZERO).addMBB(loop1MBB); + ++ if (EnableLoongson3FixLLSC) { ++ bool Has_sync = false; ++ for (MachineBasicBlock::iterator MBBb = exitMBB->begin(), MBBe = exitMBB->end(); ++ MBBb != MBBe; ++MBBb) { ++ Has_sync |= MBBb->getOpcode() == Mips::SYNC ? true : false; ++ if (MBBb->mayLoad() || MBBb->mayStore()) ++ break; ++ } ++ if (!Has_sync) { ++ MachineBasicBlock::iterator Pos = exitMBB->begin(); ++ BuildMI(*exitMBB, Pos, DL, TII->get(Mips::SYNC)).addImm(0); ++ } ++ } ++ + LivePhysRegs LiveRegs; + computeAndAddLiveIns(LiveRegs, *loop1MBB); + computeAndAddLiveIns(LiveRegs, *loop2MBB); +diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp +index 3274e36ab..fadba1472 100644 +--- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp ++++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp +@@ -104,6 +104,7 @@ static const uint64_t kSystemZ_ShadowOffset64 = 1ULL << 52; + static const uint64_t kMIPS_ShadowOffsetN32 = 1ULL << 29; + static const uint64_t kMIPS32_ShadowOffset32 = 0x0aaa0000; + static const uint64_t kMIPS64_ShadowOffset64 = 1ULL << 37; ++static const uint64_t kLoongArch64_ShadowOffset64 = 1ULL << 37; + static const uint64_t kAArch64_ShadowOffset64 = 1ULL << 36; + static const uint64_t kRISCV64_ShadowOffset64 = 0xd55550000; + static const uint64_t kFreeBSD_ShadowOffset32 = 1ULL << 30; +@@ -481,6 +482,7 @@ static ShadowMapping getShadowMapping(const Triple &TargetTriple, int LongSize, + bool IsMIPSN32ABI = TargetTriple.getEnvironment() == Triple::GNUABIN32; + bool IsMIPS32 = TargetTriple.isMIPS32(); + bool IsMIPS64 = TargetTriple.isMIPS64(); ++ bool IsLoongArch64 = TargetTriple.isLoongArch64(); + bool IsArmOrThumb = TargetTriple.isARM() || TargetTriple.isThumb(); + bool IsAArch64 = TargetTriple.getArch() == Triple::aarch64; + bool IsRISCV64 = TargetTriple.getArch() == Triple::riscv64; +@@ -548,7 +550,9 @@ static ShadowMapping getShadowMapping(const Triple &TargetTriple, int LongSize, + Mapping.Offset = kWindowsShadowOffset64; + } else if (IsMIPS64) + Mapping.Offset = kMIPS64_ShadowOffset64; +- else if (IsIOS) ++ else if (IsLoongArch64) { ++ Mapping.Offset = kLoongArch64_ShadowOffset64; ++ } else if (IsIOS) + Mapping.Offset = kDynamicShadowSentinel; + else if (IsMacOS && IsAArch64) + Mapping.Offset = kDynamicShadowSentinel; +diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +index 4606bd5de..70155a590 100644 +--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp ++++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +@@ -385,6 +385,14 @@ static const MemoryMapParams Linux_X86_64_MemoryMapParams = { + #endif + }; + ++// loongarch64 Linux ++static const MemoryMapParams Linux_LOONGARCH64_MemoryMapParams = { ++ 0, // AndMask (not used) ++ 0x008000000000, // XorMask ++ 0, // ShadowBase (not used) ++ 0x002000000000, // OriginBase ++}; ++ + // mips64 Linux + static const MemoryMapParams Linux_MIPS64_MemoryMapParams = { + 
0, // AndMask (not used) +@@ -454,6 +462,11 @@ static const PlatformMemoryMapParams Linux_X86_MemoryMapParams = { + &Linux_X86_64_MemoryMapParams, + }; + ++static const PlatformMemoryMapParams Linux_LOONGARCH_MemoryMapParams = { ++ nullptr, ++ &Linux_LOONGARCH64_MemoryMapParams, ++}; ++ + static const PlatformMemoryMapParams Linux_MIPS_MemoryMapParams = { + nullptr, + &Linux_MIPS64_MemoryMapParams, +@@ -516,6 +529,7 @@ public: + private: + friend struct MemorySanitizerVisitor; + friend struct VarArgAMD64Helper; ++ friend struct VarArgLoongArch64Helper; + friend struct VarArgMIPS64Helper; + friend struct VarArgAArch64Helper; + friend struct VarArgPowerPC64Helper; +@@ -937,6 +951,9 @@ void MemorySanitizer::initializeModule(Module &M) { + case Triple::x86: + MapParams = Linux_X86_MemoryMapParams.bits32; + break; ++ case Triple::loongarch64: ++ MapParams = Linux_LOONGARCH_MemoryMapParams.bits64; ++ break; + case Triple::mips64: + case Triple::mips64el: + MapParams = Linux_MIPS_MemoryMapParams.bits64; +@@ -4445,6 +4462,117 @@ struct VarArgAMD64Helper : public VarArgHelper { + } + }; + ++/// LoongArch64-specific implementation of VarArgHelper. ++struct VarArgLoongArch64Helper : public VarArgHelper { ++ Function &F; ++ MemorySanitizer &MS; ++ MemorySanitizerVisitor &MSV; ++ Value *VAArgTLSCopy = nullptr; ++ Value *VAArgSize = nullptr; ++ ++ SmallVector VAStartInstrumentationList; ++ ++ VarArgLoongArch64Helper(Function &F, MemorySanitizer &MS, ++ MemorySanitizerVisitor &MSV) : F(F), MS(MS), MSV(MSV) {} ++ ++ void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override { ++ unsigned VAArgOffset = 0; ++ const DataLayout &DL = F.getParent()->getDataLayout(); ++ for (auto ArgIt = CB.arg_begin() + CB.getFunctionType()->getNumParams(), ++ End = CB.arg_end(); ++ ArgIt != End; ++ArgIt) { ++ Triple TargetTriple(F.getParent()->getTargetTriple()); ++ Value *A = *ArgIt; ++ Value *Base; ++ uint64_t ArgSize = DL.getTypeAllocSize(A->getType()); ++ Base = getShadowPtrForVAArgument(A->getType(), IRB, VAArgOffset, ArgSize); ++ VAArgOffset += ArgSize; ++ VAArgOffset = alignTo(VAArgOffset, 8); ++ if (!Base) ++ continue; ++ IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment); ++ } ++ ++ Constant *TotalVAArgSize = ConstantInt::get(IRB.getInt64Ty(), VAArgOffset); ++ // Here using VAArgOverflowSizeTLS as VAArgSizeTLS to avoid creation of ++ // a new class member i.e. it is the total size of all VarArgs. ++ IRB.CreateStore(TotalVAArgSize, MS.VAArgOverflowSizeTLS); ++ } ++ ++ /// Compute the shadow address for a given va_arg. ++ Value *getShadowPtrForVAArgument(Type *Ty, IRBuilder<> &IRB, ++ unsigned ArgOffset, unsigned ArgSize) { ++ // Make sure we don't overflow __msan_va_arg_tls. 
++ if (ArgOffset + ArgSize > kParamTLSSize) ++ return nullptr; ++ Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy); ++ Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset)); ++ return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(Ty), 0), ++ "_msarg"); ++ } ++ ++ void visitVAStartInst(VAStartInst &I) override { ++ IRBuilder<> IRB(&I); ++ VAStartInstrumentationList.push_back(&I); ++ Value *VAListTag = I.getArgOperand(0); ++ Value *ShadowPtr, *OriginPtr; ++ const Align Alignment = Align(8); ++ std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr( ++ VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true); ++ IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()), ++ /* size */ 8, Alignment, false); ++ } ++ ++ void visitVACopyInst(VACopyInst &I) override { ++ IRBuilder<> IRB(&I); ++ VAStartInstrumentationList.push_back(&I); ++ Value *VAListTag = I.getArgOperand(0); ++ Value *ShadowPtr, *OriginPtr; ++ const Align Alignment = Align(8); ++ std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr( ++ VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true); ++ IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()), ++ /* size */ 8, Alignment, false); ++ } ++ ++ void finalizeInstrumentation() override { ++ assert(!VAArgSize && !VAArgTLSCopy && ++ "finalizeInstrumentation called twice"); ++ IRBuilder<> IRB(MSV.FnPrologueEnd); ++ VAArgSize = IRB.CreateLoad(IRB.getInt64Ty(), MS.VAArgOverflowSizeTLS); ++ Value *CopySize = IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, 0), ++ VAArgSize); ++ ++ if (!VAStartInstrumentationList.empty()) { ++ // If there is a va_start in this function, make a backup copy of ++ // va_arg_tls somewhere in the function entry block. ++ VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize); ++ IRB.CreateMemCpy(VAArgTLSCopy, Align(8), MS.VAArgTLS, Align(8), CopySize); ++ } ++ ++ // Instrument va_start. ++ // Copy va_list shadow from the backup copy of the TLS contents. ++ for (size_t i = 0, n = VAStartInstrumentationList.size(); i < n; i++) { ++ CallInst *OrigInst = VAStartInstrumentationList[i]; ++ IRBuilder<> IRB(OrigInst->getNextNode()); ++ Value *VAListTag = OrigInst->getArgOperand(0); ++ Type *RegSaveAreaPtrTy = Type::getInt64PtrTy(*MS.C); ++ Value *RegSaveAreaPtrPtr = ++ IRB.CreateIntToPtr(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy), ++ PointerType::get(Type::getInt64PtrTy(*MS.C), 0)); ++ Value *RegSaveAreaPtr = ++ IRB.CreateLoad(RegSaveAreaPtrTy, RegSaveAreaPtrPtr); ++ Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr; ++ const Align Alignment = Align(8); ++ std::tie(RegSaveAreaShadowPtr, RegSaveAreaOriginPtr) = ++ MSV.getShadowOriginPtr(RegSaveAreaPtr, IRB, IRB.getInt8Ty(), ++ Alignment, /*isStore*/ true); ++ IRB.CreateMemCpy(RegSaveAreaShadowPtr, Alignment, VAArgTLSCopy, Alignment, ++ CopySize); ++ } ++ } ++}; ++ + /// MIPS64-specific implementation of VarArgHelper. 
+ struct VarArgMIPS64Helper : public VarArgHelper { + Function &F; +@@ -5344,6 +5472,8 @@ static VarArgHelper *CreateVarArgHelper(Function &Func, MemorySanitizer &Msan, + return new VarArgPowerPC64Helper(Func, Msan, Visitor); + else if (TargetTriple.getArch() == Triple::systemz) + return new VarArgSystemZHelper(Func, Msan, Visitor); ++ else if (TargetTriple.getArch() == Triple::loongarch64) ++ return new VarArgLoongArch64Helper(Func, Msan, Visitor); + else + return new VarArgNoOpHelper(Func, Msan, Visitor); + } +diff --git a/llvm/lib/XRay/InstrumentationMap.cpp b/llvm/lib/XRay/InstrumentationMap.cpp +index c60efa465..fc3e6745c 100644 +--- a/llvm/lib/XRay/InstrumentationMap.cpp ++++ b/llvm/lib/XRay/InstrumentationMap.cpp +@@ -61,6 +61,7 @@ loadObj(StringRef Filename, object::OwningBinary &ObjFile, + if ((!ObjFile.getBinary()->isELF() && !ObjFile.getBinary()->isMachO()) || + !(ObjFile.getBinary()->getArch() == Triple::x86_64 || + ObjFile.getBinary()->getArch() == Triple::ppc64le || ++ ObjFile.getBinary()->getArch() == Triple::loongarch64 || + ObjFile.getBinary()->getArch() == Triple::arm || + ObjFile.getBinary()->getArch() == Triple::aarch64)) + return make_error( +diff --git a/llvm/test/CodeGen/LoongArch/atomic-cmpxchg.ll b/llvm/test/CodeGen/LoongArch/atomic-cmpxchg.ll +new file mode 100644 +index 000000000..795b5c6b2 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/atomic-cmpxchg.ll +@@ -0,0 +1,902 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64 ++ ++define void @cmpxchg_i8_acquire_acquire(i8* %ptr, i8 %cmp, i8 %val) nounwind { ++; LA64-LABEL: cmpxchg_i8_acquire_acquire: ++; LA64: # %bb.0: ++; LA64-NEXT: slli.w $r6, $r6, 0 ++; LA64-NEXT: slli.w $r5, $r5, 0 ++; LA64-NEXT: addi.d $r7, $zero, -4 ++; LA64-NEXT: and $r7, $r4, $r7 ++; LA64-NEXT: andi $r4, $r4, 3 ++; LA64-NEXT: slli.w $r4, $r4, 3 ++; LA64-NEXT: ori $r8, $zero, 255 ++; LA64-NEXT: sll.w $r8, $r8, $r4 ++; LA64-NEXT: nor $r9, $zero, $r8 ++; LA64-NEXT: andi $r5, $r5, 255 ++; LA64-NEXT: sll.w $r5, $r5, $r4 ++; LA64-NEXT: andi $r6, $r6, 255 ++; LA64-NEXT: sll.w $r6, $r6, $r4 ++; LA64-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $r11, $r7, 0 ++; LA64-NEXT: and $r12, $r11, $r8 ++; LA64-NEXT: bne $r12, $r5, .LBB0_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB0_1 Depth=1 ++; LA64-NEXT: and $r11, $r11, $r9 ++; LA64-NEXT: or $r11, $r11, $r6 ++; LA64-NEXT: sc.w $r11, $r7, 0 ++; LA64-NEXT: beq $r11, $zero, .LBB0_1 ++; LA64-NEXT: .LBB0_3: ++; LA64-NEXT: dbar 20 ++; LA64-NEXT: srl.w $r10, $r12, $r4 ++; LA64-NEXT: ext.w.b $r10, $r10 ++; LA64-NEXT: # %bb.4: ++; LA64-NEXT: jr $ra ++ %res = cmpxchg i8* %ptr, i8 %cmp, i8 %val acquire acquire ++ ret void ++} ++ ++define void @cmpxchg_i8_release_acquire(i8* %ptr, i8 %cmp, i8 %val) nounwind { ++; LA64-LABEL: cmpxchg_i8_release_acquire: ++; LA64: # %bb.0: ++; LA64-NEXT: slli.w $r6, $r6, 0 ++; LA64-NEXT: slli.w $r5, $r5, 0 ++; LA64-NEXT: addi.d $r7, $zero, -4 ++; LA64-NEXT: and $r7, $r4, $r7 ++; LA64-NEXT: andi $r4, $r4, 3 ++; LA64-NEXT: slli.w $r4, $r4, 3 ++; LA64-NEXT: ori $r8, $zero, 255 ++; LA64-NEXT: sll.w $r8, $r8, $r4 ++; LA64-NEXT: nor $r9, $zero, $r8 ++; LA64-NEXT: andi $r5, $r5, 255 ++; LA64-NEXT: sll.w $r5, $r5, $r4 ++; LA64-NEXT: andi $r6, $r6, 255 ++; LA64-NEXT: sll.w $r6, $r6, $r4 ++; LA64-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $r11, $r7, 0 ++; LA64-NEXT: and $r12, $r11, $r8 ++; LA64-NEXT: bne $r12, $r5, .LBB1_3 ++; LA64-NEXT: 
# %bb.2: # in Loop: Header=BB1_1 Depth=1 ++; LA64-NEXT: and $r11, $r11, $r9 ++; LA64-NEXT: or $r11, $r11, $r6 ++; LA64-NEXT: sc.w $r11, $r7, 0 ++; LA64-NEXT: beq $r11, $zero, .LBB1_1 ++; LA64-NEXT: .LBB1_3: ++; LA64-NEXT: dbar 20 ++; LA64-NEXT: srl.w $r10, $r12, $r4 ++; LA64-NEXT: ext.w.b $r10, $r10 ++; LA64-NEXT: # %bb.4: ++; LA64-NEXT: jr $ra ++ %res = cmpxchg i8* %ptr, i8 %cmp, i8 %val release acquire ++ ret void ++} ++ ++;; Check that only the failure ordering is taken care. ++define void @cmpxchg_i8_acquire_monotonic(i8* %ptr, i8 %cmp, i8 %val) nounwind { ++; LA64-LABEL: cmpxchg_i8_acquire_monotonic: ++; LA64: # %bb.0: ++; LA64-NEXT: slli.w $r6, $r6, 0 ++; LA64-NEXT: slli.w $r5, $r5, 0 ++; LA64-NEXT: addi.d $r7, $zero, -4 ++; LA64-NEXT: and $r7, $r4, $r7 ++; LA64-NEXT: andi $r4, $r4, 3 ++; LA64-NEXT: slli.w $r4, $r4, 3 ++; LA64-NEXT: ori $r8, $zero, 255 ++; LA64-NEXT: sll.w $r8, $r8, $r4 ++; LA64-NEXT: nor $r9, $zero, $r8 ++; LA64-NEXT: andi $r5, $r5, 255 ++; LA64-NEXT: sll.w $r5, $r5, $r4 ++; LA64-NEXT: andi $r6, $r6, 255 ++; LA64-NEXT: sll.w $r6, $r6, $r4 ++; LA64-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $r11, $r7, 0 ++; LA64-NEXT: and $r12, $r11, $r8 ++; LA64-NEXT: bne $r12, $r5, .LBB2_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB2_1 Depth=1 ++; LA64-NEXT: and $r11, $r11, $r9 ++; LA64-NEXT: or $r11, $r11, $r6 ++; LA64-NEXT: sc.w $r11, $r7, 0 ++; LA64-NEXT: beq $r11, $zero, .LBB2_1 ++; LA64-NEXT: .LBB2_3: ++; LA64-NEXT: dbar 1792 ++; LA64-NEXT: srl.w $r10, $r12, $r4 ++; LA64-NEXT: ext.w.b $r10, $r10 ++; LA64-NEXT: # %bb.4: ++; LA64-NEXT: jr $ra ++ %res = cmpxchg i8* %ptr, i8 %cmp, i8 %val acquire monotonic ++ ret void ++} ++ ++define void @cmpxchg_i16_acquire_acquire(i16* %ptr, i16 %cmp, i16 %val) nounwind { ++; LA64-LABEL: cmpxchg_i16_acquire_acquire: ++; LA64: # %bb.0: ++; LA64-NEXT: slli.w $r6, $r6, 0 ++; LA64-NEXT: slli.w $r5, $r5, 0 ++; LA64-NEXT: addi.d $r7, $zero, -4 ++; LA64-NEXT: and $r7, $r4, $r7 ++; LA64-NEXT: andi $r4, $r4, 3 ++; LA64-NEXT: slli.w $r4, $r4, 3 ++; LA64-NEXT: lu12i.w $r8, 15 ++; LA64-NEXT: ori $r8, $r8, 4095 ++; LA64-NEXT: sll.w $r9, $r8, $r4 ++; LA64-NEXT: nor $r10, $zero, $r9 ++; LA64-NEXT: and $r5, $r5, $r8 ++; LA64-NEXT: sll.w $r5, $r5, $r4 ++; LA64-NEXT: and $r6, $r6, $r8 ++; LA64-NEXT: sll.w $r6, $r6, $r4 ++; LA64-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $r11, $r7, 0 ++; LA64-NEXT: and $r12, $r11, $r9 ++; LA64-NEXT: bne $r12, $r5, .LBB3_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB3_1 Depth=1 ++; LA64-NEXT: and $r11, $r11, $r10 ++; LA64-NEXT: or $r11, $r11, $r6 ++; LA64-NEXT: sc.w $r11, $r7, 0 ++; LA64-NEXT: beq $r11, $zero, .LBB3_1 ++; LA64-NEXT: .LBB3_3: ++; LA64-NEXT: dbar 20 ++; LA64-NEXT: srl.w $r8, $r12, $r4 ++; LA64-NEXT: ext.w.h $r8, $r8 ++; LA64-NEXT: # %bb.4: ++; LA64-NEXT: jr $ra ++ %res = cmpxchg i16* %ptr, i16 %cmp, i16 %val acquire acquire ++ ret void ++} ++ ++define void @cmpxchg_i16_release_acquire(i16* %ptr, i16 %cmp, i16 %val) nounwind { ++; LA64-LABEL: cmpxchg_i16_release_acquire: ++; LA64: # %bb.0: ++; LA64-NEXT: slli.w $r6, $r6, 0 ++; LA64-NEXT: slli.w $r5, $r5, 0 ++; LA64-NEXT: addi.d $r7, $zero, -4 ++; LA64-NEXT: and $r7, $r4, $r7 ++; LA64-NEXT: andi $r4, $r4, 3 ++; LA64-NEXT: slli.w $r4, $r4, 3 ++; LA64-NEXT: lu12i.w $r8, 15 ++; LA64-NEXT: ori $r8, $r8, 4095 ++; LA64-NEXT: sll.w $r9, $r8, $r4 ++; LA64-NEXT: nor $r10, $zero, $r9 ++; LA64-NEXT: and $r5, $r5, $r8 ++; LA64-NEXT: sll.w $r5, $r5, $r4 ++; LA64-NEXT: and $r6, $r6, $r8 ++; LA64-NEXT: sll.w $r6, $r6, $r4 ++; 
LA64-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $r11, $r7, 0 ++; LA64-NEXT: and $r12, $r11, $r9 ++; LA64-NEXT: bne $r12, $r5, .LBB4_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB4_1 Depth=1 ++; LA64-NEXT: and $r11, $r11, $r10 ++; LA64-NEXT: or $r11, $r11, $r6 ++; LA64-NEXT: sc.w $r11, $r7, 0 ++; LA64-NEXT: beq $r11, $zero, .LBB4_1 ++; LA64-NEXT: .LBB4_3: ++; LA64-NEXT: dbar 20 ++; LA64-NEXT: srl.w $r8, $r12, $r4 ++; LA64-NEXT: ext.w.h $r8, $r8 ++; LA64-NEXT: # %bb.4: ++; LA64-NEXT: jr $ra ++ %res = cmpxchg i16* %ptr, i16 %cmp, i16 %val release acquire ++ ret void ++} ++ ++;; Check that only the failure ordering is taken care. ++define void @cmpxchg_i16_acquire_monotonic(i16* %ptr, i16 %cmp, i16 %val) nounwind { ++; LA64-LABEL: cmpxchg_i16_acquire_monotonic: ++; LA64: # %bb.0: ++; LA64-NEXT: slli.w $r6, $r6, 0 ++; LA64-NEXT: slli.w $r5, $r5, 0 ++; LA64-NEXT: addi.d $r7, $zero, -4 ++; LA64-NEXT: and $r7, $r4, $r7 ++; LA64-NEXT: andi $r4, $r4, 3 ++; LA64-NEXT: slli.w $r4, $r4, 3 ++; LA64-NEXT: lu12i.w $r8, 15 ++; LA64-NEXT: ori $r8, $r8, 4095 ++; LA64-NEXT: sll.w $r9, $r8, $r4 ++; LA64-NEXT: nor $r10, $zero, $r9 ++; LA64-NEXT: and $r5, $r5, $r8 ++; LA64-NEXT: sll.w $r5, $r5, $r4 ++; LA64-NEXT: and $r6, $r6, $r8 ++; LA64-NEXT: sll.w $r6, $r6, $r4 ++; LA64-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $r11, $r7, 0 ++; LA64-NEXT: and $r12, $r11, $r9 ++; LA64-NEXT: bne $r12, $r5, .LBB5_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB5_1 Depth=1 ++; LA64-NEXT: and $r11, $r11, $r10 ++; LA64-NEXT: or $r11, $r11, $r6 ++; LA64-NEXT: sc.w $r11, $r7, 0 ++; LA64-NEXT: beq $r11, $zero, .LBB5_1 ++; LA64-NEXT: .LBB5_3: ++; LA64-NEXT: dbar 1792 ++; LA64-NEXT: srl.w $r8, $r12, $r4 ++; LA64-NEXT: ext.w.h $r8, $r8 ++; LA64-NEXT: # %bb.4: ++; LA64-NEXT: jr $ra ++ %res = cmpxchg i16* %ptr, i16 %cmp, i16 %val acquire monotonic ++ ret void ++} ++ ++define void @cmpxchg_i32_acquire_acquire(i32* %ptr, i32 %cmp, i32 %val) nounwind { ++; LA64-LABEL: cmpxchg_i32_acquire_acquire: ++; LA64: # %bb.0: ++; LA64-NEXT: slli.w $r6, $r6, 0 ++; LA64-NEXT: slli.w $r5, $r5, 0 ++; LA64-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $r7, $r4, 0 ++; LA64-NEXT: bne $r7, $r5, .LBB6_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB6_1 Depth=1 ++; LA64-NEXT: move $r8, $r6 ++; LA64-NEXT: sc.w $r8, $r4, 0 ++; LA64-NEXT: beq $r8, $zero, .LBB6_1 ++; LA64-NEXT: .LBB6_3: ++; LA64-NEXT: dbar 0 ++; LA64-NEXT: jr $ra ++ %res = cmpxchg i32* %ptr, i32 %cmp, i32 %val acquire acquire ++ ret void ++} ++ ++define void @cmpxchg_i32_release_acquire(i32* %ptr, i32 %cmp, i32 %val) nounwind { ++; LA64-LABEL: cmpxchg_i32_release_acquire: ++; LA64: # %bb.0: ++; LA64-NEXT: slli.w $r6, $r6, 0 ++; LA64-NEXT: slli.w $r5, $r5, 0 ++; LA64-NEXT: .LBB7_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $r7, $r4, 0 ++; LA64-NEXT: bne $r7, $r5, .LBB7_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB7_1 Depth=1 ++; LA64-NEXT: move $r8, $r6 ++; LA64-NEXT: sc.w $r8, $r4, 0 ++; LA64-NEXT: beq $r8, $zero, .LBB7_1 ++; LA64-NEXT: .LBB7_3: ++; LA64-NEXT: dbar 0 ++; LA64-NEXT: jr $ra ++ %res = cmpxchg i32* %ptr, i32 %cmp, i32 %val release acquire ++ ret void ++} ++ ++;; Check that only the failure ordering is taken care. 
++define void @cmpxchg_i32_acquire_monotonic(i32* %ptr, i32 %cmp, i32 %val) nounwind { ++; LA64-LABEL: cmpxchg_i32_acquire_monotonic: ++; LA64: # %bb.0: ++; LA64-NEXT: slli.w $r6, $r6, 0 ++; LA64-NEXT: slli.w $r5, $r5, 0 ++; LA64-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $r7, $r4, 0 ++; LA64-NEXT: bne $r7, $r5, .LBB8_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB8_1 Depth=1 ++; LA64-NEXT: move $r8, $r6 ++; LA64-NEXT: sc.w $r8, $r4, 0 ++; LA64-NEXT: beq $r8, $zero, .LBB8_1 ++; LA64-NEXT: .LBB8_3: ++; LA64-NEXT: dbar 1792 ++; LA64-NEXT: jr $ra ++ %res = cmpxchg i32* %ptr, i32 %cmp, i32 %val acquire monotonic ++ ret void ++} ++ ++define void @cmpxchg_i64_acquire_acquire(i64* %ptr, i64 %cmp, i64 %val) nounwind { ++; LA64-LABEL: cmpxchg_i64_acquire_acquire: ++; LA64: # %bb.0: ++; LA64-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.d $r7, $r4, 0 ++; LA64-NEXT: bne $r7, $r5, .LBB9_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB9_1 Depth=1 ++; LA64-NEXT: move $r8, $r6 ++; LA64-NEXT: sc.d $r8, $r4, 0 ++; LA64-NEXT: beq $r8, $zero, .LBB9_1 ++; LA64-NEXT: .LBB9_3: ++; LA64-NEXT: dbar 0 ++; LA64-NEXT: jr $ra ++ %res = cmpxchg i64* %ptr, i64 %cmp, i64 %val acquire acquire ++ ret void ++} ++ ++define void @cmpxchg_i64_release_acquire(i64* %ptr, i64 %cmp, i64 %val) nounwind { ++; LA64-LABEL: cmpxchg_i64_release_acquire: ++; LA64: # %bb.0: ++; LA64-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.d $r7, $r4, 0 ++; LA64-NEXT: bne $r7, $r5, .LBB10_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB10_1 Depth=1 ++; LA64-NEXT: move $r8, $r6 ++; LA64-NEXT: sc.d $r8, $r4, 0 ++; LA64-NEXT: beq $r8, $zero, .LBB10_1 ++; LA64-NEXT: .LBB10_3: ++; LA64-NEXT: dbar 0 ++; LA64-NEXT: jr $ra ++ %res = cmpxchg i64* %ptr, i64 %cmp, i64 %val release acquire ++ ret void ++} ++ ++;; Check that only the failure ordering is taken care. 
++define void @cmpxchg_i64_acquire_monotonic(i64* %ptr, i64 %cmp, i64 %val) nounwind { ++; LA64-LABEL: cmpxchg_i64_acquire_monotonic: ++; LA64: # %bb.0: ++; LA64-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.d $r7, $r4, 0 ++; LA64-NEXT: bne $r7, $r5, .LBB11_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB11_1 Depth=1 ++; LA64-NEXT: move $r8, $r6 ++; LA64-NEXT: sc.d $r8, $r4, 0 ++; LA64-NEXT: beq $r8, $zero, .LBB11_1 ++; LA64-NEXT: .LBB11_3: ++; LA64-NEXT: dbar 1792 ++; LA64-NEXT: jr $ra ++ %res = cmpxchg i64* %ptr, i64 %cmp, i64 %val acquire monotonic ++ ret void ++} ++ ++define i8 @cmpxchg_i8_acquire_acquire_reti8(i8* %ptr, i8 %cmp, i8 %val) nounwind { ++; LA64-LABEL: cmpxchg_i8_acquire_acquire_reti8: ++; LA64: # %bb.0: ++; LA64-NEXT: slli.w $r6, $r6, 0 ++; LA64-NEXT: slli.w $r5, $r5, 0 ++; LA64-NEXT: addi.d $r7, $zero, -4 ++; LA64-NEXT: and $r7, $r4, $r7 ++; LA64-NEXT: andi $r4, $r4, 3 ++; LA64-NEXT: slli.w $r8, $r4, 3 ++; LA64-NEXT: ori $r4, $zero, 255 ++; LA64-NEXT: sll.w $r9, $r4, $r8 ++; LA64-NEXT: nor $r10, $zero, $r9 ++; LA64-NEXT: andi $r4, $r5, 255 ++; LA64-NEXT: sll.w $r5, $r4, $r8 ++; LA64-NEXT: andi $r4, $r6, 255 ++; LA64-NEXT: sll.w $r6, $r4, $r8 ++; LA64-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $r11, $r7, 0 ++; LA64-NEXT: and $r12, $r11, $r9 ++; LA64-NEXT: bne $r12, $r5, .LBB12_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB12_1 Depth=1 ++; LA64-NEXT: and $r11, $r11, $r10 ++; LA64-NEXT: or $r11, $r11, $r6 ++; LA64-NEXT: sc.w $r11, $r7, 0 ++; LA64-NEXT: beq $r11, $zero, .LBB12_1 ++; LA64-NEXT: .LBB12_3: ++; LA64-NEXT: dbar 20 ++; LA64-NEXT: srl.w $r4, $r12, $r8 ++; LA64-NEXT: ext.w.b $r4, $r4 ++; LA64-NEXT: # %bb.4: ++; LA64-NEXT: jr $ra ++ %tmp = cmpxchg i8* %ptr, i8 %cmp, i8 %val acquire acquire ++ %res = extractvalue { i8, i1 } %tmp, 0 ++ ret i8 %res ++} ++ ++define i16 @cmpxchg_i16_acquire_acquire_reti16(i16* %ptr, i16 %cmp, i16 %val) nounwind { ++; LA64-LABEL: cmpxchg_i16_acquire_acquire_reti16: ++; LA64: # %bb.0: ++; LA64-NEXT: slli.w $r6, $r6, 0 ++; LA64-NEXT: slli.w $r5, $r5, 0 ++; LA64-NEXT: addi.d $r7, $zero, -4 ++; LA64-NEXT: and $r7, $r4, $r7 ++; LA64-NEXT: andi $r4, $r4, 3 ++; LA64-NEXT: slli.w $r8, $r4, 3 ++; LA64-NEXT: lu12i.w $r4, 15 ++; LA64-NEXT: ori $r4, $r4, 4095 ++; LA64-NEXT: sll.w $r9, $r4, $r8 ++; LA64-NEXT: nor $r10, $zero, $r9 ++; LA64-NEXT: and $r5, $r5, $r4 ++; LA64-NEXT: sll.w $r5, $r5, $r8 ++; LA64-NEXT: and $r4, $r6, $r4 ++; LA64-NEXT: sll.w $r6, $r4, $r8 ++; LA64-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $r11, $r7, 0 ++; LA64-NEXT: and $r12, $r11, $r9 ++; LA64-NEXT: bne $r12, $r5, .LBB13_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB13_1 Depth=1 ++; LA64-NEXT: and $r11, $r11, $r10 ++; LA64-NEXT: or $r11, $r11, $r6 ++; LA64-NEXT: sc.w $r11, $r7, 0 ++; LA64-NEXT: beq $r11, $zero, .LBB13_1 ++; LA64-NEXT: .LBB13_3: ++; LA64-NEXT: dbar 20 ++; LA64-NEXT: srl.w $r4, $r12, $r8 ++; LA64-NEXT: ext.w.h $r4, $r4 ++; LA64-NEXT: # %bb.4: ++; LA64-NEXT: jr $ra ++ %tmp = cmpxchg i16* %ptr, i16 %cmp, i16 %val acquire acquire ++ %res = extractvalue { i16, i1 } %tmp, 0 ++ ret i16 %res ++} ++ ++define i32 @cmpxchg_i32_acquire_acquire_reti32(i32* %ptr, i32 %cmp, i32 %val) nounwind { ++; LA64-LABEL: cmpxchg_i32_acquire_acquire_reti32: ++; LA64: # %bb.0: ++; LA64-NEXT: slli.w $r6, $r6, 0 ++; LA64-NEXT: slli.w $r7, $r5, 0 ++; LA64-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $r5, $r4, 0 ++; LA64-NEXT: bne $r5, $r7, .LBB14_3 ++; LA64-NEXT: # %bb.2: # in Loop: 
Header=BB14_1 Depth=1 ++; LA64-NEXT: move $r8, $r6 ++; LA64-NEXT: sc.w $r8, $r4, 0 ++; LA64-NEXT: beq $r8, $zero, .LBB14_1 ++; LA64-NEXT: .LBB14_3: ++; LA64-NEXT: dbar 0 ++; LA64-NEXT: move $r4, $r5 ++; LA64-NEXT: jr $ra ++ %tmp = cmpxchg i32* %ptr, i32 %cmp, i32 %val acquire acquire ++ %res = extractvalue { i32, i1 } %tmp, 0 ++ ret i32 %res ++} ++ ++define i64 @cmpxchg_i64_acquire_acquire_reti64(i64* %ptr, i64 %cmp, i64 %val) nounwind { ++; LA64-LABEL: cmpxchg_i64_acquire_acquire_reti64: ++; LA64: # %bb.0: ++; LA64-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.d $r7, $r4, 0 ++; LA64-NEXT: bne $r7, $r5, .LBB15_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 ++; LA64-NEXT: move $r8, $r6 ++; LA64-NEXT: sc.d $r8, $r4, 0 ++; LA64-NEXT: beq $r8, $zero, .LBB15_1 ++; LA64-NEXT: .LBB15_3: ++; LA64-NEXT: dbar 0 ++; LA64-NEXT: move $r4, $r7 ++; LA64-NEXT: jr $ra ++ %tmp = cmpxchg i64* %ptr, i64 %cmp, i64 %val acquire acquire ++ %res = extractvalue { i64, i1 } %tmp, 0 ++ ret i64 %res ++} ++ ++define i1 @cmpxchg_i8_acquire_acquire_reti1(i8* %ptr, i8 %cmp, i8 %val) nounwind { ++; LA64-LABEL: cmpxchg_i8_acquire_acquire_reti1: ++; LA64: # %bb.0: ++; LA64-NEXT: slli.w $r6, $r6, 0 ++; LA64-NEXT: slli.w $r5, $r5, 0 ++; LA64-NEXT: addi.d $r7, $zero, -4 ++; LA64-NEXT: and $r7, $r4, $r7 ++; LA64-NEXT: andi $r4, $r4, 3 ++; LA64-NEXT: slli.w $r8, $r4, 3 ++; LA64-NEXT: ori $r4, $zero, 255 ++; LA64-NEXT: sll.w $r9, $r4, $r8 ++; LA64-NEXT: nor $r10, $zero, $r9 ++; LA64-NEXT: andi $r4, $r5, 255 ++; LA64-NEXT: sll.w $r11, $r4, $r8 ++; LA64-NEXT: andi $r4, $r6, 255 ++; LA64-NEXT: sll.w $r6, $r4, $r8 ++; LA64-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $r12, $r7, 0 ++; LA64-NEXT: and $r13, $r12, $r9 ++; LA64-NEXT: bne $r13, $r11, .LBB16_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB16_1 Depth=1 ++; LA64-NEXT: and $r12, $r12, $r10 ++; LA64-NEXT: or $r12, $r12, $r6 ++; LA64-NEXT: sc.w $r12, $r7, 0 ++; LA64-NEXT: beq $r12, $zero, .LBB16_1 ++; LA64-NEXT: .LBB16_3: ++; LA64-NEXT: dbar 20 ++; LA64-NEXT: srl.w $r4, $r13, $r8 ++; LA64-NEXT: ext.w.b $r4, $r4 ++; LA64-NEXT: # %bb.4: ++; LA64-NEXT: ext.w.b $r5, $r5 ++; LA64-NEXT: xor $r4, $r4, $r5 ++; LA64-NEXT: sltui $r4, $r4, 1 ++; LA64-NEXT: jr $ra ++ %tmp = cmpxchg i8* %ptr, i8 %cmp, i8 %val acquire acquire ++ %res = extractvalue { i8, i1 } %tmp, 1 ++ ret i1 %res ++} ++ ++define i1 @cmpxchg_i16_acquire_acquire_reti1(i16* %ptr, i16 %cmp, i16 %val) nounwind { ++; LA64-LABEL: cmpxchg_i16_acquire_acquire_reti1: ++; LA64: # %bb.0: ++; LA64-NEXT: slli.w $r6, $r6, 0 ++; LA64-NEXT: slli.w $r5, $r5, 0 ++; LA64-NEXT: addi.d $r7, $zero, -4 ++; LA64-NEXT: and $r7, $r4, $r7 ++; LA64-NEXT: andi $r4, $r4, 3 ++; LA64-NEXT: slli.w $r8, $r4, 3 ++; LA64-NEXT: lu12i.w $r4, 15 ++; LA64-NEXT: ori $r4, $r4, 4095 ++; LA64-NEXT: sll.w $r9, $r4, $r8 ++; LA64-NEXT: nor $r10, $zero, $r9 ++; LA64-NEXT: and $r11, $r5, $r4 ++; LA64-NEXT: sll.w $r11, $r11, $r8 ++; LA64-NEXT: and $r4, $r6, $r4 ++; LA64-NEXT: sll.w $r6, $r4, $r8 ++; LA64-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $r12, $r7, 0 ++; LA64-NEXT: and $r13, $r12, $r9 ++; LA64-NEXT: bne $r13, $r11, .LBB17_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB17_1 Depth=1 ++; LA64-NEXT: and $r12, $r12, $r10 ++; LA64-NEXT: or $r12, $r12, $r6 ++; LA64-NEXT: sc.w $r12, $r7, 0 ++; LA64-NEXT: beq $r12, $zero, .LBB17_1 ++; LA64-NEXT: .LBB17_3: ++; LA64-NEXT: dbar 20 ++; LA64-NEXT: srl.w $r4, $r13, $r8 ++; LA64-NEXT: ext.w.h $r4, $r4 ++; LA64-NEXT: # %bb.4: ++; LA64-NEXT: 
ext.w.h $r5, $r5 ++; LA64-NEXT: xor $r4, $r4, $r5 ++; LA64-NEXT: sltui $r4, $r4, 1 ++; LA64-NEXT: jr $ra ++ %tmp = cmpxchg i16* %ptr, i16 %cmp, i16 %val acquire acquire ++ %res = extractvalue { i16, i1 } %tmp, 1 ++ ret i1 %res ++} ++ ++define i1 @cmpxchg_i32_acquire_acquire_reti1(i32* %ptr, i32 %cmp, i32 %val) nounwind { ++; LA64-LABEL: cmpxchg_i32_acquire_acquire_reti1: ++; LA64: # %bb.0: ++; LA64-NEXT: slli.w $r6, $r6, 0 ++; LA64-NEXT: slli.w $r5, $r5, 0 ++; LA64-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $r7, $r4, 0 ++; LA64-NEXT: bne $r7, $r5, .LBB18_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB18_1 Depth=1 ++; LA64-NEXT: move $r8, $r6 ++; LA64-NEXT: sc.w $r8, $r4, 0 ++; LA64-NEXT: beq $r8, $zero, .LBB18_1 ++; LA64-NEXT: .LBB18_3: ++; LA64-NEXT: dbar 0 ++; LA64-NEXT: xor $r4, $r7, $r5 ++; LA64-NEXT: sltui $r4, $r4, 1 ++; LA64-NEXT: jr $ra ++ %tmp = cmpxchg i32* %ptr, i32 %cmp, i32 %val acquire acquire ++ %res = extractvalue { i32, i1 } %tmp, 1 ++ ret i1 %res ++} ++ ++define i1 @cmpxchg_i64_acquire_acquire_reti1(i64* %ptr, i64 %cmp, i64 %val) nounwind { ++; LA64-LABEL: cmpxchg_i64_acquire_acquire_reti1: ++; LA64: # %bb.0: ++; LA64-NEXT: .LBB19_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.d $r7, $r4, 0 ++; LA64-NEXT: bne $r7, $r5, .LBB19_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB19_1 Depth=1 ++; LA64-NEXT: move $r8, $r6 ++; LA64-NEXT: sc.d $r8, $r4, 0 ++; LA64-NEXT: beq $r8, $zero, .LBB19_1 ++; LA64-NEXT: .LBB19_3: ++; LA64-NEXT: dbar 0 ++; LA64-NEXT: xor $r4, $r7, $r5 ++; LA64-NEXT: sltui $r4, $r4, 1 ++; LA64-NEXT: jr $ra ++ %tmp = cmpxchg i64* %ptr, i64 %cmp, i64 %val acquire acquire ++ %res = extractvalue { i64, i1 } %tmp, 1 ++ ret i1 %res ++} ++ ++define void @cmpxchg_i8_monotonic_monotonic(i8* %ptr, i8 %cmp, i8 %val) nounwind { ++; LA64-LABEL: cmpxchg_i8_monotonic_monotonic: ++; LA64: # %bb.0: ++; LA64-NEXT: slli.w $r6, $r6, 0 ++; LA64-NEXT: slli.w $r5, $r5, 0 ++; LA64-NEXT: addi.d $r7, $zero, -4 ++; LA64-NEXT: and $r7, $r4, $r7 ++; LA64-NEXT: andi $r4, $r4, 3 ++; LA64-NEXT: slli.w $r4, $r4, 3 ++; LA64-NEXT: ori $r8, $zero, 255 ++; LA64-NEXT: sll.w $r8, $r8, $r4 ++; LA64-NEXT: nor $r9, $zero, $r8 ++; LA64-NEXT: andi $r5, $r5, 255 ++; LA64-NEXT: sll.w $r5, $r5, $r4 ++; LA64-NEXT: andi $r6, $r6, 255 ++; LA64-NEXT: sll.w $r6, $r6, $r4 ++; LA64-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $r11, $r7, 0 ++; LA64-NEXT: and $r12, $r11, $r8 ++; LA64-NEXT: bne $r12, $r5, .LBB20_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB20_1 Depth=1 ++; LA64-NEXT: and $r11, $r11, $r9 ++; LA64-NEXT: or $r11, $r11, $r6 ++; LA64-NEXT: sc.w $r11, $r7, 0 ++; LA64-NEXT: beq $r11, $zero, .LBB20_1 ++; LA64-NEXT: .LBB20_3: ++; LA64-NEXT: dbar 1792 ++; LA64-NEXT: srl.w $r10, $r12, $r4 ++; LA64-NEXT: ext.w.b $r10, $r10 ++; LA64-NEXT: # %bb.4: ++; LA64-NEXT: jr $ra ++ %res = cmpxchg i8* %ptr, i8 %cmp, i8 %val monotonic monotonic ++ ret void ++} ++ ++define void @cmpxchg_i16_monotonic_monotonic(i16* %ptr, i16 %cmp, i16 %val) nounwind { ++; LA64-LABEL: cmpxchg_i16_monotonic_monotonic: ++; LA64: # %bb.0: ++; LA64-NEXT: slli.w $r6, $r6, 0 ++; LA64-NEXT: slli.w $r5, $r5, 0 ++; LA64-NEXT: addi.d $r7, $zero, -4 ++; LA64-NEXT: and $r7, $r4, $r7 ++; LA64-NEXT: andi $r4, $r4, 3 ++; LA64-NEXT: slli.w $r4, $r4, 3 ++; LA64-NEXT: lu12i.w $r8, 15 ++; LA64-NEXT: ori $r8, $r8, 4095 ++; LA64-NEXT: sll.w $r9, $r8, $r4 ++; LA64-NEXT: nor $r10, $zero, $r9 ++; LA64-NEXT: and $r5, $r5, $r8 ++; LA64-NEXT: sll.w $r5, $r5, $r4 ++; LA64-NEXT: and $r6, $r6, $r8 ++; 
LA64-NEXT: sll.w $r6, $r6, $r4 ++; LA64-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $r11, $r7, 0 ++; LA64-NEXT: and $r12, $r11, $r9 ++; LA64-NEXT: bne $r12, $r5, .LBB21_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB21_1 Depth=1 ++; LA64-NEXT: and $r11, $r11, $r10 ++; LA64-NEXT: or $r11, $r11, $r6 ++; LA64-NEXT: sc.w $r11, $r7, 0 ++; LA64-NEXT: beq $r11, $zero, .LBB21_1 ++; LA64-NEXT: .LBB21_3: ++; LA64-NEXT: dbar 1792 ++; LA64-NEXT: srl.w $r8, $r12, $r4 ++; LA64-NEXT: ext.w.h $r8, $r8 ++; LA64-NEXT: # %bb.4: ++; LA64-NEXT: jr $ra ++ %res = cmpxchg i16* %ptr, i16 %cmp, i16 %val monotonic monotonic ++ ret void ++} ++ ++define void @cmpxchg_i32_monotonic_monotonic(i32* %ptr, i32 %cmp, i32 %val) nounwind { ++; LA64-LABEL: cmpxchg_i32_monotonic_monotonic: ++; LA64: # %bb.0: ++; LA64-NEXT: slli.w $r6, $r6, 0 ++; LA64-NEXT: slli.w $r5, $r5, 0 ++; LA64-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $r7, $r4, 0 ++; LA64-NEXT: bne $r7, $r5, .LBB22_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB22_1 Depth=1 ++; LA64-NEXT: move $r8, $r6 ++; LA64-NEXT: sc.w $r8, $r4, 0 ++; LA64-NEXT: beq $r8, $zero, .LBB22_1 ++; LA64-NEXT: .LBB22_3: ++; LA64-NEXT: dbar 1792 ++; LA64-NEXT: jr $ra ++ %res = cmpxchg i32* %ptr, i32 %cmp, i32 %val monotonic monotonic ++ ret void ++} ++ ++define void @cmpxchg_i64_monotonic_monotonic(i64* %ptr, i64 %cmp, i64 %val) nounwind { ++; LA64-LABEL: cmpxchg_i64_monotonic_monotonic: ++; LA64: # %bb.0: ++; LA64-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.d $r7, $r4, 0 ++; LA64-NEXT: bne $r7, $r5, .LBB23_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB23_1 Depth=1 ++; LA64-NEXT: move $r8, $r6 ++; LA64-NEXT: sc.d $r8, $r4, 0 ++; LA64-NEXT: beq $r8, $zero, .LBB23_1 ++; LA64-NEXT: .LBB23_3: ++; LA64-NEXT: dbar 1792 ++; LA64-NEXT: jr $ra ++ %res = cmpxchg i64* %ptr, i64 %cmp, i64 %val monotonic monotonic ++ ret void ++} ++ ++define i8 @cmpxchg_i8_monotonic_monotonic_reti8(i8* %ptr, i8 %cmp, i8 %val) nounwind { ++; LA64-LABEL: cmpxchg_i8_monotonic_monotonic_reti8: ++; LA64: # %bb.0: ++; LA64-NEXT: slli.w $r6, $r6, 0 ++; LA64-NEXT: slli.w $r5, $r5, 0 ++; LA64-NEXT: addi.d $r7, $zero, -4 ++; LA64-NEXT: and $r7, $r4, $r7 ++; LA64-NEXT: andi $r4, $r4, 3 ++; LA64-NEXT: slli.w $r8, $r4, 3 ++; LA64-NEXT: ori $r4, $zero, 255 ++; LA64-NEXT: sll.w $r9, $r4, $r8 ++; LA64-NEXT: nor $r10, $zero, $r9 ++; LA64-NEXT: andi $r4, $r5, 255 ++; LA64-NEXT: sll.w $r5, $r4, $r8 ++; LA64-NEXT: andi $r4, $r6, 255 ++; LA64-NEXT: sll.w $r6, $r4, $r8 ++; LA64-NEXT: .LBB24_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $r11, $r7, 0 ++; LA64-NEXT: and $r12, $r11, $r9 ++; LA64-NEXT: bne $r12, $r5, .LBB24_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB24_1 Depth=1 ++; LA64-NEXT: and $r11, $r11, $r10 ++; LA64-NEXT: or $r11, $r11, $r6 ++; LA64-NEXT: sc.w $r11, $r7, 0 ++; LA64-NEXT: beq $r11, $zero, .LBB24_1 ++; LA64-NEXT: .LBB24_3: ++; LA64-NEXT: dbar 1792 ++; LA64-NEXT: srl.w $r4, $r12, $r8 ++; LA64-NEXT: ext.w.b $r4, $r4 ++; LA64-NEXT: # %bb.4: ++; LA64-NEXT: jr $ra ++ %tmp = cmpxchg i8* %ptr, i8 %cmp, i8 %val monotonic monotonic ++ %res = extractvalue { i8, i1 } %tmp, 0 ++ ret i8 %res ++} ++ ++define i16 @cmpxchg_i16_monotonic_monotonic_reti16(i16* %ptr, i16 %cmp, i16 %val) nounwind { ++; LA64-LABEL: cmpxchg_i16_monotonic_monotonic_reti16: ++; LA64: # %bb.0: ++; LA64-NEXT: slli.w $r6, $r6, 0 ++; LA64-NEXT: slli.w $r5, $r5, 0 ++; LA64-NEXT: addi.d $r7, $zero, -4 ++; LA64-NEXT: and $r7, $r4, $r7 ++; LA64-NEXT: andi $r4, $r4, 3 ++; 
LA64-NEXT: slli.w $r8, $r4, 3 ++; LA64-NEXT: lu12i.w $r4, 15 ++; LA64-NEXT: ori $r4, $r4, 4095 ++; LA64-NEXT: sll.w $r9, $r4, $r8 ++; LA64-NEXT: nor $r10, $zero, $r9 ++; LA64-NEXT: and $r5, $r5, $r4 ++; LA64-NEXT: sll.w $r5, $r5, $r8 ++; LA64-NEXT: and $r4, $r6, $r4 ++; LA64-NEXT: sll.w $r6, $r4, $r8 ++; LA64-NEXT: .LBB25_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $r11, $r7, 0 ++; LA64-NEXT: and $r12, $r11, $r9 ++; LA64-NEXT: bne $r12, $r5, .LBB25_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB25_1 Depth=1 ++; LA64-NEXT: and $r11, $r11, $r10 ++; LA64-NEXT: or $r11, $r11, $r6 ++; LA64-NEXT: sc.w $r11, $r7, 0 ++; LA64-NEXT: beq $r11, $zero, .LBB25_1 ++; LA64-NEXT: .LBB25_3: ++; LA64-NEXT: dbar 1792 ++; LA64-NEXT: srl.w $r4, $r12, $r8 ++; LA64-NEXT: ext.w.h $r4, $r4 ++; LA64-NEXT: # %bb.4: ++; LA64-NEXT: jr $ra ++ %tmp = cmpxchg i16* %ptr, i16 %cmp, i16 %val monotonic monotonic ++ %res = extractvalue { i16, i1 } %tmp, 0 ++ ret i16 %res ++} ++ ++define i32 @cmpxchg_i32_monotonic_monotonic_reti32(i32* %ptr, i32 %cmp, i32 %val) nounwind { ++; LA64-LABEL: cmpxchg_i32_monotonic_monotonic_reti32: ++; LA64: # %bb.0: ++; LA64-NEXT: slli.w $r6, $r6, 0 ++; LA64-NEXT: slli.w $r7, $r5, 0 ++; LA64-NEXT: .LBB26_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $r5, $r4, 0 ++; LA64-NEXT: bne $r5, $r7, .LBB26_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB26_1 Depth=1 ++; LA64-NEXT: move $r8, $r6 ++; LA64-NEXT: sc.w $r8, $r4, 0 ++; LA64-NEXT: beq $r8, $zero, .LBB26_1 ++; LA64-NEXT: .LBB26_3: ++; LA64-NEXT: dbar 1792 ++; LA64-NEXT: move $r4, $r5 ++; LA64-NEXT: jr $ra ++ %tmp = cmpxchg i32* %ptr, i32 %cmp, i32 %val monotonic monotonic ++ %res = extractvalue { i32, i1 } %tmp, 0 ++ ret i32 %res ++} ++ ++define i64 @cmpxchg_i64_monotonic_monotonic_reti64(i64* %ptr, i64 %cmp, i64 %val) nounwind { ++; LA64-LABEL: cmpxchg_i64_monotonic_monotonic_reti64: ++; LA64: # %bb.0: ++; LA64-NEXT: .LBB27_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.d $r7, $r4, 0 ++; LA64-NEXT: bne $r7, $r5, .LBB27_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB27_1 Depth=1 ++; LA64-NEXT: move $r8, $r6 ++; LA64-NEXT: sc.d $r8, $r4, 0 ++; LA64-NEXT: beq $r8, $zero, .LBB27_1 ++; LA64-NEXT: .LBB27_3: ++; LA64-NEXT: dbar 1792 ++; LA64-NEXT: move $r4, $r7 ++; LA64-NEXT: jr $ra ++ %tmp = cmpxchg i64* %ptr, i64 %cmp, i64 %val monotonic monotonic ++ %res = extractvalue { i64, i1 } %tmp, 0 ++ ret i64 %res ++} ++ ++define i1 @cmpxchg_i8_monotonic_monotonic_reti1(i8* %ptr, i8 %cmp, i8 %val) nounwind { ++; LA64-LABEL: cmpxchg_i8_monotonic_monotonic_reti1: ++; LA64: # %bb.0: ++; LA64-NEXT: slli.w $r6, $r6, 0 ++; LA64-NEXT: slli.w $r5, $r5, 0 ++; LA64-NEXT: addi.d $r7, $zero, -4 ++; LA64-NEXT: and $r7, $r4, $r7 ++; LA64-NEXT: andi $r4, $r4, 3 ++; LA64-NEXT: slli.w $r8, $r4, 3 ++; LA64-NEXT: ori $r4, $zero, 255 ++; LA64-NEXT: sll.w $r9, $r4, $r8 ++; LA64-NEXT: nor $r10, $zero, $r9 ++; LA64-NEXT: andi $r4, $r5, 255 ++; LA64-NEXT: sll.w $r11, $r4, $r8 ++; LA64-NEXT: andi $r4, $r6, 255 ++; LA64-NEXT: sll.w $r6, $r4, $r8 ++; LA64-NEXT: .LBB28_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $r12, $r7, 0 ++; LA64-NEXT: and $r13, $r12, $r9 ++; LA64-NEXT: bne $r13, $r11, .LBB28_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB28_1 Depth=1 ++; LA64-NEXT: and $r12, $r12, $r10 ++; LA64-NEXT: or $r12, $r12, $r6 ++; LA64-NEXT: sc.w $r12, $r7, 0 ++; LA64-NEXT: beq $r12, $zero, .LBB28_1 ++; LA64-NEXT: .LBB28_3: ++; LA64-NEXT: dbar 1792 ++; LA64-NEXT: srl.w $r4, $r13, $r8 ++; LA64-NEXT: ext.w.b $r4, $r4 ++; LA64-NEXT: # %bb.4: ++; 
LA64-NEXT: ext.w.b $r5, $r5 ++; LA64-NEXT: xor $r4, $r4, $r5 ++; LA64-NEXT: sltui $r4, $r4, 1 ++; LA64-NEXT: jr $ra ++ %tmp = cmpxchg i8* %ptr, i8 %cmp, i8 %val monotonic monotonic ++ %res = extractvalue { i8, i1 } %tmp, 1 ++ ret i1 %res ++} ++ ++define i1 @cmpxchg_i16_monotonic_monotonic_reti1(i16* %ptr, i16 %cmp, i16 %val) nounwind { ++; LA64-LABEL: cmpxchg_i16_monotonic_monotonic_reti1: ++; LA64: # %bb.0: ++; LA64-NEXT: slli.w $r6, $r6, 0 ++; LA64-NEXT: slli.w $r5, $r5, 0 ++; LA64-NEXT: addi.d $r7, $zero, -4 ++; LA64-NEXT: and $r7, $r4, $r7 ++; LA64-NEXT: andi $r4, $r4, 3 ++; LA64-NEXT: slli.w $r8, $r4, 3 ++; LA64-NEXT: lu12i.w $r4, 15 ++; LA64-NEXT: ori $r4, $r4, 4095 ++; LA64-NEXT: sll.w $r9, $r4, $r8 ++; LA64-NEXT: nor $r10, $zero, $r9 ++; LA64-NEXT: and $r11, $r5, $r4 ++; LA64-NEXT: sll.w $r11, $r11, $r8 ++; LA64-NEXT: and $r4, $r6, $r4 ++; LA64-NEXT: sll.w $r6, $r4, $r8 ++; LA64-NEXT: .LBB29_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $r12, $r7, 0 ++; LA64-NEXT: and $r13, $r12, $r9 ++; LA64-NEXT: bne $r13, $r11, .LBB29_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB29_1 Depth=1 ++; LA64-NEXT: and $r12, $r12, $r10 ++; LA64-NEXT: or $r12, $r12, $r6 ++; LA64-NEXT: sc.w $r12, $r7, 0 ++; LA64-NEXT: beq $r12, $zero, .LBB29_1 ++; LA64-NEXT: .LBB29_3: ++; LA64-NEXT: dbar 1792 ++; LA64-NEXT: srl.w $r4, $r13, $r8 ++; LA64-NEXT: ext.w.h $r4, $r4 ++; LA64-NEXT: # %bb.4: ++; LA64-NEXT: ext.w.h $r5, $r5 ++; LA64-NEXT: xor $r4, $r4, $r5 ++; LA64-NEXT: sltui $r4, $r4, 1 ++; LA64-NEXT: jr $ra ++ %tmp = cmpxchg i16* %ptr, i16 %cmp, i16 %val monotonic monotonic ++ %res = extractvalue { i16, i1 } %tmp, 1 ++ ret i1 %res ++} ++ ++define i1 @cmpxchg_i32_monotonic_monotonic_reti1(i32* %ptr, i32 %cmp, i32 %val) nounwind { ++; LA64-LABEL: cmpxchg_i32_monotonic_monotonic_reti1: ++; LA64: # %bb.0: ++; LA64-NEXT: slli.w $r6, $r6, 0 ++; LA64-NEXT: slli.w $r5, $r5, 0 ++; LA64-NEXT: .LBB30_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $r7, $r4, 0 ++; LA64-NEXT: bne $r7, $r5, .LBB30_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB30_1 Depth=1 ++; LA64-NEXT: move $r8, $r6 ++; LA64-NEXT: sc.w $r8, $r4, 0 ++; LA64-NEXT: beq $r8, $zero, .LBB30_1 ++; LA64-NEXT: .LBB30_3: ++; LA64-NEXT: dbar 1792 ++; LA64-NEXT: xor $r4, $r7, $r5 ++; LA64-NEXT: sltui $r4, $r4, 1 ++; LA64-NEXT: jr $ra ++ %tmp = cmpxchg i32* %ptr, i32 %cmp, i32 %val monotonic monotonic ++ %res = extractvalue { i32, i1 } %tmp, 1 ++ ret i1 %res ++} ++ ++define i1 @cmpxchg_i64_monotonic_monotonic_reti1(i64* %ptr, i64 %cmp, i64 %val) nounwind { ++; LA64-LABEL: cmpxchg_i64_monotonic_monotonic_reti1: ++; LA64: # %bb.0: ++; LA64-NEXT: .LBB31_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.d $r7, $r4, 0 ++; LA64-NEXT: bne $r7, $r5, .LBB31_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB31_1 Depth=1 ++; LA64-NEXT: move $r8, $r6 ++; LA64-NEXT: sc.d $r8, $r4, 0 ++; LA64-NEXT: beq $r8, $zero, .LBB31_1 ++; LA64-NEXT: .LBB31_3: ++; LA64-NEXT: dbar 1792 ++; LA64-NEXT: xor $r4, $r7, $r5 ++; LA64-NEXT: sltui $r4, $r4, 1 ++; LA64-NEXT: jr $ra ++ %tmp = cmpxchg i64* %ptr, i64 %cmp, i64 %val monotonic monotonic ++ %res = extractvalue { i64, i1 } %tmp, 1 ++ ret i1 %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/atomic_16_8.ll b/llvm/test/CodeGen/LoongArch/atomic_16_8.ll +index d5c3e0dad..ba454ab40 100644 +--- a/llvm/test/CodeGen/LoongArch/atomic_16_8.ll ++++ b/llvm/test/CodeGen/LoongArch/atomic_16_8.ll +@@ -14,7 +14,6 @@ define void @umax_8(i8* %ptr) { + ; CHECK-NEXT: sll.w $r7, $r7, $r4 + ; CHECK-NEXT: nor $r8, $zero, $r7 + ; CHECK-NEXT: 
sll.w $r5, $r5, $r4 +-; CHECK-NEXT: dbar 0 + ; CHECK-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 + ; CHECK-NEXT: ll.w $r10, $r6, 0 + ; CHECK-NEXT: and $r14, $r10, $r7 +@@ -50,7 +49,6 @@ define void @umax_16(i16* %ptr) { + ; CHECK-NEXT: sll.w $r7, $r7, $r4 + ; CHECK-NEXT: nor $r8, $zero, $r7 + ; CHECK-NEXT: sll.w $r5, $r5, $r4 +-; CHECK-NEXT: dbar 0 + ; CHECK-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 + ; CHECK-NEXT: ll.w $r10, $r6, 0 + ; CHECK-NEXT: and $r14, $r10, $r7 +@@ -85,7 +83,6 @@ define void @max_8(i8* %ptr) { + ; CHECK-NEXT: sll.w $r7, $r7, $r4 + ; CHECK-NEXT: nor $r8, $zero, $r7 + ; CHECK-NEXT: sll.w $r5, $r5, $r4 +-; CHECK-NEXT: dbar 0 + ; CHECK-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 + ; CHECK-NEXT: ll.w $r10, $r6, 0 + ; CHECK-NEXT: and $r14, $r10, $r7 +@@ -121,7 +118,6 @@ define void @max_16(i16* %ptr) { + ; CHECK-NEXT: sll.w $r7, $r7, $r4 + ; CHECK-NEXT: nor $r8, $zero, $r7 + ; CHECK-NEXT: sll.w $r5, $r5, $r4 +-; CHECK-NEXT: dbar 0 + ; CHECK-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1 + ; CHECK-NEXT: ll.w $r10, $r6, 0 + ; CHECK-NEXT: and $r14, $r10, $r7 +@@ -157,7 +153,6 @@ define void @umin_8(i8* %ptr) { + ; CHECK-NEXT: sll.w $r7, $r7, $r4 + ; CHECK-NEXT: nor $r8, $zero, $r7 + ; CHECK-NEXT: sll.w $r5, $r5, $r4 +-; CHECK-NEXT: dbar 0 + ; CHECK-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 + ; CHECK-NEXT: ll.w $r10, $r6, 0 + ; CHECK-NEXT: and $r14, $r10, $r7 +@@ -193,7 +188,6 @@ define void @umin_16(i16* %ptr) { + ; CHECK-NEXT: sll.w $r7, $r7, $r4 + ; CHECK-NEXT: nor $r8, $zero, $r7 + ; CHECK-NEXT: sll.w $r5, $r5, $r4 +-; CHECK-NEXT: dbar 0 + ; CHECK-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 + ; CHECK-NEXT: ll.w $r10, $r6, 0 + ; CHECK-NEXT: and $r14, $r10, $r7 +@@ -228,7 +222,6 @@ define void @min_8(i8* %ptr) { + ; CHECK-NEXT: sll.w $r7, $r7, $r4 + ; CHECK-NEXT: nor $r8, $zero, $r7 + ; CHECK-NEXT: sll.w $r5, $r5, $r4 +-; CHECK-NEXT: dbar 0 + ; CHECK-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 + ; CHECK-NEXT: ll.w $r10, $r6, 0 + ; CHECK-NEXT: and $r14, $r10, $r7 +@@ -264,7 +257,6 @@ define void @min_16(i16* %ptr) { + ; CHECK-NEXT: sll.w $r7, $r7, $r4 + ; CHECK-NEXT: nor $r8, $zero, $r7 + ; CHECK-NEXT: sll.w $r5, $r5, $r4 +-; CHECK-NEXT: dbar 0 + ; CHECK-NEXT: .LBB7_1: # =>This Inner Loop Header: Depth=1 + ; CHECK-NEXT: ll.w $r10, $r6, 0 + ; CHECK-NEXT: and $r14, $r10, $r7 +@@ -300,7 +292,6 @@ define void @or_8(i8* %ptr) { + ; CHECK-NEXT: sll.w $r7, $r7, $r4 + ; CHECK-NEXT: nor $r8, $zero, $r7 + ; CHECK-NEXT: sll.w $r5, $r5, $r4 +-; CHECK-NEXT: dbar 0 + ; CHECK-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 + ; CHECK-NEXT: ll.w $r10, $r6, 0 + ; CHECK-NEXT: or $r11, $r10, $r5 +@@ -332,7 +323,6 @@ define void @or_16(i16* %ptr) { + ; CHECK-NEXT: sll.w $r7, $r7, $r4 + ; CHECK-NEXT: nor $r8, $zero, $r7 + ; CHECK-NEXT: sll.w $r5, $r5, $r4 +-; CHECK-NEXT: dbar 0 + ; CHECK-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 + ; CHECK-NEXT: ll.w $r10, $r6, 0 + ; CHECK-NEXT: or $r11, $r10, $r5 +@@ -364,7 +354,6 @@ define void @add_8(i8* %ptr) { + ; CHECK-NEXT: sll.w $r7, $r7, $r4 + ; CHECK-NEXT: nor $r8, $zero, $r7 + ; CHECK-NEXT: sll.w $r5, $r5, $r4 +-; CHECK-NEXT: dbar 0 + ; CHECK-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1 + ; CHECK-NEXT: ll.w $r10, $r6, 0 + ; CHECK-NEXT: add.w $r11, $r10, $r5 +@@ -396,7 +385,6 @@ define void @add_16(i16* %ptr) { + ; CHECK-NEXT: sll.w $r7, $r7, $r4 + ; CHECK-NEXT: nor $r8, $zero, $r7 + ; CHECK-NEXT: sll.w $r5, $r5, $r4 +-; CHECK-NEXT: dbar 0 + ; CHECK-NEXT: .LBB11_1: # =>This Inner Loop 
Header: Depth=1 + ; CHECK-NEXT: ll.w $r10, $r6, 0 + ; CHECK-NEXT: add.w $r11, $r10, $r5 +@@ -428,7 +416,6 @@ define void @sub_8(i8* %ptr) { + ; CHECK-NEXT: sll.w $r7, $r7, $r4 + ; CHECK-NEXT: nor $r8, $zero, $r7 + ; CHECK-NEXT: sll.w $r5, $r5, $r4 +-; CHECK-NEXT: dbar 0 + ; CHECK-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 + ; CHECK-NEXT: ll.w $r10, $r6, 0 + ; CHECK-NEXT: sub.w $r11, $r10, $r5 +@@ -460,7 +447,6 @@ define void @sub_16(i16* %ptr) { + ; CHECK-NEXT: sll.w $r7, $r7, $r4 + ; CHECK-NEXT: nor $r8, $zero, $r7 + ; CHECK-NEXT: sll.w $r5, $r5, $r4 +-; CHECK-NEXT: dbar 0 + ; CHECK-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 + ; CHECK-NEXT: ll.w $r10, $r6, 0 + ; CHECK-NEXT: sub.w $r11, $r10, $r5 +@@ -492,7 +478,6 @@ define void @and_8(i8* %ptr) { + ; CHECK-NEXT: sll.w $r7, $r7, $r4 + ; CHECK-NEXT: nor $r8, $zero, $r7 + ; CHECK-NEXT: sll.w $r5, $r5, $r4 +-; CHECK-NEXT: dbar 0 + ; CHECK-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 + ; CHECK-NEXT: ll.w $r10, $r6, 0 + ; CHECK-NEXT: and $r11, $r10, $r5 +@@ -524,7 +509,6 @@ define void @and_16(i16* %ptr) { + ; CHECK-NEXT: sll.w $r7, $r7, $r4 + ; CHECK-NEXT: nor $r8, $zero, $r7 + ; CHECK-NEXT: sll.w $r5, $r5, $r4 +-; CHECK-NEXT: dbar 0 + ; CHECK-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 + ; CHECK-NEXT: ll.w $r10, $r6, 0 + ; CHECK-NEXT: and $r11, $r10, $r5 +@@ -556,7 +540,6 @@ define void @nand_8(i8* %ptr) { + ; CHECK-NEXT: sll.w $r7, $r7, $r4 + ; CHECK-NEXT: nor $r8, $zero, $r7 + ; CHECK-NEXT: sll.w $r5, $r5, $r4 +-; CHECK-NEXT: dbar 0 + ; CHECK-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 + ; CHECK-NEXT: ll.w $r10, $r6, 0 + ; CHECK-NEXT: and $r11, $r10, $r5 +@@ -589,7 +572,6 @@ define void @nand_16(i16* %ptr) { + ; CHECK-NEXT: sll.w $r7, $r7, $r4 + ; CHECK-NEXT: nor $r8, $zero, $r7 + ; CHECK-NEXT: sll.w $r5, $r5, $r4 +-; CHECK-NEXT: dbar 0 + ; CHECK-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1 + ; CHECK-NEXT: ll.w $r10, $r6, 0 + ; CHECK-NEXT: and $r11, $r10, $r5 +@@ -622,7 +604,6 @@ define void @xor_8(i8* %ptr) { + ; CHECK-NEXT: sll.w $r7, $r7, $r4 + ; CHECK-NEXT: nor $r8, $zero, $r7 + ; CHECK-NEXT: sll.w $r5, $r5, $r4 +-; CHECK-NEXT: dbar 0 + ; CHECK-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1 + ; CHECK-NEXT: ll.w $r10, $r6, 0 + ; CHECK-NEXT: xor $r11, $r10, $r5 +@@ -654,7 +635,6 @@ define void @xor_16(i16* %ptr) { + ; CHECK-NEXT: sll.w $r7, $r7, $r4 + ; CHECK-NEXT: nor $r8, $zero, $r7 + ; CHECK-NEXT: sll.w $r5, $r5, $r4 +-; CHECK-NEXT: dbar 0 + ; CHECK-NEXT: .LBB19_1: # =>This Inner Loop Header: Depth=1 + ; CHECK-NEXT: ll.w $r10, $r6, 0 + ; CHECK-NEXT: xor $r11, $r10, $r5 +@@ -686,7 +666,6 @@ define void @xchg_8(i8* %ptr) { + ; CHECK-NEXT: sll.w $r7, $r7, $r4 + ; CHECK-NEXT: nor $r8, $zero, $r7 + ; CHECK-NEXT: sll.w $r5, $r5, $r4 +-; CHECK-NEXT: dbar 0 + ; CHECK-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1 + ; CHECK-NEXT: ll.w $r10, $r6, 0 + ; CHECK-NEXT: and $r11, $r5, $r7 +@@ -717,7 +696,6 @@ define void @xchg_16(i16* %ptr) { + ; CHECK-NEXT: sll.w $r7, $r7, $r4 + ; CHECK-NEXT: nor $r8, $zero, $r7 + ; CHECK-NEXT: sll.w $r5, $r5, $r4 +-; CHECK-NEXT: dbar 0 + ; CHECK-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1 + ; CHECK-NEXT: ll.w $r10, $r6, 0 + ; CHECK-NEXT: and $r11, $r5, $r7 +@@ -751,7 +729,6 @@ define void @cmpxchg_8(i8* %ptr) { + ; CHECK-NEXT: sll.w $r6, $r6, $r4 + ; CHECK-NEXT: andi $r5, $r5, 255 + ; CHECK-NEXT: sll.w $r5, $r5, $r4 +-; CHECK-NEXT: dbar 0 + ; CHECK-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1 + ; CHECK-NEXT: ll.w $r11, $r7, 0 + ; CHECK-NEXT: 
and $r12, $r11, $r8 +@@ -762,7 +739,7 @@ define void @cmpxchg_8(i8* %ptr) { + ; CHECK-NEXT: sc.w $r11, $r7, 0 + ; CHECK-NEXT: beq $r11, $zero, .LBB22_1 + ; CHECK-NEXT: .LBB22_3: +-; CHECK-NEXT: dbar 1792 ++; CHECK-NEXT: dbar 20 + ; CHECK-NEXT: srl.w $r10, $r12, $r4 + ; CHECK-NEXT: ext.w.b $r10, $r10 + ; CHECK-NEXT: # %bb.4: +@@ -788,7 +765,6 @@ define void @cmpxchg_16(i16* %ptr) { + ; CHECK-NEXT: sll.w $r6, $r6, $r4 + ; CHECK-NEXT: and $r5, $r5, $r8 + ; CHECK-NEXT: sll.w $r5, $r5, $r4 +-; CHECK-NEXT: dbar 0 + ; CHECK-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1 + ; CHECK-NEXT: ll.w $r11, $r7, 0 + ; CHECK-NEXT: and $r12, $r11, $r9 +@@ -799,7 +775,7 @@ define void @cmpxchg_16(i16* %ptr) { + ; CHECK-NEXT: sc.w $r11, $r7, 0 + ; CHECK-NEXT: beq $r11, $zero, .LBB23_1 + ; CHECK-NEXT: .LBB23_3: +-; CHECK-NEXT: dbar 1792 ++; CHECK-NEXT: dbar 20 + ; CHECK-NEXT: srl.w $r8, $r12, $r4 + ; CHECK-NEXT: ext.w.h $r8, $r8 + ; CHECK-NEXT: # %bb.4: +diff --git a/llvm/test/CodeGen/LoongArch/atomic_64_32.ll b/llvm/test/CodeGen/LoongArch/atomic_64_32.ll +index ce400fd43..61a24cd5d 100644 +--- a/llvm/test/CodeGen/LoongArch/atomic_64_32.ll ++++ b/llvm/test/CodeGen/LoongArch/atomic_64_32.ll +@@ -1,12 +1,12 @@ + ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +-; RUN: llc -mtriple=loongarch64 -o - %s | FileCheck %s ++; RUN: llc -mtriple=loongarch64 --verify-machineinstrs -o - %s | FileCheck %s + + + define void @umax_32(i32* %ptr) { + ; CHECK-LABEL: umax_32: + ; CHECK: # %bb.0: + ; CHECK-NEXT: ori $r5, $zero, 100 +-; CHECK-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: # %bb.1: + ; CHECK-NEXT: ammax_db.wu $r6, $r5, $r4, 0 + ; CHECK-NEXT: # %bb.2: + ; CHECK-NEXT: jr $ra +@@ -18,7 +18,7 @@ define void @umax_64(i64* %ptr) { + ; CHECK-LABEL: umax_64: + ; CHECK: # %bb.0: + ; CHECK-NEXT: addi.d $r5, $zero, 100 +-; CHECK-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: # %bb.1: + ; CHECK-NEXT: ammax_db.du $r6, $r5, $r4, 0 + ; CHECK-NEXT: # %bb.2: + ; CHECK-NEXT: jr $ra +@@ -30,7 +30,7 @@ define void @max_32(i32* %ptr) { + ; CHECK-LABEL: max_32: + ; CHECK: # %bb.0: + ; CHECK-NEXT: ori $r5, $zero, 100 +-; CHECK-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: # %bb.1: + ; CHECK-NEXT: ammax_db.w $r6, $r5, $r4, 0 + ; CHECK-NEXT: # %bb.2: + ; CHECK-NEXT: jr $ra +@@ -42,7 +42,7 @@ define void @max_64(i64* %ptr) { + ; CHECK-LABEL: max_64: + ; CHECK: # %bb.0: + ; CHECK-NEXT: addi.d $r5, $zero, 100 +-; CHECK-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: # %bb.1: + ; CHECK-NEXT: ammax_db.d $r6, $r5, $r4, 0 + ; CHECK-NEXT: # %bb.2: + ; CHECK-NEXT: jr $ra +@@ -55,7 +55,7 @@ define void @umin_32(i32* %ptr) { + ; CHECK-LABEL: umin_32: + ; CHECK: # %bb.0: + ; CHECK-NEXT: ori $r5, $zero, 100 +-; CHECK-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: # %bb.1: + ; CHECK-NEXT: ammin_db.wu $r6, $r5, $r4, 0 + ; CHECK-NEXT: # %bb.2: + ; CHECK-NEXT: jr $ra +@@ -67,7 +67,7 @@ define void @umin_64(i64* %ptr) { + ; CHECK-LABEL: umin_64: + ; CHECK: # %bb.0: + ; CHECK-NEXT: addi.d $r5, $zero, 100 +-; CHECK-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: # %bb.1: + ; CHECK-NEXT: ammin_db.du $r6, $r5, $r4, 0 + ; CHECK-NEXT: # %bb.2: + ; CHECK-NEXT: jr $ra +@@ -79,7 +79,7 @@ define void @min_32(i32* %ptr) { + ; CHECK-LABEL: min_32: + ; CHECK: # %bb.0: + ; CHECK-NEXT: ori $r5, $zero, 100 +-; CHECK-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: # %bb.1: + ; CHECK-NEXT: ammin_db.w $r6, 
$r5, $r4, 0 + ; CHECK-NEXT: # %bb.2: + ; CHECK-NEXT: jr $ra +@@ -91,7 +91,7 @@ define void @min_64(i64* %ptr) { + ; CHECK-LABEL: min_64: + ; CHECK: # %bb.0: + ; CHECK-NEXT: addi.d $r5, $zero, 100 +-; CHECK-NEXT: .LBB7_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: # %bb.1: + ; CHECK-NEXT: ammin_db.d $r6, $r5, $r4, 0 + ; CHECK-NEXT: # %bb.2: + ; CHECK-NEXT: jr $ra +@@ -104,7 +104,7 @@ define void @or_32(i32* %ptr) { + ; CHECK-LABEL: or_32: + ; CHECK: # %bb.0: + ; CHECK-NEXT: ori $r5, $zero, 100 +-; CHECK-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: # %bb.1: + ; CHECK-NEXT: amor_db.w $r6, $r5, $r4, 0 + ; CHECK-NEXT: # %bb.2: + ; CHECK-NEXT: jr $ra +@@ -116,7 +116,7 @@ define void @or_64(i64* %ptr) { + ; CHECK-LABEL: or_64: + ; CHECK: # %bb.0: + ; CHECK-NEXT: addi.d $r5, $zero, 100 +-; CHECK-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: # %bb.1: + ; CHECK-NEXT: amor_db.d $r6, $r5, $r4, 0 + ; CHECK-NEXT: # %bb.2: + ; CHECK-NEXT: jr $ra +@@ -129,7 +129,7 @@ define void @add_32(i32* %ptr) { + ; CHECK-LABEL: add_32: + ; CHECK: # %bb.0: + ; CHECK-NEXT: ori $r5, $zero, 100 +-; CHECK-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: # %bb.1: + ; CHECK-NEXT: amadd_db.w $r6, $r5, $r4, 0 + ; CHECK-NEXT: # %bb.2: + ; CHECK-NEXT: jr $ra +@@ -141,7 +141,7 @@ define void @add_64(i64* %ptr) { + ; CHECK-LABEL: add_64: + ; CHECK: # %bb.0: + ; CHECK-NEXT: addi.d $r5, $zero, 100 +-; CHECK-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: # %bb.1: + ; CHECK-NEXT: amadd_db.d $r6, $r5, $r4, 0 + ; CHECK-NEXT: # %bb.2: + ; CHECK-NEXT: jr $ra +@@ -154,7 +154,7 @@ define void @sub_32(i32* %ptr) { + ; CHECK-LABEL: sub_32: + ; CHECK: # %bb.0: + ; CHECK-NEXT: ori $r5, $zero, 100 +-; CHECK-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: # %bb.1: + ; CHECK-NEXT: sub.w $r7, $zero, $r5 + ; CHECK-NEXT: amadd_db.w $r6, $r7, $r4, 0 + ; CHECK-NEXT: # %bb.2: +@@ -167,7 +167,7 @@ define void @sub_64(i64* %ptr) { + ; CHECK-LABEL: sub_64: + ; CHECK: # %bb.0: + ; CHECK-NEXT: addi.d $r5, $zero, 100 +-; CHECK-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: # %bb.1: + ; CHECK-NEXT: sub.d $r7, $zero, $r5 + ; CHECK-NEXT: amadd_db.d $r6, $r7, $r4, 0 + ; CHECK-NEXT: # %bb.2: +@@ -181,7 +181,7 @@ define void @and_32(i32* %ptr) { + ; CHECK-LABEL: and_32: + ; CHECK: # %bb.0: + ; CHECK-NEXT: ori $r5, $zero, 100 +-; CHECK-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: # %bb.1: + ; CHECK-NEXT: amand_db.w $r6, $r5, $r4, 0 + ; CHECK-NEXT: # %bb.2: + ; CHECK-NEXT: jr $ra +@@ -193,7 +193,7 @@ define void @and_64(i64* %ptr) { + ; CHECK-LABEL: and_64: + ; CHECK: # %bb.0: + ; CHECK-NEXT: addi.d $r5, $zero, 100 +-; CHECK-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: # %bb.1: + ; CHECK-NEXT: amand_db.d $r6, $r5, $r4, 0 + ; CHECK-NEXT: # %bb.2: + ; CHECK-NEXT: jr $ra +@@ -213,7 +213,6 @@ define void @nand_32(i32* %ptr) { + ; CHECK-NEXT: sc.w $r7, $r4, 0 + ; CHECK-NEXT: beq $r7, $zero, .LBB16_1 + ; CHECK-NEXT: # %bb.2: +-; CHECK-NEXT: dbar 1792 + ; CHECK-NEXT: jr $ra + %ret = atomicrmw nand i32* %ptr, i32 100 seq_cst + ret void +@@ -230,7 +229,6 @@ define void @nand_64(i64* %ptr) { + ; CHECK-NEXT: sc.d $r7, $r4, 0 + ; CHECK-NEXT: beq $r7, $zero, .LBB17_1 + ; CHECK-NEXT: # %bb.2: +-; CHECK-NEXT: dbar 1792 + ; CHECK-NEXT: jr $ra + %ret = atomicrmw nand i64* %ptr, i64 100 seq_cst + ret void +@@ -241,7 +239,7 @@ define void @xor_32(i32* %ptr) { + ; CHECK-LABEL: xor_32: + ; CHECK: # %bb.0: + ; 
CHECK-NEXT: ori $r5, $zero, 100 +-; CHECK-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: # %bb.1: + ; CHECK-NEXT: amxor_db.w $r6, $r5, $r4, 0 + ; CHECK-NEXT: # %bb.2: + ; CHECK-NEXT: jr $ra +@@ -253,7 +251,7 @@ define void @xor_64(i64* %ptr) { + ; CHECK-LABEL: xor_64: + ; CHECK: # %bb.0: + ; CHECK-NEXT: addi.d $r5, $zero, 100 +-; CHECK-NEXT: .LBB19_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: # %bb.1: + ; CHECK-NEXT: amxor_db.d $r6, $r5, $r4, 0 + ; CHECK-NEXT: # %bb.2: + ; CHECK-NEXT: jr $ra +@@ -266,7 +264,7 @@ define void @xchg_32(i32* %ptr) { + ; CHECK-LABEL: xchg_32: + ; CHECK: # %bb.0: + ; CHECK-NEXT: ori $r5, $zero, 100 +-; CHECK-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: # %bb.1: + ; CHECK-NEXT: amswap_db.w $r6, $r5, $r4, 0 + ; CHECK-NEXT: # %bb.2: + ; CHECK-NEXT: jr $ra +@@ -278,7 +276,7 @@ define void @xchg_64(i64* %ptr) { + ; CHECK-LABEL: xchg_64: + ; CHECK: # %bb.0: + ; CHECK-NEXT: addi.d $r5, $zero, 100 +-; CHECK-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: # %bb.1: + ; CHECK-NEXT: amswap_db.d $r6, $r5, $r4, 0 + ; CHECK-NEXT: # %bb.2: + ; CHECK-NEXT: jr $ra +@@ -291,7 +289,6 @@ define void @cmpxchg_32(i32* %ptr) { + ; CHECK: # %bb.0: + ; CHECK-NEXT: ori $r5, $zero, 1 + ; CHECK-NEXT: ori $r6, $zero, 100 +-; CHECK-NEXT: dbar 0 + ; CHECK-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1 + ; CHECK-NEXT: ll.w $r7, $r4, 0 + ; CHECK-NEXT: bne $r7, $r6, .LBB22_3 +@@ -300,7 +297,7 @@ define void @cmpxchg_32(i32* %ptr) { + ; CHECK-NEXT: sc.w $r8, $r4, 0 + ; CHECK-NEXT: beq $r8, $zero, .LBB22_1 + ; CHECK-NEXT: .LBB22_3: +-; CHECK-NEXT: dbar 1792 ++; CHECK-NEXT: dbar 0 + ; CHECK-NEXT: jr $ra + %ret = cmpxchg i32* %ptr, i32 100, i32 1 seq_cst seq_cst + ret void +@@ -311,7 +308,6 @@ define void @cmpxchg_64(i64* %ptr) { + ; CHECK: # %bb.0: + ; CHECK-NEXT: addi.d $r5, $zero, 1 + ; CHECK-NEXT: addi.d $r6, $zero, 100 +-; CHECK-NEXT: dbar 0 + ; CHECK-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1 + ; CHECK-NEXT: ll.d $r7, $r4, 0 + ; CHECK-NEXT: bne $r7, $r6, .LBB23_3 +@@ -320,7 +316,7 @@ define void @cmpxchg_64(i64* %ptr) { + ; CHECK-NEXT: sc.d $r8, $r4, 0 + ; CHECK-NEXT: beq $r8, $zero, .LBB23_1 + ; CHECK-NEXT: .LBB23_3: +-; CHECK-NEXT: dbar 1792 ++; CHECK-NEXT: dbar 0 + ; CHECK-NEXT: jr $ra + %ret = cmpxchg i64* %ptr, i64 100, i64 1 seq_cst seq_cst + ret void +diff --git a/llvm/test/CodeGen/LoongArch/atomicrmw-fp.ll b/llvm/test/CodeGen/LoongArch/atomicrmw-fp.ll +new file mode 100644 +index 000000000..7ef963cc6 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/atomicrmw-fp.ll +@@ -0,0 +1,1776 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s ++ ++define float @float_fadd_acquire(ptr %p) nounwind { ++; CHECK-LABEL: float_fadd_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: fld.s $f0, $r4, 0 ++; CHECK-NEXT: addi.w $r5, $zero, 1 ++; CHECK-NEXT: movgr2fr.w $f1, $r5 ++; CHECK-NEXT: ffint.s.w $f1, $f1 ++; CHECK-NEXT: .LBB0_1: # %atomicrmw.start ++; CHECK-NEXT: # =>This Loop Header: Depth=1 ++; CHECK-NEXT: # Child Loop BB0_2 Depth 2 ++; CHECK-NEXT: fadd.s $f2, $f0, $f1 ++; CHECK-NEXT: movfr2gr.s $r5, $f2 ++; CHECK-NEXT: movfr2gr.s $r6, $f0 ++; CHECK-NEXT: .LBB0_2: # %atomicrmw.start ++; CHECK-NEXT: # Parent Loop BB0_1 Depth=1 ++; CHECK-NEXT: # => This Inner Loop Header: Depth=2 ++; CHECK-NEXT: ll.w $r7, $r4, 0 ++; CHECK-NEXT: bne $r7, $r6, .LBB0_4 ++; CHECK-NEXT: # %bb.3: # %atomicrmw.start ++; CHECK-NEXT: # in Loop: Header=BB0_2 
Depth=2 ++; CHECK-NEXT: move $r8, $r5 ++; CHECK-NEXT: sc.w $r8, $r4, 0 ++; CHECK-NEXT: beq $r8, $zero, .LBB0_2 ++; CHECK-NEXT: .LBB0_4: # %atomicrmw.start ++; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ++; CHECK-NEXT: dbar 0 ++; CHECK-NEXT: movgr2fr.w $f0, $r7 ++; CHECK-NEXT: bne $r7, $r6, .LBB0_1 ++; CHECK-NEXT: # %bb.5: # %atomicrmw.end ++; CHECK-NEXT: jr $ra ++ %v = atomicrmw fadd ptr %p, float 1.0 acquire, align 4 ++ ret float %v ++} ++ ++define float @float_fsub_acquire(ptr %p) nounwind { ++; CHECK-LABEL: float_fsub_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: fld.s $f0, $r4, 0 ++; CHECK-NEXT: .LBB1_1: # %atomicrmw.start ++; CHECK-NEXT: # =>This Loop Header: Depth=1 ++; CHECK-NEXT: # Child Loop BB1_2 Depth 2 ++; CHECK-NEXT: lu12i.w $r5, .LCPI1_0 ++; CHECK-NEXT: ori $r5, $r5, .LCPI1_0 ++; CHECK-NEXT: lu32i.d $r5, .LCPI1_0 ++; CHECK-NEXT: lu52i.d $r5, $r5, .LCPI1_0 ++; CHECK-NEXT: fld.s $f1, $r5, 0 ++; CHECK-NEXT: fadd.s $f1, $f0, $f1 ++; CHECK-NEXT: movfr2gr.s $r5, $f1 ++; CHECK-NEXT: movfr2gr.s $r6, $f0 ++; CHECK-NEXT: .LBB1_2: # %atomicrmw.start ++; CHECK-NEXT: # Parent Loop BB1_1 Depth=1 ++; CHECK-NEXT: # => This Inner Loop Header: Depth=2 ++; CHECK-NEXT: ll.w $r7, $r4, 0 ++; CHECK-NEXT: bne $r7, $r6, .LBB1_4 ++; CHECK-NEXT: # %bb.3: # %atomicrmw.start ++; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=2 ++; CHECK-NEXT: move $r8, $r5 ++; CHECK-NEXT: sc.w $r8, $r4, 0 ++; CHECK-NEXT: beq $r8, $zero, .LBB1_2 ++; CHECK-NEXT: .LBB1_4: # %atomicrmw.start ++; CHECK-NEXT: # in Loop: Header=BB1_1 Depth=1 ++; CHECK-NEXT: dbar 0 ++; CHECK-NEXT: movgr2fr.w $f0, $r7 ++; CHECK-NEXT: bne $r7, $r6, .LBB1_1 ++; CHECK-NEXT: # %bb.5: # %atomicrmw.end ++; CHECK-NEXT: jr $ra ++ %v = atomicrmw fsub ptr %p, float 1.0 acquire, align 4 ++ ret float %v ++} ++ ++define float @float_fmin_acquire(ptr %p) nounwind { ++; CHECK-LABEL: float_fmin_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: fld.s $f0, $r4, 0 ++; CHECK-NEXT: addi.w $r5, $zero, 1 ++; CHECK-NEXT: movgr2fr.w $f1, $r5 ++; CHECK-NEXT: ffint.s.w $f1, $f1 ++; CHECK-NEXT: .LBB2_1: # %atomicrmw.start ++; CHECK-NEXT: # =>This Loop Header: Depth=1 ++; CHECK-NEXT: # Child Loop BB2_2 Depth 2 ++; CHECK-NEXT: fmax.s $f2, $f0, $f0 ++; CHECK-NEXT: fmin.s $f2, $f2, $f1 ++; CHECK-NEXT: movfr2gr.s $r5, $f2 ++; CHECK-NEXT: movfr2gr.s $r6, $f0 ++; CHECK-NEXT: .LBB2_2: # %atomicrmw.start ++; CHECK-NEXT: # Parent Loop BB2_1 Depth=1 ++; CHECK-NEXT: # => This Inner Loop Header: Depth=2 ++; CHECK-NEXT: ll.w $r7, $r4, 0 ++; CHECK-NEXT: bne $r7, $r6, .LBB2_4 ++; CHECK-NEXT: # %bb.3: # %atomicrmw.start ++; CHECK-NEXT: # in Loop: Header=BB2_2 Depth=2 ++; CHECK-NEXT: move $r8, $r5 ++; CHECK-NEXT: sc.w $r8, $r4, 0 ++; CHECK-NEXT: beq $r8, $zero, .LBB2_2 ++; CHECK-NEXT: .LBB2_4: # %atomicrmw.start ++; CHECK-NEXT: # in Loop: Header=BB2_1 Depth=1 ++; CHECK-NEXT: dbar 0 ++; CHECK-NEXT: movgr2fr.w $f0, $r7 ++; CHECK-NEXT: bne $r7, $r6, .LBB2_1 ++; CHECK-NEXT: # %bb.5: # %atomicrmw.end ++; CHECK-NEXT: jr $ra ++ %v = atomicrmw fmin ptr %p, float 1.0 acquire, align 4 ++ ret float %v ++} ++ ++define float @float_fmax_acquire(ptr %p) nounwind { ++; CHECK-LABEL: float_fmax_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: fld.s $f0, $r4, 0 ++; CHECK-NEXT: addi.w $r5, $zero, 1 ++; CHECK-NEXT: movgr2fr.w $f1, $r5 ++; CHECK-NEXT: ffint.s.w $f1, $f1 ++; CHECK-NEXT: .LBB3_1: # %atomicrmw.start ++; CHECK-NEXT: # =>This Loop Header: Depth=1 ++; CHECK-NEXT: # Child Loop BB3_2 Depth 2 ++; CHECK-NEXT: fmax.s $f2, $f0, $f0 ++; CHECK-NEXT: fmax.s $f2, $f2, $f1 ++; CHECK-NEXT: movfr2gr.s $r5, $f2 ++; CHECK-NEXT: 
movfr2gr.s $r6, $f0 ++; CHECK-NEXT: .LBB3_2: # %atomicrmw.start ++; CHECK-NEXT: # Parent Loop BB3_1 Depth=1 ++; CHECK-NEXT: # => This Inner Loop Header: Depth=2 ++; CHECK-NEXT: ll.w $r7, $r4, 0 ++; CHECK-NEXT: bne $r7, $r6, .LBB3_4 ++; CHECK-NEXT: # %bb.3: # %atomicrmw.start ++; CHECK-NEXT: # in Loop: Header=BB3_2 Depth=2 ++; CHECK-NEXT: move $r8, $r5 ++; CHECK-NEXT: sc.w $r8, $r4, 0 ++; CHECK-NEXT: beq $r8, $zero, .LBB3_2 ++; CHECK-NEXT: .LBB3_4: # %atomicrmw.start ++; CHECK-NEXT: # in Loop: Header=BB3_1 Depth=1 ++; CHECK-NEXT: dbar 0 ++; CHECK-NEXT: movgr2fr.w $f0, $r7 ++; CHECK-NEXT: bne $r7, $r6, .LBB3_1 ++; CHECK-NEXT: # %bb.5: # %atomicrmw.end ++; CHECK-NEXT: jr $ra ++ %v = atomicrmw fmax ptr %p, float 1.0 acquire, align 4 ++ ret float %v ++} ++ ++define double @double_fadd_acquire(ptr %p) nounwind { ++; CHECK-LABEL: double_fadd_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $sp, $sp, -80 ++; CHECK-NEXT: fst.d $f24, $sp, 72 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $ra, $sp, 64 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r27, $sp, 56 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r26, $sp, 48 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r25, $sp, 40 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r24, $sp, 32 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r23, $sp, 24 # 8-byte Folded Spill ++; CHECK-NEXT: move $r23, $r4 ++; CHECK-NEXT: fld.d $f0, $r4, 0 ++; CHECK-NEXT: addi.d $r4, $zero, 1 ++; CHECK-NEXT: movgr2fr.d $f1, $r4 ++; CHECK-NEXT: ffint.d.l $f24, $f1 ++; CHECK-NEXT: addi.d $r24, $zero, 8 ++; CHECK-NEXT: addi.d $r25, $sp, 16 ++; CHECK-NEXT: addi.d $r26, $sp, 8 ++; CHECK-NEXT: addi.d $r27, $zero, 2 ++; CHECK-NEXT: .LBB4_1: # %atomicrmw.start ++; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: fst.d $f0, $sp, 16 ++; CHECK-NEXT: fadd.d $f0, $f0, $f24 ++; CHECK-NEXT: fst.d $f0, $sp, 8 ++; CHECK-NEXT: move $r4, $r24 ++; CHECK-NEXT: move $r5, $r23 ++; CHECK-NEXT: move $r6, $r25 ++; CHECK-NEXT: move $r7, $r26 ++; CHECK-NEXT: move $r8, $r27 ++; CHECK-NEXT: move $r9, $r27 ++; CHECK-NEXT: lu12i.w $ra, __atomic_compare_exchange ++; CHECK-NEXT: ori $ra, $ra, __atomic_compare_exchange ++; CHECK-NEXT: lu32i.d $ra, __atomic_compare_exchange ++; CHECK-NEXT: lu52i.d $ra, $ra, __atomic_compare_exchange ++; CHECK-NEXT: jirl $ra, $ra, 0 ++; CHECK-NEXT: fld.d $f0, $sp, 16 ++; CHECK-NEXT: beqz $r4, .LBB4_1 ++; CHECK-NEXT: # %bb.2: # %atomicrmw.end ++; CHECK-NEXT: ld.d $r23, $sp, 24 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r24, $sp, 32 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r25, $sp, 40 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r26, $sp, 48 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r27, $sp, 56 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $ra, $sp, 64 # 8-byte Folded Reload ++; CHECK-NEXT: fld.d $f24, $sp, 72 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 80 ++; CHECK-NEXT: jr $ra ++ %v = atomicrmw fadd ptr %p, double 1.0 acquire, align 4 ++ ret double %v ++} ++ ++define double @double_fsub_acquire(ptr %p) nounwind { ++; CHECK-LABEL: double_fsub_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $sp, $sp, -64 ++; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r27, $sp, 48 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r26, $sp, 40 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r25, $sp, 32 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r24, $sp, 24 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r23, $sp, 16 # 8-byte Folded Spill ++; CHECK-NEXT: move $r23, $r4 ++; CHECK-NEXT: fld.d $f0, $r4, 0 ++; CHECK-NEXT: addi.d $r24, $zero, 8 ++; CHECK-NEXT: 
addi.d $r25, $sp, 8 ++; CHECK-NEXT: addi.d $r26, $sp, 0 ++; CHECK-NEXT: addi.d $r27, $zero, 2 ++; CHECK-NEXT: .LBB5_1: # %atomicrmw.start ++; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: fst.d $f0, $sp, 8 ++; CHECK-NEXT: lu12i.w $r4, .LCPI5_0 ++; CHECK-NEXT: ori $r4, $r4, .LCPI5_0 ++; CHECK-NEXT: lu32i.d $r4, .LCPI5_0 ++; CHECK-NEXT: lu52i.d $r4, $r4, .LCPI5_0 ++; CHECK-NEXT: fld.d $f1, $r4, 0 ++; CHECK-NEXT: fadd.d $f0, $f0, $f1 ++; CHECK-NEXT: fst.d $f0, $sp, 0 ++; CHECK-NEXT: move $r4, $r24 ++; CHECK-NEXT: move $r5, $r23 ++; CHECK-NEXT: move $r6, $r25 ++; CHECK-NEXT: move $r7, $r26 ++; CHECK-NEXT: move $r8, $r27 ++; CHECK-NEXT: move $r9, $r27 ++; CHECK-NEXT: lu12i.w $ra, __atomic_compare_exchange ++; CHECK-NEXT: ori $ra, $ra, __atomic_compare_exchange ++; CHECK-NEXT: lu32i.d $ra, __atomic_compare_exchange ++; CHECK-NEXT: lu52i.d $ra, $ra, __atomic_compare_exchange ++; CHECK-NEXT: jirl $ra, $ra, 0 ++; CHECK-NEXT: fld.d $f0, $sp, 8 ++; CHECK-NEXT: beqz $r4, .LBB5_1 ++; CHECK-NEXT: # %bb.2: # %atomicrmw.end ++; CHECK-NEXT: ld.d $r23, $sp, 16 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r24, $sp, 24 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r25, $sp, 32 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r26, $sp, 40 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r27, $sp, 48 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 64 ++; CHECK-NEXT: jr $ra ++ %v = atomicrmw fsub ptr %p, double 1.0 acquire, align 4 ++ ret double %v ++} ++ ++define double @double_fmin_acquire(ptr %p) nounwind { ++; CHECK-LABEL: double_fmin_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $sp, $sp, -80 ++; CHECK-NEXT: fst.d $f24, $sp, 72 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $ra, $sp, 64 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r27, $sp, 56 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r26, $sp, 48 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r25, $sp, 40 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r24, $sp, 32 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r23, $sp, 24 # 8-byte Folded Spill ++; CHECK-NEXT: move $r23, $r4 ++; CHECK-NEXT: fld.d $f0, $r4, 0 ++; CHECK-NEXT: addi.d $r4, $zero, 1 ++; CHECK-NEXT: movgr2fr.d $f1, $r4 ++; CHECK-NEXT: ffint.d.l $f24, $f1 ++; CHECK-NEXT: addi.d $r24, $zero, 8 ++; CHECK-NEXT: addi.d $r25, $sp, 16 ++; CHECK-NEXT: addi.d $r26, $sp, 8 ++; CHECK-NEXT: addi.d $r27, $zero, 2 ++; CHECK-NEXT: .LBB6_1: # %atomicrmw.start ++; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: fst.d $f0, $sp, 16 ++; CHECK-NEXT: fmax.d $f0, $f0, $f0 ++; CHECK-NEXT: fmin.d $f0, $f0, $f24 ++; CHECK-NEXT: fst.d $f0, $sp, 8 ++; CHECK-NEXT: move $r4, $r24 ++; CHECK-NEXT: move $r5, $r23 ++; CHECK-NEXT: move $r6, $r25 ++; CHECK-NEXT: move $r7, $r26 ++; CHECK-NEXT: move $r8, $r27 ++; CHECK-NEXT: move $r9, $r27 ++; CHECK-NEXT: lu12i.w $ra, __atomic_compare_exchange ++; CHECK-NEXT: ori $ra, $ra, __atomic_compare_exchange ++; CHECK-NEXT: lu32i.d $ra, __atomic_compare_exchange ++; CHECK-NEXT: lu52i.d $ra, $ra, __atomic_compare_exchange ++; CHECK-NEXT: jirl $ra, $ra, 0 ++; CHECK-NEXT: fld.d $f0, $sp, 16 ++; CHECK-NEXT: beqz $r4, .LBB6_1 ++; CHECK-NEXT: # %bb.2: # %atomicrmw.end ++; CHECK-NEXT: ld.d $r23, $sp, 24 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r24, $sp, 32 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r25, $sp, 40 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r26, $sp, 48 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r27, $sp, 56 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $ra, $sp, 64 # 8-byte Folded Reload ++; 
CHECK-NEXT: fld.d $f24, $sp, 72 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 80 ++; CHECK-NEXT: jr $ra ++ %v = atomicrmw fmin ptr %p, double 1.0 acquire, align 4 ++ ret double %v ++} ++ ++define double @double_fmax_acquire(ptr %p) nounwind { ++; CHECK-LABEL: double_fmax_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $sp, $sp, -80 ++; CHECK-NEXT: fst.d $f24, $sp, 72 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $ra, $sp, 64 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r27, $sp, 56 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r26, $sp, 48 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r25, $sp, 40 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r24, $sp, 32 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r23, $sp, 24 # 8-byte Folded Spill ++; CHECK-NEXT: move $r23, $r4 ++; CHECK-NEXT: fld.d $f0, $r4, 0 ++; CHECK-NEXT: addi.d $r4, $zero, 1 ++; CHECK-NEXT: movgr2fr.d $f1, $r4 ++; CHECK-NEXT: ffint.d.l $f24, $f1 ++; CHECK-NEXT: addi.d $r24, $zero, 8 ++; CHECK-NEXT: addi.d $r25, $sp, 16 ++; CHECK-NEXT: addi.d $r26, $sp, 8 ++; CHECK-NEXT: addi.d $r27, $zero, 2 ++; CHECK-NEXT: .LBB7_1: # %atomicrmw.start ++; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: fst.d $f0, $sp, 16 ++; CHECK-NEXT: fmax.d $f0, $f0, $f0 ++; CHECK-NEXT: fmax.d $f0, $f0, $f24 ++; CHECK-NEXT: fst.d $f0, $sp, 8 ++; CHECK-NEXT: move $r4, $r24 ++; CHECK-NEXT: move $r5, $r23 ++; CHECK-NEXT: move $r6, $r25 ++; CHECK-NEXT: move $r7, $r26 ++; CHECK-NEXT: move $r8, $r27 ++; CHECK-NEXT: move $r9, $r27 ++; CHECK-NEXT: lu12i.w $ra, __atomic_compare_exchange ++; CHECK-NEXT: ori $ra, $ra, __atomic_compare_exchange ++; CHECK-NEXT: lu32i.d $ra, __atomic_compare_exchange ++; CHECK-NEXT: lu52i.d $ra, $ra, __atomic_compare_exchange ++; CHECK-NEXT: jirl $ra, $ra, 0 ++; CHECK-NEXT: fld.d $f0, $sp, 16 ++; CHECK-NEXT: beqz $r4, .LBB7_1 ++; CHECK-NEXT: # %bb.2: # %atomicrmw.end ++; CHECK-NEXT: ld.d $r23, $sp, 24 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r24, $sp, 32 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r25, $sp, 40 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r26, $sp, 48 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r27, $sp, 56 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $ra, $sp, 64 # 8-byte Folded Reload ++; CHECK-NEXT: fld.d $f24, $sp, 72 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 80 ++; CHECK-NEXT: jr $ra ++ %v = atomicrmw fmax ptr %p, double 1.0 acquire, align 4 ++ ret double %v ++} ++ ++define float @float_fadd_release(ptr %p) nounwind { ++; CHECK-LABEL: float_fadd_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: fld.s $f0, $r4, 0 ++; CHECK-NEXT: addi.w $r5, $zero, 1 ++; CHECK-NEXT: movgr2fr.w $f1, $r5 ++; CHECK-NEXT: ffint.s.w $f1, $f1 ++; CHECK-NEXT: .LBB8_1: # %atomicrmw.start ++; CHECK-NEXT: # =>This Loop Header: Depth=1 ++; CHECK-NEXT: # Child Loop BB8_2 Depth 2 ++; CHECK-NEXT: fadd.s $f2, $f0, $f1 ++; CHECK-NEXT: movfr2gr.s $r5, $f2 ++; CHECK-NEXT: movfr2gr.s $r6, $f0 ++; CHECK-NEXT: .LBB8_2: # %atomicrmw.start ++; CHECK-NEXT: # Parent Loop BB8_1 Depth=1 ++; CHECK-NEXT: # => This Inner Loop Header: Depth=2 ++; CHECK-NEXT: ll.w $r7, $r4, 0 ++; CHECK-NEXT: bne $r7, $r6, .LBB8_4 ++; CHECK-NEXT: # %bb.3: # %atomicrmw.start ++; CHECK-NEXT: # in Loop: Header=BB8_2 Depth=2 ++; CHECK-NEXT: move $r8, $r5 ++; CHECK-NEXT: sc.w $r8, $r4, 0 ++; CHECK-NEXT: beq $r8, $zero, .LBB8_2 ++; CHECK-NEXT: .LBB8_4: # %atomicrmw.start ++; CHECK-NEXT: # in Loop: Header=BB8_1 Depth=1 ++; CHECK-NEXT: dbar 1792 ++; CHECK-NEXT: movgr2fr.w $f0, $r7 ++; CHECK-NEXT: bne $r7, $r6, .LBB8_1 ++; CHECK-NEXT: # %bb.5: # %atomicrmw.end ++; 
CHECK-NEXT: jr $ra ++ %v = atomicrmw fadd ptr %p, float 1.0 release, align 4 ++ ret float %v ++} ++ ++define float @float_fsub_release(ptr %p) nounwind { ++; CHECK-LABEL: float_fsub_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: fld.s $f0, $r4, 0 ++; CHECK-NEXT: .LBB9_1: # %atomicrmw.start ++; CHECK-NEXT: # =>This Loop Header: Depth=1 ++; CHECK-NEXT: # Child Loop BB9_2 Depth 2 ++; CHECK-NEXT: lu12i.w $r5, .LCPI9_0 ++; CHECK-NEXT: ori $r5, $r5, .LCPI9_0 ++; CHECK-NEXT: lu32i.d $r5, .LCPI9_0 ++; CHECK-NEXT: lu52i.d $r5, $r5, .LCPI9_0 ++; CHECK-NEXT: fld.s $f1, $r5, 0 ++; CHECK-NEXT: fadd.s $f1, $f0, $f1 ++; CHECK-NEXT: movfr2gr.s $r5, $f1 ++; CHECK-NEXT: movfr2gr.s $r6, $f0 ++; CHECK-NEXT: .LBB9_2: # %atomicrmw.start ++; CHECK-NEXT: # Parent Loop BB9_1 Depth=1 ++; CHECK-NEXT: # => This Inner Loop Header: Depth=2 ++; CHECK-NEXT: ll.w $r7, $r4, 0 ++; CHECK-NEXT: bne $r7, $r6, .LBB9_4 ++; CHECK-NEXT: # %bb.3: # %atomicrmw.start ++; CHECK-NEXT: # in Loop: Header=BB9_2 Depth=2 ++; CHECK-NEXT: move $r8, $r5 ++; CHECK-NEXT: sc.w $r8, $r4, 0 ++; CHECK-NEXT: beq $r8, $zero, .LBB9_2 ++; CHECK-NEXT: .LBB9_4: # %atomicrmw.start ++; CHECK-NEXT: # in Loop: Header=BB9_1 Depth=1 ++; CHECK-NEXT: dbar 1792 ++; CHECK-NEXT: movgr2fr.w $f0, $r7 ++; CHECK-NEXT: bne $r7, $r6, .LBB9_1 ++; CHECK-NEXT: # %bb.5: # %atomicrmw.end ++; CHECK-NEXT: jr $ra ++ %v = atomicrmw fsub ptr %p, float 1.0 release, align 4 ++ ret float %v ++} ++ ++define float @float_fmin_release(ptr %p) nounwind { ++; CHECK-LABEL: float_fmin_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: fld.s $f0, $r4, 0 ++; CHECK-NEXT: addi.w $r5, $zero, 1 ++; CHECK-NEXT: movgr2fr.w $f1, $r5 ++; CHECK-NEXT: ffint.s.w $f1, $f1 ++; CHECK-NEXT: .LBB10_1: # %atomicrmw.start ++; CHECK-NEXT: # =>This Loop Header: Depth=1 ++; CHECK-NEXT: # Child Loop BB10_2 Depth 2 ++; CHECK-NEXT: fmax.s $f2, $f0, $f0 ++; CHECK-NEXT: fmin.s $f2, $f2, $f1 ++; CHECK-NEXT: movfr2gr.s $r5, $f2 ++; CHECK-NEXT: movfr2gr.s $r6, $f0 ++; CHECK-NEXT: .LBB10_2: # %atomicrmw.start ++; CHECK-NEXT: # Parent Loop BB10_1 Depth=1 ++; CHECK-NEXT: # => This Inner Loop Header: Depth=2 ++; CHECK-NEXT: ll.w $r7, $r4, 0 ++; CHECK-NEXT: bne $r7, $r6, .LBB10_4 ++; CHECK-NEXT: # %bb.3: # %atomicrmw.start ++; CHECK-NEXT: # in Loop: Header=BB10_2 Depth=2 ++; CHECK-NEXT: move $r8, $r5 ++; CHECK-NEXT: sc.w $r8, $r4, 0 ++; CHECK-NEXT: beq $r8, $zero, .LBB10_2 ++; CHECK-NEXT: .LBB10_4: # %atomicrmw.start ++; CHECK-NEXT: # in Loop: Header=BB10_1 Depth=1 ++; CHECK-NEXT: dbar 1792 ++; CHECK-NEXT: movgr2fr.w $f0, $r7 ++; CHECK-NEXT: bne $r7, $r6, .LBB10_1 ++; CHECK-NEXT: # %bb.5: # %atomicrmw.end ++; CHECK-NEXT: jr $ra ++ %v = atomicrmw fmin ptr %p, float 1.0 release, align 4 ++ ret float %v ++} ++ ++define float @float_fmax_release(ptr %p) nounwind { ++; CHECK-LABEL: float_fmax_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: fld.s $f0, $r4, 0 ++; CHECK-NEXT: addi.w $r5, $zero, 1 ++; CHECK-NEXT: movgr2fr.w $f1, $r5 ++; CHECK-NEXT: ffint.s.w $f1, $f1 ++; CHECK-NEXT: .LBB11_1: # %atomicrmw.start ++; CHECK-NEXT: # =>This Loop Header: Depth=1 ++; CHECK-NEXT: # Child Loop BB11_2 Depth 2 ++; CHECK-NEXT: fmax.s $f2, $f0, $f0 ++; CHECK-NEXT: fmax.s $f2, $f2, $f1 ++; CHECK-NEXT: movfr2gr.s $r5, $f2 ++; CHECK-NEXT: movfr2gr.s $r6, $f0 ++; CHECK-NEXT: .LBB11_2: # %atomicrmw.start ++; CHECK-NEXT: # Parent Loop BB11_1 Depth=1 ++; CHECK-NEXT: # => This Inner Loop Header: Depth=2 ++; CHECK-NEXT: ll.w $r7, $r4, 0 ++; CHECK-NEXT: bne $r7, $r6, .LBB11_4 ++; CHECK-NEXT: # %bb.3: # %atomicrmw.start ++; CHECK-NEXT: # in Loop: Header=BB11_2 Depth=2 
++; CHECK-NEXT: move $r8, $r5 ++; CHECK-NEXT: sc.w $r8, $r4, 0 ++; CHECK-NEXT: beq $r8, $zero, .LBB11_2 ++; CHECK-NEXT: .LBB11_4: # %atomicrmw.start ++; CHECK-NEXT: # in Loop: Header=BB11_1 Depth=1 ++; CHECK-NEXT: dbar 1792 ++; CHECK-NEXT: movgr2fr.w $f0, $r7 ++; CHECK-NEXT: bne $r7, $r6, .LBB11_1 ++; CHECK-NEXT: # %bb.5: # %atomicrmw.end ++; CHECK-NEXT: jr $ra ++ %v = atomicrmw fmax ptr %p, float 1.0 release, align 4 ++ ret float %v ++} ++ ++define double @double_fadd_release(ptr %p) nounwind { ++; CHECK-LABEL: double_fadd_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $sp, $sp, -80 ++; CHECK-NEXT: fst.d $f24, $sp, 72 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $ra, $sp, 64 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r28, $sp, 56 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r27, $sp, 48 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r26, $sp, 40 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r25, $sp, 32 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r24, $sp, 24 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r23, $sp, 16 # 8-byte Folded Spill ++; CHECK-NEXT: move $r23, $r4 ++; CHECK-NEXT: fld.d $f0, $r4, 0 ++; CHECK-NEXT: addi.d $r4, $zero, 1 ++; CHECK-NEXT: movgr2fr.d $f1, $r4 ++; CHECK-NEXT: ffint.d.l $f24, $f1 ++; CHECK-NEXT: addi.d $r24, $zero, 8 ++; CHECK-NEXT: addi.d $r25, $sp, 8 ++; CHECK-NEXT: addi.d $r26, $sp, 0 ++; CHECK-NEXT: addi.d $r27, $zero, 3 ++; CHECK-NEXT: addi.d $r28, $zero, 0 ++; CHECK-NEXT: .LBB12_1: # %atomicrmw.start ++; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: fst.d $f0, $sp, 8 ++; CHECK-NEXT: fadd.d $f0, $f0, $f24 ++; CHECK-NEXT: fst.d $f0, $sp, 0 ++; CHECK-NEXT: move $r4, $r24 ++; CHECK-NEXT: move $r5, $r23 ++; CHECK-NEXT: move $r6, $r25 ++; CHECK-NEXT: move $r7, $r26 ++; CHECK-NEXT: move $r8, $r27 ++; CHECK-NEXT: move $r9, $r28 ++; CHECK-NEXT: lu12i.w $ra, __atomic_compare_exchange ++; CHECK-NEXT: ori $ra, $ra, __atomic_compare_exchange ++; CHECK-NEXT: lu32i.d $ra, __atomic_compare_exchange ++; CHECK-NEXT: lu52i.d $ra, $ra, __atomic_compare_exchange ++; CHECK-NEXT: jirl $ra, $ra, 0 ++; CHECK-NEXT: fld.d $f0, $sp, 8 ++; CHECK-NEXT: beqz $r4, .LBB12_1 ++; CHECK-NEXT: # %bb.2: # %atomicrmw.end ++; CHECK-NEXT: ld.d $r23, $sp, 16 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r24, $sp, 24 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r25, $sp, 32 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r26, $sp, 40 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r27, $sp, 48 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r28, $sp, 56 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $ra, $sp, 64 # 8-byte Folded Reload ++; CHECK-NEXT: fld.d $f24, $sp, 72 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 80 ++; CHECK-NEXT: jr $ra ++ %v = atomicrmw fadd ptr %p, double 1.0 release, align 4 ++ ret double %v ++} ++ ++define double @double_fsub_release(ptr %p) nounwind { ++; CHECK-LABEL: double_fsub_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $sp, $sp, -80 ++; CHECK-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r28, $sp, 64 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r27, $sp, 56 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r26, $sp, 48 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r25, $sp, 40 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r24, $sp, 32 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r23, $sp, 24 # 8-byte Folded Spill ++; CHECK-NEXT: move $r23, $r4 ++; CHECK-NEXT: fld.d $f0, $r4, 0 ++; CHECK-NEXT: addi.d $r24, $zero, 8 ++; CHECK-NEXT: addi.d $r25, $sp, 16 ++; CHECK-NEXT: addi.d $r26, $sp, 8 ++; CHECK-NEXT: addi.d $r27, $zero, 3 ++; 
CHECK-NEXT: addi.d $r28, $zero, 0 ++; CHECK-NEXT: .LBB13_1: # %atomicrmw.start ++; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: fst.d $f0, $sp, 16 ++; CHECK-NEXT: lu12i.w $r4, .LCPI13_0 ++; CHECK-NEXT: ori $r4, $r4, .LCPI13_0 ++; CHECK-NEXT: lu32i.d $r4, .LCPI13_0 ++; CHECK-NEXT: lu52i.d $r4, $r4, .LCPI13_0 ++; CHECK-NEXT: fld.d $f1, $r4, 0 ++; CHECK-NEXT: fadd.d $f0, $f0, $f1 ++; CHECK-NEXT: fst.d $f0, $sp, 8 ++; CHECK-NEXT: move $r4, $r24 ++; CHECK-NEXT: move $r5, $r23 ++; CHECK-NEXT: move $r6, $r25 ++; CHECK-NEXT: move $r7, $r26 ++; CHECK-NEXT: move $r8, $r27 ++; CHECK-NEXT: move $r9, $r28 ++; CHECK-NEXT: lu12i.w $ra, __atomic_compare_exchange ++; CHECK-NEXT: ori $ra, $ra, __atomic_compare_exchange ++; CHECK-NEXT: lu32i.d $ra, __atomic_compare_exchange ++; CHECK-NEXT: lu52i.d $ra, $ra, __atomic_compare_exchange ++; CHECK-NEXT: jirl $ra, $ra, 0 ++; CHECK-NEXT: fld.d $f0, $sp, 16 ++; CHECK-NEXT: beqz $r4, .LBB13_1 ++; CHECK-NEXT: # %bb.2: # %atomicrmw.end ++; CHECK-NEXT: ld.d $r23, $sp, 24 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r24, $sp, 32 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r25, $sp, 40 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r26, $sp, 48 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r27, $sp, 56 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r28, $sp, 64 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 80 ++; CHECK-NEXT: jr $ra ++ %v = atomicrmw fsub ptr %p, double 1.0 release, align 4 ++ ret double %v ++} ++ ++define double @double_fmin_release(ptr %p) nounwind { ++; CHECK-LABEL: double_fmin_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $sp, $sp, -80 ++; CHECK-NEXT: fst.d $f24, $sp, 72 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $ra, $sp, 64 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r28, $sp, 56 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r27, $sp, 48 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r26, $sp, 40 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r25, $sp, 32 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r24, $sp, 24 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r23, $sp, 16 # 8-byte Folded Spill ++; CHECK-NEXT: move $r23, $r4 ++; CHECK-NEXT: fld.d $f0, $r4, 0 ++; CHECK-NEXT: addi.d $r4, $zero, 1 ++; CHECK-NEXT: movgr2fr.d $f1, $r4 ++; CHECK-NEXT: ffint.d.l $f24, $f1 ++; CHECK-NEXT: addi.d $r24, $zero, 8 ++; CHECK-NEXT: addi.d $r25, $sp, 8 ++; CHECK-NEXT: addi.d $r26, $sp, 0 ++; CHECK-NEXT: addi.d $r27, $zero, 3 ++; CHECK-NEXT: addi.d $r28, $zero, 0 ++; CHECK-NEXT: .LBB14_1: # %atomicrmw.start ++; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: fst.d $f0, $sp, 8 ++; CHECK-NEXT: fmax.d $f0, $f0, $f0 ++; CHECK-NEXT: fmin.d $f0, $f0, $f24 ++; CHECK-NEXT: fst.d $f0, $sp, 0 ++; CHECK-NEXT: move $r4, $r24 ++; CHECK-NEXT: move $r5, $r23 ++; CHECK-NEXT: move $r6, $r25 ++; CHECK-NEXT: move $r7, $r26 ++; CHECK-NEXT: move $r8, $r27 ++; CHECK-NEXT: move $r9, $r28 ++; CHECK-NEXT: lu12i.w $ra, __atomic_compare_exchange ++; CHECK-NEXT: ori $ra, $ra, __atomic_compare_exchange ++; CHECK-NEXT: lu32i.d $ra, __atomic_compare_exchange ++; CHECK-NEXT: lu52i.d $ra, $ra, __atomic_compare_exchange ++; CHECK-NEXT: jirl $ra, $ra, 0 ++; CHECK-NEXT: fld.d $f0, $sp, 8 ++; CHECK-NEXT: beqz $r4, .LBB14_1 ++; CHECK-NEXT: # %bb.2: # %atomicrmw.end ++; CHECK-NEXT: ld.d $r23, $sp, 16 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r24, $sp, 24 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r25, $sp, 32 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r26, $sp, 40 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d 
$r27, $sp, 48 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r28, $sp, 56 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $ra, $sp, 64 # 8-byte Folded Reload ++; CHECK-NEXT: fld.d $f24, $sp, 72 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 80 ++; CHECK-NEXT: jr $ra ++ %v = atomicrmw fmin ptr %p, double 1.0 release, align 4 ++ ret double %v ++} ++ ++define double @double_fmax_release(ptr %p) nounwind { ++; CHECK-LABEL: double_fmax_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $sp, $sp, -80 ++; CHECK-NEXT: fst.d $f24, $sp, 72 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $ra, $sp, 64 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r28, $sp, 56 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r27, $sp, 48 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r26, $sp, 40 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r25, $sp, 32 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r24, $sp, 24 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r23, $sp, 16 # 8-byte Folded Spill ++; CHECK-NEXT: move $r23, $r4 ++; CHECK-NEXT: fld.d $f0, $r4, 0 ++; CHECK-NEXT: addi.d $r4, $zero, 1 ++; CHECK-NEXT: movgr2fr.d $f1, $r4 ++; CHECK-NEXT: ffint.d.l $f24, $f1 ++; CHECK-NEXT: addi.d $r24, $zero, 8 ++; CHECK-NEXT: addi.d $r25, $sp, 8 ++; CHECK-NEXT: addi.d $r26, $sp, 0 ++; CHECK-NEXT: addi.d $r27, $zero, 3 ++; CHECK-NEXT: addi.d $r28, $zero, 0 ++; CHECK-NEXT: .LBB15_1: # %atomicrmw.start ++; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: fst.d $f0, $sp, 8 ++; CHECK-NEXT: fmax.d $f0, $f0, $f0 ++; CHECK-NEXT: fmax.d $f0, $f0, $f24 ++; CHECK-NEXT: fst.d $f0, $sp, 0 ++; CHECK-NEXT: move $r4, $r24 ++; CHECK-NEXT: move $r5, $r23 ++; CHECK-NEXT: move $r6, $r25 ++; CHECK-NEXT: move $r7, $r26 ++; CHECK-NEXT: move $r8, $r27 ++; CHECK-NEXT: move $r9, $r28 ++; CHECK-NEXT: lu12i.w $ra, __atomic_compare_exchange ++; CHECK-NEXT: ori $ra, $ra, __atomic_compare_exchange ++; CHECK-NEXT: lu32i.d $ra, __atomic_compare_exchange ++; CHECK-NEXT: lu52i.d $ra, $ra, __atomic_compare_exchange ++; CHECK-NEXT: jirl $ra, $ra, 0 ++; CHECK-NEXT: fld.d $f0, $sp, 8 ++; CHECK-NEXT: beqz $r4, .LBB15_1 ++; CHECK-NEXT: # %bb.2: # %atomicrmw.end ++; CHECK-NEXT: ld.d $r23, $sp, 16 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r24, $sp, 24 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r25, $sp, 32 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r26, $sp, 40 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r27, $sp, 48 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r28, $sp, 56 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $ra, $sp, 64 # 8-byte Folded Reload ++; CHECK-NEXT: fld.d $f24, $sp, 72 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 80 ++; CHECK-NEXT: jr $ra ++ %v = atomicrmw fmax ptr %p, double 1.0 release, align 4 ++ ret double %v ++} ++ ++define float @float_fadd_acq_rel(ptr %p) nounwind { ++; CHECK-LABEL: float_fadd_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: fld.s $f0, $r4, 0 ++; CHECK-NEXT: addi.w $r5, $zero, 1 ++; CHECK-NEXT: movgr2fr.w $f1, $r5 ++; CHECK-NEXT: ffint.s.w $f1, $f1 ++; CHECK-NEXT: .LBB16_1: # %atomicrmw.start ++; CHECK-NEXT: # =>This Loop Header: Depth=1 ++; CHECK-NEXT: # Child Loop BB16_2 Depth 2 ++; CHECK-NEXT: fadd.s $f2, $f0, $f1 ++; CHECK-NEXT: movfr2gr.s $r5, $f2 ++; CHECK-NEXT: movfr2gr.s $r6, $f0 ++; CHECK-NEXT: .LBB16_2: # %atomicrmw.start ++; CHECK-NEXT: # Parent Loop BB16_1 Depth=1 ++; CHECK-NEXT: # => This Inner Loop Header: Depth=2 ++; CHECK-NEXT: ll.w $r7, $r4, 0 ++; CHECK-NEXT: bne $r7, $r6, .LBB16_4 ++; CHECK-NEXT: # %bb.3: # %atomicrmw.start ++; CHECK-NEXT: # in Loop: Header=BB16_2 Depth=2 ++; CHECK-NEXT: move $r8, 
$r5 ++; CHECK-NEXT: sc.w $r8, $r4, 0 ++; CHECK-NEXT: beq $r8, $zero, .LBB16_2 ++; CHECK-NEXT: .LBB16_4: # %atomicrmw.start ++; CHECK-NEXT: # in Loop: Header=BB16_1 Depth=1 ++; CHECK-NEXT: dbar 0 ++; CHECK-NEXT: movgr2fr.w $f0, $r7 ++; CHECK-NEXT: bne $r7, $r6, .LBB16_1 ++; CHECK-NEXT: # %bb.5: # %atomicrmw.end ++; CHECK-NEXT: jr $ra ++ %v = atomicrmw fadd ptr %p, float 1.0 acq_rel, align 4 ++ ret float %v ++} ++ ++define float @float_fsub_acq_rel(ptr %p) nounwind { ++; CHECK-LABEL: float_fsub_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: fld.s $f0, $r4, 0 ++; CHECK-NEXT: .LBB17_1: # %atomicrmw.start ++; CHECK-NEXT: # =>This Loop Header: Depth=1 ++; CHECK-NEXT: # Child Loop BB17_2 Depth 2 ++; CHECK-NEXT: lu12i.w $r5, .LCPI17_0 ++; CHECK-NEXT: ori $r5, $r5, .LCPI17_0 ++; CHECK-NEXT: lu32i.d $r5, .LCPI17_0 ++; CHECK-NEXT: lu52i.d $r5, $r5, .LCPI17_0 ++; CHECK-NEXT: fld.s $f1, $r5, 0 ++; CHECK-NEXT: fadd.s $f1, $f0, $f1 ++; CHECK-NEXT: movfr2gr.s $r5, $f1 ++; CHECK-NEXT: movfr2gr.s $r6, $f0 ++; CHECK-NEXT: .LBB17_2: # %atomicrmw.start ++; CHECK-NEXT: # Parent Loop BB17_1 Depth=1 ++; CHECK-NEXT: # => This Inner Loop Header: Depth=2 ++; CHECK-NEXT: ll.w $r7, $r4, 0 ++; CHECK-NEXT: bne $r7, $r6, .LBB17_4 ++; CHECK-NEXT: # %bb.3: # %atomicrmw.start ++; CHECK-NEXT: # in Loop: Header=BB17_2 Depth=2 ++; CHECK-NEXT: move $r8, $r5 ++; CHECK-NEXT: sc.w $r8, $r4, 0 ++; CHECK-NEXT: beq $r8, $zero, .LBB17_2 ++; CHECK-NEXT: .LBB17_4: # %atomicrmw.start ++; CHECK-NEXT: # in Loop: Header=BB17_1 Depth=1 ++; CHECK-NEXT: dbar 0 ++; CHECK-NEXT: movgr2fr.w $f0, $r7 ++; CHECK-NEXT: bne $r7, $r6, .LBB17_1 ++; CHECK-NEXT: # %bb.5: # %atomicrmw.end ++; CHECK-NEXT: jr $ra ++ %v = atomicrmw fsub ptr %p, float 1.0 acq_rel, align 4 ++ ret float %v ++} ++ ++define float @float_fmin_acq_rel(ptr %p) nounwind { ++; CHECK-LABEL: float_fmin_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: fld.s $f0, $r4, 0 ++; CHECK-NEXT: addi.w $r5, $zero, 1 ++; CHECK-NEXT: movgr2fr.w $f1, $r5 ++; CHECK-NEXT: ffint.s.w $f1, $f1 ++; CHECK-NEXT: .LBB18_1: # %atomicrmw.start ++; CHECK-NEXT: # =>This Loop Header: Depth=1 ++; CHECK-NEXT: # Child Loop BB18_2 Depth 2 ++; CHECK-NEXT: fmax.s $f2, $f0, $f0 ++; CHECK-NEXT: fmin.s $f2, $f2, $f1 ++; CHECK-NEXT: movfr2gr.s $r5, $f2 ++; CHECK-NEXT: movfr2gr.s $r6, $f0 ++; CHECK-NEXT: .LBB18_2: # %atomicrmw.start ++; CHECK-NEXT: # Parent Loop BB18_1 Depth=1 ++; CHECK-NEXT: # => This Inner Loop Header: Depth=2 ++; CHECK-NEXT: ll.w $r7, $r4, 0 ++; CHECK-NEXT: bne $r7, $r6, .LBB18_4 ++; CHECK-NEXT: # %bb.3: # %atomicrmw.start ++; CHECK-NEXT: # in Loop: Header=BB18_2 Depth=2 ++; CHECK-NEXT: move $r8, $r5 ++; CHECK-NEXT: sc.w $r8, $r4, 0 ++; CHECK-NEXT: beq $r8, $zero, .LBB18_2 ++; CHECK-NEXT: .LBB18_4: # %atomicrmw.start ++; CHECK-NEXT: # in Loop: Header=BB18_1 Depth=1 ++; CHECK-NEXT: dbar 0 ++; CHECK-NEXT: movgr2fr.w $f0, $r7 ++; CHECK-NEXT: bne $r7, $r6, .LBB18_1 ++; CHECK-NEXT: # %bb.5: # %atomicrmw.end ++; CHECK-NEXT: jr $ra ++ %v = atomicrmw fmin ptr %p, float 1.0 acq_rel, align 4 ++ ret float %v ++} ++ ++define float @float_fmax_acq_rel(ptr %p) nounwind { ++; CHECK-LABEL: float_fmax_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: fld.s $f0, $r4, 0 ++; CHECK-NEXT: addi.w $r5, $zero, 1 ++; CHECK-NEXT: movgr2fr.w $f1, $r5 ++; CHECK-NEXT: ffint.s.w $f1, $f1 ++; CHECK-NEXT: .LBB19_1: # %atomicrmw.start ++; CHECK-NEXT: # =>This Loop Header: Depth=1 ++; CHECK-NEXT: # Child Loop BB19_2 Depth 2 ++; CHECK-NEXT: fmax.s $f2, $f0, $f0 ++; CHECK-NEXT: fmax.s $f2, $f2, $f1 ++; CHECK-NEXT: movfr2gr.s $r5, $f2 ++; CHECK-NEXT: 
movfr2gr.s $r6, $f0 ++; CHECK-NEXT: .LBB19_2: # %atomicrmw.start ++; CHECK-NEXT: # Parent Loop BB19_1 Depth=1 ++; CHECK-NEXT: # => This Inner Loop Header: Depth=2 ++; CHECK-NEXT: ll.w $r7, $r4, 0 ++; CHECK-NEXT: bne $r7, $r6, .LBB19_4 ++; CHECK-NEXT: # %bb.3: # %atomicrmw.start ++; CHECK-NEXT: # in Loop: Header=BB19_2 Depth=2 ++; CHECK-NEXT: move $r8, $r5 ++; CHECK-NEXT: sc.w $r8, $r4, 0 ++; CHECK-NEXT: beq $r8, $zero, .LBB19_2 ++; CHECK-NEXT: .LBB19_4: # %atomicrmw.start ++; CHECK-NEXT: # in Loop: Header=BB19_1 Depth=1 ++; CHECK-NEXT: dbar 0 ++; CHECK-NEXT: movgr2fr.w $f0, $r7 ++; CHECK-NEXT: bne $r7, $r6, .LBB19_1 ++; CHECK-NEXT: # %bb.5: # %atomicrmw.end ++; CHECK-NEXT: jr $ra ++ %v = atomicrmw fmax ptr %p, float 1.0 acq_rel, align 4 ++ ret float %v ++} ++ ++define double @double_fadd_acq_rel(ptr %p) nounwind { ++; CHECK-LABEL: double_fadd_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $sp, $sp, -80 ++; CHECK-NEXT: fst.d $f24, $sp, 72 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $ra, $sp, 64 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r28, $sp, 56 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r27, $sp, 48 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r26, $sp, 40 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r25, $sp, 32 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r24, $sp, 24 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r23, $sp, 16 # 8-byte Folded Spill ++; CHECK-NEXT: move $r23, $r4 ++; CHECK-NEXT: fld.d $f0, $r4, 0 ++; CHECK-NEXT: addi.d $r4, $zero, 1 ++; CHECK-NEXT: movgr2fr.d $f1, $r4 ++; CHECK-NEXT: ffint.d.l $f24, $f1 ++; CHECK-NEXT: addi.d $r24, $zero, 8 ++; CHECK-NEXT: addi.d $r25, $sp, 8 ++; CHECK-NEXT: addi.d $r26, $sp, 0 ++; CHECK-NEXT: addi.d $r27, $zero, 4 ++; CHECK-NEXT: addi.d $r28, $zero, 2 ++; CHECK-NEXT: .LBB20_1: # %atomicrmw.start ++; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: fst.d $f0, $sp, 8 ++; CHECK-NEXT: fadd.d $f0, $f0, $f24 ++; CHECK-NEXT: fst.d $f0, $sp, 0 ++; CHECK-NEXT: move $r4, $r24 ++; CHECK-NEXT: move $r5, $r23 ++; CHECK-NEXT: move $r6, $r25 ++; CHECK-NEXT: move $r7, $r26 ++; CHECK-NEXT: move $r8, $r27 ++; CHECK-NEXT: move $r9, $r28 ++; CHECK-NEXT: lu12i.w $ra, __atomic_compare_exchange ++; CHECK-NEXT: ori $ra, $ra, __atomic_compare_exchange ++; CHECK-NEXT: lu32i.d $ra, __atomic_compare_exchange ++; CHECK-NEXT: lu52i.d $ra, $ra, __atomic_compare_exchange ++; CHECK-NEXT: jirl $ra, $ra, 0 ++; CHECK-NEXT: fld.d $f0, $sp, 8 ++; CHECK-NEXT: beqz $r4, .LBB20_1 ++; CHECK-NEXT: # %bb.2: # %atomicrmw.end ++; CHECK-NEXT: ld.d $r23, $sp, 16 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r24, $sp, 24 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r25, $sp, 32 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r26, $sp, 40 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r27, $sp, 48 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r28, $sp, 56 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $ra, $sp, 64 # 8-byte Folded Reload ++; CHECK-NEXT: fld.d $f24, $sp, 72 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 80 ++; CHECK-NEXT: jr $ra ++ %v = atomicrmw fadd ptr %p, double 1.0 acq_rel, align 4 ++ ret double %v ++} ++ ++define double @double_fsub_acq_rel(ptr %p) nounwind { ++; CHECK-LABEL: double_fsub_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $sp, $sp, -80 ++; CHECK-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r28, $sp, 64 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r27, $sp, 56 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r26, $sp, 48 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r25, $sp, 40 # 8-byte Folded Spill ++; CHECK-NEXT: 
st.d $r24, $sp, 32 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r23, $sp, 24 # 8-byte Folded Spill ++; CHECK-NEXT: move $r23, $r4 ++; CHECK-NEXT: fld.d $f0, $r4, 0 ++; CHECK-NEXT: addi.d $r24, $zero, 8 ++; CHECK-NEXT: addi.d $r25, $sp, 16 ++; CHECK-NEXT: addi.d $r26, $sp, 8 ++; CHECK-NEXT: addi.d $r27, $zero, 4 ++; CHECK-NEXT: addi.d $r28, $zero, 2 ++; CHECK-NEXT: .LBB21_1: # %atomicrmw.start ++; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: fst.d $f0, $sp, 16 ++; CHECK-NEXT: lu12i.w $r4, .LCPI21_0 ++; CHECK-NEXT: ori $r4, $r4, .LCPI21_0 ++; CHECK-NEXT: lu32i.d $r4, .LCPI21_0 ++; CHECK-NEXT: lu52i.d $r4, $r4, .LCPI21_0 ++; CHECK-NEXT: fld.d $f1, $r4, 0 ++; CHECK-NEXT: fadd.d $f0, $f0, $f1 ++; CHECK-NEXT: fst.d $f0, $sp, 8 ++; CHECK-NEXT: move $r4, $r24 ++; CHECK-NEXT: move $r5, $r23 ++; CHECK-NEXT: move $r6, $r25 ++; CHECK-NEXT: move $r7, $r26 ++; CHECK-NEXT: move $r8, $r27 ++; CHECK-NEXT: move $r9, $r28 ++; CHECK-NEXT: lu12i.w $ra, __atomic_compare_exchange ++; CHECK-NEXT: ori $ra, $ra, __atomic_compare_exchange ++; CHECK-NEXT: lu32i.d $ra, __atomic_compare_exchange ++; CHECK-NEXT: lu52i.d $ra, $ra, __atomic_compare_exchange ++; CHECK-NEXT: jirl $ra, $ra, 0 ++; CHECK-NEXT: fld.d $f0, $sp, 16 ++; CHECK-NEXT: beqz $r4, .LBB21_1 ++; CHECK-NEXT: # %bb.2: # %atomicrmw.end ++; CHECK-NEXT: ld.d $r23, $sp, 24 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r24, $sp, 32 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r25, $sp, 40 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r26, $sp, 48 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r27, $sp, 56 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r28, $sp, 64 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 80 ++; CHECK-NEXT: jr $ra ++ %v = atomicrmw fsub ptr %p, double 1.0 acq_rel, align 4 ++ ret double %v ++} ++ ++define double @double_fmin_acq_rel(ptr %p) nounwind { ++; CHECK-LABEL: double_fmin_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $sp, $sp, -80 ++; CHECK-NEXT: fst.d $f24, $sp, 72 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $ra, $sp, 64 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r28, $sp, 56 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r27, $sp, 48 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r26, $sp, 40 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r25, $sp, 32 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r24, $sp, 24 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r23, $sp, 16 # 8-byte Folded Spill ++; CHECK-NEXT: move $r23, $r4 ++; CHECK-NEXT: fld.d $f0, $r4, 0 ++; CHECK-NEXT: addi.d $r4, $zero, 1 ++; CHECK-NEXT: movgr2fr.d $f1, $r4 ++; CHECK-NEXT: ffint.d.l $f24, $f1 ++; CHECK-NEXT: addi.d $r24, $zero, 8 ++; CHECK-NEXT: addi.d $r25, $sp, 8 ++; CHECK-NEXT: addi.d $r26, $sp, 0 ++; CHECK-NEXT: addi.d $r27, $zero, 4 ++; CHECK-NEXT: addi.d $r28, $zero, 2 ++; CHECK-NEXT: .LBB22_1: # %atomicrmw.start ++; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: fst.d $f0, $sp, 8 ++; CHECK-NEXT: fmax.d $f0, $f0, $f0 ++; CHECK-NEXT: fmin.d $f0, $f0, $f24 ++; CHECK-NEXT: fst.d $f0, $sp, 0 ++; CHECK-NEXT: move $r4, $r24 ++; CHECK-NEXT: move $r5, $r23 ++; CHECK-NEXT: move $r6, $r25 ++; CHECK-NEXT: move $r7, $r26 ++; CHECK-NEXT: move $r8, $r27 ++; CHECK-NEXT: move $r9, $r28 ++; CHECK-NEXT: lu12i.w $ra, __atomic_compare_exchange ++; CHECK-NEXT: ori $ra, $ra, __atomic_compare_exchange ++; CHECK-NEXT: lu32i.d $ra, __atomic_compare_exchange ++; CHECK-NEXT: lu52i.d $ra, $ra, __atomic_compare_exchange ++; CHECK-NEXT: jirl $ra, $ra, 0 ++; CHECK-NEXT: fld.d $f0, $sp, 8 ++; CHECK-NEXT: 
beqz $r4, .LBB22_1 ++; CHECK-NEXT: # %bb.2: # %atomicrmw.end ++; CHECK-NEXT: ld.d $r23, $sp, 16 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r24, $sp, 24 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r25, $sp, 32 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r26, $sp, 40 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r27, $sp, 48 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r28, $sp, 56 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $ra, $sp, 64 # 8-byte Folded Reload ++; CHECK-NEXT: fld.d $f24, $sp, 72 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 80 ++; CHECK-NEXT: jr $ra ++ %v = atomicrmw fmin ptr %p, double 1.0 acq_rel, align 4 ++ ret double %v ++} ++ ++define double @double_fmax_acq_rel(ptr %p) nounwind { ++; CHECK-LABEL: double_fmax_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $sp, $sp, -80 ++; CHECK-NEXT: fst.d $f24, $sp, 72 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $ra, $sp, 64 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r28, $sp, 56 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r27, $sp, 48 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r26, $sp, 40 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r25, $sp, 32 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r24, $sp, 24 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r23, $sp, 16 # 8-byte Folded Spill ++; CHECK-NEXT: move $r23, $r4 ++; CHECK-NEXT: fld.d $f0, $r4, 0 ++; CHECK-NEXT: addi.d $r4, $zero, 1 ++; CHECK-NEXT: movgr2fr.d $f1, $r4 ++; CHECK-NEXT: ffint.d.l $f24, $f1 ++; CHECK-NEXT: addi.d $r24, $zero, 8 ++; CHECK-NEXT: addi.d $r25, $sp, 8 ++; CHECK-NEXT: addi.d $r26, $sp, 0 ++; CHECK-NEXT: addi.d $r27, $zero, 4 ++; CHECK-NEXT: addi.d $r28, $zero, 2 ++; CHECK-NEXT: .LBB23_1: # %atomicrmw.start ++; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: fst.d $f0, $sp, 8 ++; CHECK-NEXT: fmax.d $f0, $f0, $f0 ++; CHECK-NEXT: fmax.d $f0, $f0, $f24 ++; CHECK-NEXT: fst.d $f0, $sp, 0 ++; CHECK-NEXT: move $r4, $r24 ++; CHECK-NEXT: move $r5, $r23 ++; CHECK-NEXT: move $r6, $r25 ++; CHECK-NEXT: move $r7, $r26 ++; CHECK-NEXT: move $r8, $r27 ++; CHECK-NEXT: move $r9, $r28 ++; CHECK-NEXT: lu12i.w $ra, __atomic_compare_exchange ++; CHECK-NEXT: ori $ra, $ra, __atomic_compare_exchange ++; CHECK-NEXT: lu32i.d $ra, __atomic_compare_exchange ++; CHECK-NEXT: lu52i.d $ra, $ra, __atomic_compare_exchange ++; CHECK-NEXT: jirl $ra, $ra, 0 ++; CHECK-NEXT: fld.d $f0, $sp, 8 ++; CHECK-NEXT: beqz $r4, .LBB23_1 ++; CHECK-NEXT: # %bb.2: # %atomicrmw.end ++; CHECK-NEXT: ld.d $r23, $sp, 16 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r24, $sp, 24 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r25, $sp, 32 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r26, $sp, 40 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r27, $sp, 48 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r28, $sp, 56 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $ra, $sp, 64 # 8-byte Folded Reload ++; CHECK-NEXT: fld.d $f24, $sp, 72 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 80 ++; CHECK-NEXT: jr $ra ++ %v = atomicrmw fmax ptr %p, double 1.0 acq_rel, align 4 ++ ret double %v ++} ++ ++define float @float_fadd_seq_cst(ptr %p) nounwind { ++; CHECK-LABEL: float_fadd_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: fld.s $f0, $r4, 0 ++; CHECK-NEXT: addi.w $r5, $zero, 1 ++; CHECK-NEXT: movgr2fr.w $f1, $r5 ++; CHECK-NEXT: ffint.s.w $f1, $f1 ++; CHECK-NEXT: .LBB24_1: # %atomicrmw.start ++; CHECK-NEXT: # =>This Loop Header: Depth=1 ++; CHECK-NEXT: # Child Loop BB24_2 Depth 2 ++; CHECK-NEXT: fadd.s $f2, $f0, $f1 ++; CHECK-NEXT: movfr2gr.s $r5, $f2 ++; CHECK-NEXT: movfr2gr.s $r6, $f0 ++; CHECK-NEXT: 
.LBB24_2: # %atomicrmw.start ++; CHECK-NEXT: # Parent Loop BB24_1 Depth=1 ++; CHECK-NEXT: # => This Inner Loop Header: Depth=2 ++; CHECK-NEXT: ll.w $r7, $r4, 0 ++; CHECK-NEXT: bne $r7, $r6, .LBB24_4 ++; CHECK-NEXT: # %bb.3: # %atomicrmw.start ++; CHECK-NEXT: # in Loop: Header=BB24_2 Depth=2 ++; CHECK-NEXT: move $r8, $r5 ++; CHECK-NEXT: sc.w $r8, $r4, 0 ++; CHECK-NEXT: beq $r8, $zero, .LBB24_2 ++; CHECK-NEXT: .LBB24_4: # %atomicrmw.start ++; CHECK-NEXT: # in Loop: Header=BB24_1 Depth=1 ++; CHECK-NEXT: dbar 0 ++; CHECK-NEXT: movgr2fr.w $f0, $r7 ++; CHECK-NEXT: bne $r7, $r6, .LBB24_1 ++; CHECK-NEXT: # %bb.5: # %atomicrmw.end ++; CHECK-NEXT: jr $ra ++ %v = atomicrmw fadd ptr %p, float 1.0 seq_cst, align 4 ++ ret float %v ++} ++ ++define float @float_fsub_seq_cst(ptr %p) nounwind { ++; CHECK-LABEL: float_fsub_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: fld.s $f0, $r4, 0 ++; CHECK-NEXT: .LBB25_1: # %atomicrmw.start ++; CHECK-NEXT: # =>This Loop Header: Depth=1 ++; CHECK-NEXT: # Child Loop BB25_2 Depth 2 ++; CHECK-NEXT: lu12i.w $r5, .LCPI25_0 ++; CHECK-NEXT: ori $r5, $r5, .LCPI25_0 ++; CHECK-NEXT: lu32i.d $r5, .LCPI25_0 ++; CHECK-NEXT: lu52i.d $r5, $r5, .LCPI25_0 ++; CHECK-NEXT: fld.s $f1, $r5, 0 ++; CHECK-NEXT: fadd.s $f1, $f0, $f1 ++; CHECK-NEXT: movfr2gr.s $r5, $f1 ++; CHECK-NEXT: movfr2gr.s $r6, $f0 ++; CHECK-NEXT: .LBB25_2: # %atomicrmw.start ++; CHECK-NEXT: # Parent Loop BB25_1 Depth=1 ++; CHECK-NEXT: # => This Inner Loop Header: Depth=2 ++; CHECK-NEXT: ll.w $r7, $r4, 0 ++; CHECK-NEXT: bne $r7, $r6, .LBB25_4 ++; CHECK-NEXT: # %bb.3: # %atomicrmw.start ++; CHECK-NEXT: # in Loop: Header=BB25_2 Depth=2 ++; CHECK-NEXT: move $r8, $r5 ++; CHECK-NEXT: sc.w $r8, $r4, 0 ++; CHECK-NEXT: beq $r8, $zero, .LBB25_2 ++; CHECK-NEXT: .LBB25_4: # %atomicrmw.start ++; CHECK-NEXT: # in Loop: Header=BB25_1 Depth=1 ++; CHECK-NEXT: dbar 0 ++; CHECK-NEXT: movgr2fr.w $f0, $r7 ++; CHECK-NEXT: bne $r7, $r6, .LBB25_1 ++; CHECK-NEXT: # %bb.5: # %atomicrmw.end ++; CHECK-NEXT: jr $ra ++ %v = atomicrmw fsub ptr %p, float 1.0 seq_cst, align 4 ++ ret float %v ++} ++ ++define float @float_fmin_seq_cst(ptr %p) nounwind { ++; CHECK-LABEL: float_fmin_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: fld.s $f0, $r4, 0 ++; CHECK-NEXT: addi.w $r5, $zero, 1 ++; CHECK-NEXT: movgr2fr.w $f1, $r5 ++; CHECK-NEXT: ffint.s.w $f1, $f1 ++; CHECK-NEXT: .LBB26_1: # %atomicrmw.start ++; CHECK-NEXT: # =>This Loop Header: Depth=1 ++; CHECK-NEXT: # Child Loop BB26_2 Depth 2 ++; CHECK-NEXT: fmax.s $f2, $f0, $f0 ++; CHECK-NEXT: fmin.s $f2, $f2, $f1 ++; CHECK-NEXT: movfr2gr.s $r5, $f2 ++; CHECK-NEXT: movfr2gr.s $r6, $f0 ++; CHECK-NEXT: .LBB26_2: # %atomicrmw.start ++; CHECK-NEXT: # Parent Loop BB26_1 Depth=1 ++; CHECK-NEXT: # => This Inner Loop Header: Depth=2 ++; CHECK-NEXT: ll.w $r7, $r4, 0 ++; CHECK-NEXT: bne $r7, $r6, .LBB26_4 ++; CHECK-NEXT: # %bb.3: # %atomicrmw.start ++; CHECK-NEXT: # in Loop: Header=BB26_2 Depth=2 ++; CHECK-NEXT: move $r8, $r5 ++; CHECK-NEXT: sc.w $r8, $r4, 0 ++; CHECK-NEXT: beq $r8, $zero, .LBB26_2 ++; CHECK-NEXT: .LBB26_4: # %atomicrmw.start ++; CHECK-NEXT: # in Loop: Header=BB26_1 Depth=1 ++; CHECK-NEXT: dbar 0 ++; CHECK-NEXT: movgr2fr.w $f0, $r7 ++; CHECK-NEXT: bne $r7, $r6, .LBB26_1 ++; CHECK-NEXT: # %bb.5: # %atomicrmw.end ++; CHECK-NEXT: jr $ra ++ %v = atomicrmw fmin ptr %p, float 1.0 seq_cst, align 4 ++ ret float %v ++} ++ ++define float @float_fmax_seq_cst(ptr %p) nounwind { ++; CHECK-LABEL: float_fmax_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: fld.s $f0, $r4, 0 ++; CHECK-NEXT: addi.w $r5, $zero, 1 ++; CHECK-NEXT: 
movgr2fr.w $f1, $r5 ++; CHECK-NEXT: ffint.s.w $f1, $f1 ++; CHECK-NEXT: .LBB27_1: # %atomicrmw.start ++; CHECK-NEXT: # =>This Loop Header: Depth=1 ++; CHECK-NEXT: # Child Loop BB27_2 Depth 2 ++; CHECK-NEXT: fmax.s $f2, $f0, $f0 ++; CHECK-NEXT: fmax.s $f2, $f2, $f1 ++; CHECK-NEXT: movfr2gr.s $r5, $f2 ++; CHECK-NEXT: movfr2gr.s $r6, $f0 ++; CHECK-NEXT: .LBB27_2: # %atomicrmw.start ++; CHECK-NEXT: # Parent Loop BB27_1 Depth=1 ++; CHECK-NEXT: # => This Inner Loop Header: Depth=2 ++; CHECK-NEXT: ll.w $r7, $r4, 0 ++; CHECK-NEXT: bne $r7, $r6, .LBB27_4 ++; CHECK-NEXT: # %bb.3: # %atomicrmw.start ++; CHECK-NEXT: # in Loop: Header=BB27_2 Depth=2 ++; CHECK-NEXT: move $r8, $r5 ++; CHECK-NEXT: sc.w $r8, $r4, 0 ++; CHECK-NEXT: beq $r8, $zero, .LBB27_2 ++; CHECK-NEXT: .LBB27_4: # %atomicrmw.start ++; CHECK-NEXT: # in Loop: Header=BB27_1 Depth=1 ++; CHECK-NEXT: dbar 0 ++; CHECK-NEXT: movgr2fr.w $f0, $r7 ++; CHECK-NEXT: bne $r7, $r6, .LBB27_1 ++; CHECK-NEXT: # %bb.5: # %atomicrmw.end ++; CHECK-NEXT: jr $ra ++ %v = atomicrmw fmax ptr %p, float 1.0 seq_cst, align 4 ++ ret float %v ++} ++ ++define double @double_fadd_seq_cst(ptr %p) nounwind { ++; CHECK-LABEL: double_fadd_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $sp, $sp, -80 ++; CHECK-NEXT: fst.d $f24, $sp, 72 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $ra, $sp, 64 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r27, $sp, 56 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r26, $sp, 48 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r25, $sp, 40 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r24, $sp, 32 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r23, $sp, 24 # 8-byte Folded Spill ++; CHECK-NEXT: move $r23, $r4 ++; CHECK-NEXT: fld.d $f0, $r4, 0 ++; CHECK-NEXT: addi.d $r4, $zero, 1 ++; CHECK-NEXT: movgr2fr.d $f1, $r4 ++; CHECK-NEXT: ffint.d.l $f24, $f1 ++; CHECK-NEXT: addi.d $r24, $zero, 8 ++; CHECK-NEXT: addi.d $r25, $sp, 16 ++; CHECK-NEXT: addi.d $r26, $sp, 8 ++; CHECK-NEXT: addi.d $r27, $zero, 5 ++; CHECK-NEXT: .LBB28_1: # %atomicrmw.start ++; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: fst.d $f0, $sp, 16 ++; CHECK-NEXT: fadd.d $f0, $f0, $f24 ++; CHECK-NEXT: fst.d $f0, $sp, 8 ++; CHECK-NEXT: move $r4, $r24 ++; CHECK-NEXT: move $r5, $r23 ++; CHECK-NEXT: move $r6, $r25 ++; CHECK-NEXT: move $r7, $r26 ++; CHECK-NEXT: move $r8, $r27 ++; CHECK-NEXT: move $r9, $r27 ++; CHECK-NEXT: lu12i.w $ra, __atomic_compare_exchange ++; CHECK-NEXT: ori $ra, $ra, __atomic_compare_exchange ++; CHECK-NEXT: lu32i.d $ra, __atomic_compare_exchange ++; CHECK-NEXT: lu52i.d $ra, $ra, __atomic_compare_exchange ++; CHECK-NEXT: jirl $ra, $ra, 0 ++; CHECK-NEXT: fld.d $f0, $sp, 16 ++; CHECK-NEXT: beqz $r4, .LBB28_1 ++; CHECK-NEXT: # %bb.2: # %atomicrmw.end ++; CHECK-NEXT: ld.d $r23, $sp, 24 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r24, $sp, 32 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r25, $sp, 40 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r26, $sp, 48 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r27, $sp, 56 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $ra, $sp, 64 # 8-byte Folded Reload ++; CHECK-NEXT: fld.d $f24, $sp, 72 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 80 ++; CHECK-NEXT: jr $ra ++ %v = atomicrmw fadd ptr %p, double 1.0 seq_cst, align 4 ++ ret double %v ++} ++ ++define double @double_fsub_seq_cst(ptr %p) nounwind { ++; CHECK-LABEL: double_fsub_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $sp, $sp, -64 ++; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r27, $sp, 48 # 8-byte Folded Spill ++; CHECK-NEXT: st.d 
$r26, $sp, 40 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r25, $sp, 32 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r24, $sp, 24 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r23, $sp, 16 # 8-byte Folded Spill ++; CHECK-NEXT: move $r23, $r4 ++; CHECK-NEXT: fld.d $f0, $r4, 0 ++; CHECK-NEXT: addi.d $r24, $zero, 8 ++; CHECK-NEXT: addi.d $r25, $sp, 8 ++; CHECK-NEXT: addi.d $r26, $sp, 0 ++; CHECK-NEXT: addi.d $r27, $zero, 5 ++; CHECK-NEXT: .LBB29_1: # %atomicrmw.start ++; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: fst.d $f0, $sp, 8 ++; CHECK-NEXT: lu12i.w $r4, .LCPI29_0 ++; CHECK-NEXT: ori $r4, $r4, .LCPI29_0 ++; CHECK-NEXT: lu32i.d $r4, .LCPI29_0 ++; CHECK-NEXT: lu52i.d $r4, $r4, .LCPI29_0 ++; CHECK-NEXT: fld.d $f1, $r4, 0 ++; CHECK-NEXT: fadd.d $f0, $f0, $f1 ++; CHECK-NEXT: fst.d $f0, $sp, 0 ++; CHECK-NEXT: move $r4, $r24 ++; CHECK-NEXT: move $r5, $r23 ++; CHECK-NEXT: move $r6, $r25 ++; CHECK-NEXT: move $r7, $r26 ++; CHECK-NEXT: move $r8, $r27 ++; CHECK-NEXT: move $r9, $r27 ++; CHECK-NEXT: lu12i.w $ra, __atomic_compare_exchange ++; CHECK-NEXT: ori $ra, $ra, __atomic_compare_exchange ++; CHECK-NEXT: lu32i.d $ra, __atomic_compare_exchange ++; CHECK-NEXT: lu52i.d $ra, $ra, __atomic_compare_exchange ++; CHECK-NEXT: jirl $ra, $ra, 0 ++; CHECK-NEXT: fld.d $f0, $sp, 8 ++; CHECK-NEXT: beqz $r4, .LBB29_1 ++; CHECK-NEXT: # %bb.2: # %atomicrmw.end ++; CHECK-NEXT: ld.d $r23, $sp, 16 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r24, $sp, 24 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r25, $sp, 32 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r26, $sp, 40 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r27, $sp, 48 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 64 ++; CHECK-NEXT: jr $ra ++ %v = atomicrmw fsub ptr %p, double 1.0 seq_cst, align 4 ++ ret double %v ++} ++ ++define double @double_fmin_seq_cst(ptr %p) nounwind { ++; CHECK-LABEL: double_fmin_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $sp, $sp, -80 ++; CHECK-NEXT: fst.d $f24, $sp, 72 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $ra, $sp, 64 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r27, $sp, 56 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r26, $sp, 48 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r25, $sp, 40 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r24, $sp, 32 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r23, $sp, 24 # 8-byte Folded Spill ++; CHECK-NEXT: move $r23, $r4 ++; CHECK-NEXT: fld.d $f0, $r4, 0 ++; CHECK-NEXT: addi.d $r4, $zero, 1 ++; CHECK-NEXT: movgr2fr.d $f1, $r4 ++; CHECK-NEXT: ffint.d.l $f24, $f1 ++; CHECK-NEXT: addi.d $r24, $zero, 8 ++; CHECK-NEXT: addi.d $r25, $sp, 16 ++; CHECK-NEXT: addi.d $r26, $sp, 8 ++; CHECK-NEXT: addi.d $r27, $zero, 5 ++; CHECK-NEXT: .LBB30_1: # %atomicrmw.start ++; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: fst.d $f0, $sp, 16 ++; CHECK-NEXT: fmax.d $f0, $f0, $f0 ++; CHECK-NEXT: fmin.d $f0, $f0, $f24 ++; CHECK-NEXT: fst.d $f0, $sp, 8 ++; CHECK-NEXT: move $r4, $r24 ++; CHECK-NEXT: move $r5, $r23 ++; CHECK-NEXT: move $r6, $r25 ++; CHECK-NEXT: move $r7, $r26 ++; CHECK-NEXT: move $r8, $r27 ++; CHECK-NEXT: move $r9, $r27 ++; CHECK-NEXT: lu12i.w $ra, __atomic_compare_exchange ++; CHECK-NEXT: ori $ra, $ra, __atomic_compare_exchange ++; CHECK-NEXT: lu32i.d $ra, __atomic_compare_exchange ++; CHECK-NEXT: lu52i.d $ra, $ra, __atomic_compare_exchange ++; CHECK-NEXT: jirl $ra, $ra, 0 ++; CHECK-NEXT: fld.d $f0, $sp, 16 ++; CHECK-NEXT: beqz $r4, .LBB30_1 ++; CHECK-NEXT: # %bb.2: # %atomicrmw.end ++; CHECK-NEXT: ld.d 
$r23, $sp, 24 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r24, $sp, 32 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r25, $sp, 40 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r26, $sp, 48 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r27, $sp, 56 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $ra, $sp, 64 # 8-byte Folded Reload ++; CHECK-NEXT: fld.d $f24, $sp, 72 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 80 ++; CHECK-NEXT: jr $ra ++ %v = atomicrmw fmin ptr %p, double 1.0 seq_cst, align 4 ++ ret double %v ++} ++ ++define double @double_fmax_seq_cst(ptr %p) nounwind { ++; CHECK-LABEL: double_fmax_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $sp, $sp, -80 ++; CHECK-NEXT: fst.d $f24, $sp, 72 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $ra, $sp, 64 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r27, $sp, 56 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r26, $sp, 48 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r25, $sp, 40 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r24, $sp, 32 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r23, $sp, 24 # 8-byte Folded Spill ++; CHECK-NEXT: move $r23, $r4 ++; CHECK-NEXT: fld.d $f0, $r4, 0 ++; CHECK-NEXT: addi.d $r4, $zero, 1 ++; CHECK-NEXT: movgr2fr.d $f1, $r4 ++; CHECK-NEXT: ffint.d.l $f24, $f1 ++; CHECK-NEXT: addi.d $r24, $zero, 8 ++; CHECK-NEXT: addi.d $r25, $sp, 16 ++; CHECK-NEXT: addi.d $r26, $sp, 8 ++; CHECK-NEXT: addi.d $r27, $zero, 5 ++; CHECK-NEXT: .LBB31_1: # %atomicrmw.start ++; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: fst.d $f0, $sp, 16 ++; CHECK-NEXT: fmax.d $f0, $f0, $f0 ++; CHECK-NEXT: fmax.d $f0, $f0, $f24 ++; CHECK-NEXT: fst.d $f0, $sp, 8 ++; CHECK-NEXT: move $r4, $r24 ++; CHECK-NEXT: move $r5, $r23 ++; CHECK-NEXT: move $r6, $r25 ++; CHECK-NEXT: move $r7, $r26 ++; CHECK-NEXT: move $r8, $r27 ++; CHECK-NEXT: move $r9, $r27 ++; CHECK-NEXT: lu12i.w $ra, __atomic_compare_exchange ++; CHECK-NEXT: ori $ra, $ra, __atomic_compare_exchange ++; CHECK-NEXT: lu32i.d $ra, __atomic_compare_exchange ++; CHECK-NEXT: lu52i.d $ra, $ra, __atomic_compare_exchange ++; CHECK-NEXT: jirl $ra, $ra, 0 ++; CHECK-NEXT: fld.d $f0, $sp, 16 ++; CHECK-NEXT: beqz $r4, .LBB31_1 ++; CHECK-NEXT: # %bb.2: # %atomicrmw.end ++; CHECK-NEXT: ld.d $r23, $sp, 24 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r24, $sp, 32 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r25, $sp, 40 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r26, $sp, 48 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r27, $sp, 56 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $ra, $sp, 64 # 8-byte Folded Reload ++; CHECK-NEXT: fld.d $f24, $sp, 72 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 80 ++; CHECK-NEXT: jr $ra ++ %v = atomicrmw fmax ptr %p, double 1.0 seq_cst, align 4 ++ ret double %v ++} ++ ++define float @float_fadd_monotonic(ptr %p) nounwind { ++; CHECK-LABEL: float_fadd_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: fld.s $f0, $r4, 0 ++; CHECK-NEXT: addi.w $r5, $zero, 1 ++; CHECK-NEXT: movgr2fr.w $f1, $r5 ++; CHECK-NEXT: ffint.s.w $f1, $f1 ++; CHECK-NEXT: .LBB32_1: # %atomicrmw.start ++; CHECK-NEXT: # =>This Loop Header: Depth=1 ++; CHECK-NEXT: # Child Loop BB32_2 Depth 2 ++; CHECK-NEXT: fadd.s $f2, $f0, $f1 ++; CHECK-NEXT: movfr2gr.s $r5, $f2 ++; CHECK-NEXT: movfr2gr.s $r6, $f0 ++; CHECK-NEXT: .LBB32_2: # %atomicrmw.start ++; CHECK-NEXT: # Parent Loop BB32_1 Depth=1 ++; CHECK-NEXT: # => This Inner Loop Header: Depth=2 ++; CHECK-NEXT: ll.w $r7, $r4, 0 ++; CHECK-NEXT: bne $r7, $r6, .LBB32_4 ++; CHECK-NEXT: # %bb.3: # %atomicrmw.start ++; CHECK-NEXT: # in Loop: Header=BB32_2 Depth=2 
++; CHECK-NEXT: move $r8, $r5 ++; CHECK-NEXT: sc.w $r8, $r4, 0 ++; CHECK-NEXT: beq $r8, $zero, .LBB32_2 ++; CHECK-NEXT: .LBB32_4: # %atomicrmw.start ++; CHECK-NEXT: # in Loop: Header=BB32_1 Depth=1 ++; CHECK-NEXT: dbar 1792 ++; CHECK-NEXT: movgr2fr.w $f0, $r7 ++; CHECK-NEXT: bne $r7, $r6, .LBB32_1 ++; CHECK-NEXT: # %bb.5: # %atomicrmw.end ++; CHECK-NEXT: jr $ra ++ %v = atomicrmw fadd ptr %p, float 1.0 monotonic, align 4 ++ ret float %v ++} ++ ++define float @float_fsub_monotonic(ptr %p) nounwind { ++; CHECK-LABEL: float_fsub_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: fld.s $f0, $r4, 0 ++; CHECK-NEXT: .LBB33_1: # %atomicrmw.start ++; CHECK-NEXT: # =>This Loop Header: Depth=1 ++; CHECK-NEXT: # Child Loop BB33_2 Depth 2 ++; CHECK-NEXT: lu12i.w $r5, .LCPI33_0 ++; CHECK-NEXT: ori $r5, $r5, .LCPI33_0 ++; CHECK-NEXT: lu32i.d $r5, .LCPI33_0 ++; CHECK-NEXT: lu52i.d $r5, $r5, .LCPI33_0 ++; CHECK-NEXT: fld.s $f1, $r5, 0 ++; CHECK-NEXT: fadd.s $f1, $f0, $f1 ++; CHECK-NEXT: movfr2gr.s $r5, $f1 ++; CHECK-NEXT: movfr2gr.s $r6, $f0 ++; CHECK-NEXT: .LBB33_2: # %atomicrmw.start ++; CHECK-NEXT: # Parent Loop BB33_1 Depth=1 ++; CHECK-NEXT: # => This Inner Loop Header: Depth=2 ++; CHECK-NEXT: ll.w $r7, $r4, 0 ++; CHECK-NEXT: bne $r7, $r6, .LBB33_4 ++; CHECK-NEXT: # %bb.3: # %atomicrmw.start ++; CHECK-NEXT: # in Loop: Header=BB33_2 Depth=2 ++; CHECK-NEXT: move $r8, $r5 ++; CHECK-NEXT: sc.w $r8, $r4, 0 ++; CHECK-NEXT: beq $r8, $zero, .LBB33_2 ++; CHECK-NEXT: .LBB33_4: # %atomicrmw.start ++; CHECK-NEXT: # in Loop: Header=BB33_1 Depth=1 ++; CHECK-NEXT: dbar 1792 ++; CHECK-NEXT: movgr2fr.w $f0, $r7 ++; CHECK-NEXT: bne $r7, $r6, .LBB33_1 ++; CHECK-NEXT: # %bb.5: # %atomicrmw.end ++; CHECK-NEXT: jr $ra ++ %v = atomicrmw fsub ptr %p, float 1.0 monotonic, align 4 ++ ret float %v ++} ++ ++define float @float_fmin_monotonic(ptr %p) nounwind { ++; CHECK-LABEL: float_fmin_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: fld.s $f0, $r4, 0 ++; CHECK-NEXT: addi.w $r5, $zero, 1 ++; CHECK-NEXT: movgr2fr.w $f1, $r5 ++; CHECK-NEXT: ffint.s.w $f1, $f1 ++; CHECK-NEXT: .LBB34_1: # %atomicrmw.start ++; CHECK-NEXT: # =>This Loop Header: Depth=1 ++; CHECK-NEXT: # Child Loop BB34_2 Depth 2 ++; CHECK-NEXT: fmax.s $f2, $f0, $f0 ++; CHECK-NEXT: fmin.s $f2, $f2, $f1 ++; CHECK-NEXT: movfr2gr.s $r5, $f2 ++; CHECK-NEXT: movfr2gr.s $r6, $f0 ++; CHECK-NEXT: .LBB34_2: # %atomicrmw.start ++; CHECK-NEXT: # Parent Loop BB34_1 Depth=1 ++; CHECK-NEXT: # => This Inner Loop Header: Depth=2 ++; CHECK-NEXT: ll.w $r7, $r4, 0 ++; CHECK-NEXT: bne $r7, $r6, .LBB34_4 ++; CHECK-NEXT: # %bb.3: # %atomicrmw.start ++; CHECK-NEXT: # in Loop: Header=BB34_2 Depth=2 ++; CHECK-NEXT: move $r8, $r5 ++; CHECK-NEXT: sc.w $r8, $r4, 0 ++; CHECK-NEXT: beq $r8, $zero, .LBB34_2 ++; CHECK-NEXT: .LBB34_4: # %atomicrmw.start ++; CHECK-NEXT: # in Loop: Header=BB34_1 Depth=1 ++; CHECK-NEXT: dbar 1792 ++; CHECK-NEXT: movgr2fr.w $f0, $r7 ++; CHECK-NEXT: bne $r7, $r6, .LBB34_1 ++; CHECK-NEXT: # %bb.5: # %atomicrmw.end ++; CHECK-NEXT: jr $ra ++ %v = atomicrmw fmin ptr %p, float 1.0 monotonic, align 4 ++ ret float %v ++} ++ ++define float @float_fmax_monotonic(ptr %p) nounwind { ++; CHECK-LABEL: float_fmax_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: fld.s $f0, $r4, 0 ++; CHECK-NEXT: addi.w $r5, $zero, 1 ++; CHECK-NEXT: movgr2fr.w $f1, $r5 ++; CHECK-NEXT: ffint.s.w $f1, $f1 ++; CHECK-NEXT: .LBB35_1: # %atomicrmw.start ++; CHECK-NEXT: # =>This Loop Header: Depth=1 ++; CHECK-NEXT: # Child Loop BB35_2 Depth 2 ++; CHECK-NEXT: fmax.s $f2, $f0, $f0 ++; CHECK-NEXT: fmax.s $f2, $f2, $f1 ++; 
CHECK-NEXT: movfr2gr.s $r5, $f2 ++; CHECK-NEXT: movfr2gr.s $r6, $f0 ++; CHECK-NEXT: .LBB35_2: # %atomicrmw.start ++; CHECK-NEXT: # Parent Loop BB35_1 Depth=1 ++; CHECK-NEXT: # => This Inner Loop Header: Depth=2 ++; CHECK-NEXT: ll.w $r7, $r4, 0 ++; CHECK-NEXT: bne $r7, $r6, .LBB35_4 ++; CHECK-NEXT: # %bb.3: # %atomicrmw.start ++; CHECK-NEXT: # in Loop: Header=BB35_2 Depth=2 ++; CHECK-NEXT: move $r8, $r5 ++; CHECK-NEXT: sc.w $r8, $r4, 0 ++; CHECK-NEXT: beq $r8, $zero, .LBB35_2 ++; CHECK-NEXT: .LBB35_4: # %atomicrmw.start ++; CHECK-NEXT: # in Loop: Header=BB35_1 Depth=1 ++; CHECK-NEXT: dbar 1792 ++; CHECK-NEXT: movgr2fr.w $f0, $r7 ++; CHECK-NEXT: bne $r7, $r6, .LBB35_1 ++; CHECK-NEXT: # %bb.5: # %atomicrmw.end ++; CHECK-NEXT: jr $ra ++ %v = atomicrmw fmax ptr %p, float 1.0 monotonic, align 4 ++ ret float %v ++} ++ ++define double @double_fadd_monotonic(ptr %p) nounwind { ++; CHECK-LABEL: double_fadd_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $sp, $sp, -80 ++; CHECK-NEXT: fst.d $f24, $sp, 72 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $ra, $sp, 64 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r27, $sp, 56 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r26, $sp, 48 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r25, $sp, 40 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r24, $sp, 32 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r23, $sp, 24 # 8-byte Folded Spill ++; CHECK-NEXT: move $r23, $r4 ++; CHECK-NEXT: fld.d $f0, $r4, 0 ++; CHECK-NEXT: addi.d $r4, $zero, 1 ++; CHECK-NEXT: movgr2fr.d $f1, $r4 ++; CHECK-NEXT: ffint.d.l $f24, $f1 ++; CHECK-NEXT: addi.d $r24, $zero, 8 ++; CHECK-NEXT: addi.d $r25, $sp, 16 ++; CHECK-NEXT: addi.d $r26, $sp, 8 ++; CHECK-NEXT: addi.d $r27, $zero, 0 ++; CHECK-NEXT: .LBB36_1: # %atomicrmw.start ++; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: fst.d $f0, $sp, 16 ++; CHECK-NEXT: fadd.d $f0, $f0, $f24 ++; CHECK-NEXT: fst.d $f0, $sp, 8 ++; CHECK-NEXT: move $r4, $r24 ++; CHECK-NEXT: move $r5, $r23 ++; CHECK-NEXT: move $r6, $r25 ++; CHECK-NEXT: move $r7, $r26 ++; CHECK-NEXT: move $r8, $r27 ++; CHECK-NEXT: move $r9, $r27 ++; CHECK-NEXT: lu12i.w $ra, __atomic_compare_exchange ++; CHECK-NEXT: ori $ra, $ra, __atomic_compare_exchange ++; CHECK-NEXT: lu32i.d $ra, __atomic_compare_exchange ++; CHECK-NEXT: lu52i.d $ra, $ra, __atomic_compare_exchange ++; CHECK-NEXT: jirl $ra, $ra, 0 ++; CHECK-NEXT: fld.d $f0, $sp, 16 ++; CHECK-NEXT: beqz $r4, .LBB36_1 ++; CHECK-NEXT: # %bb.2: # %atomicrmw.end ++; CHECK-NEXT: ld.d $r23, $sp, 24 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r24, $sp, 32 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r25, $sp, 40 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r26, $sp, 48 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r27, $sp, 56 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $ra, $sp, 64 # 8-byte Folded Reload ++; CHECK-NEXT: fld.d $f24, $sp, 72 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 80 ++; CHECK-NEXT: jr $ra ++ %v = atomicrmw fadd ptr %p, double 1.0 monotonic, align 4 ++ ret double %v ++} ++ ++define double @double_fsub_monotonic(ptr %p) nounwind { ++; CHECK-LABEL: double_fsub_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $sp, $sp, -64 ++; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r27, $sp, 48 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r26, $sp, 40 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r25, $sp, 32 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r24, $sp, 24 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r23, $sp, 16 # 8-byte Folded Spill ++; CHECK-NEXT: move $r23, $r4 ++; CHECK-NEXT: 
fld.d $f0, $r4, 0 ++; CHECK-NEXT: addi.d $r24, $zero, 8 ++; CHECK-NEXT: addi.d $r25, $sp, 8 ++; CHECK-NEXT: addi.d $r26, $sp, 0 ++; CHECK-NEXT: addi.d $r27, $zero, 0 ++; CHECK-NEXT: .LBB37_1: # %atomicrmw.start ++; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: fst.d $f0, $sp, 8 ++; CHECK-NEXT: lu12i.w $r4, .LCPI37_0 ++; CHECK-NEXT: ori $r4, $r4, .LCPI37_0 ++; CHECK-NEXT: lu32i.d $r4, .LCPI37_0 ++; CHECK-NEXT: lu52i.d $r4, $r4, .LCPI37_0 ++; CHECK-NEXT: fld.d $f1, $r4, 0 ++; CHECK-NEXT: fadd.d $f0, $f0, $f1 ++; CHECK-NEXT: fst.d $f0, $sp, 0 ++; CHECK-NEXT: move $r4, $r24 ++; CHECK-NEXT: move $r5, $r23 ++; CHECK-NEXT: move $r6, $r25 ++; CHECK-NEXT: move $r7, $r26 ++; CHECK-NEXT: move $r8, $r27 ++; CHECK-NEXT: move $r9, $r27 ++; CHECK-NEXT: lu12i.w $ra, __atomic_compare_exchange ++; CHECK-NEXT: ori $ra, $ra, __atomic_compare_exchange ++; CHECK-NEXT: lu32i.d $ra, __atomic_compare_exchange ++; CHECK-NEXT: lu52i.d $ra, $ra, __atomic_compare_exchange ++; CHECK-NEXT: jirl $ra, $ra, 0 ++; CHECK-NEXT: fld.d $f0, $sp, 8 ++; CHECK-NEXT: beqz $r4, .LBB37_1 ++; CHECK-NEXT: # %bb.2: # %atomicrmw.end ++; CHECK-NEXT: ld.d $r23, $sp, 16 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r24, $sp, 24 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r25, $sp, 32 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r26, $sp, 40 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r27, $sp, 48 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 64 ++; CHECK-NEXT: jr $ra ++ %v = atomicrmw fsub ptr %p, double 1.0 monotonic, align 4 ++ ret double %v ++} ++ ++define double @double_fmin_monotonic(ptr %p) nounwind { ++; CHECK-LABEL: double_fmin_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $sp, $sp, -80 ++; CHECK-NEXT: fst.d $f24, $sp, 72 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $ra, $sp, 64 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r27, $sp, 56 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r26, $sp, 48 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r25, $sp, 40 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r24, $sp, 32 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r23, $sp, 24 # 8-byte Folded Spill ++; CHECK-NEXT: move $r23, $r4 ++; CHECK-NEXT: fld.d $f0, $r4, 0 ++; CHECK-NEXT: addi.d $r4, $zero, 1 ++; CHECK-NEXT: movgr2fr.d $f1, $r4 ++; CHECK-NEXT: ffint.d.l $f24, $f1 ++; CHECK-NEXT: addi.d $r24, $zero, 8 ++; CHECK-NEXT: addi.d $r25, $sp, 16 ++; CHECK-NEXT: addi.d $r26, $sp, 8 ++; CHECK-NEXT: addi.d $r27, $zero, 0 ++; CHECK-NEXT: .LBB38_1: # %atomicrmw.start ++; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: fst.d $f0, $sp, 16 ++; CHECK-NEXT: fmax.d $f0, $f0, $f0 ++; CHECK-NEXT: fmin.d $f0, $f0, $f24 ++; CHECK-NEXT: fst.d $f0, $sp, 8 ++; CHECK-NEXT: move $r4, $r24 ++; CHECK-NEXT: move $r5, $r23 ++; CHECK-NEXT: move $r6, $r25 ++; CHECK-NEXT: move $r7, $r26 ++; CHECK-NEXT: move $r8, $r27 ++; CHECK-NEXT: move $r9, $r27 ++; CHECK-NEXT: lu12i.w $ra, __atomic_compare_exchange ++; CHECK-NEXT: ori $ra, $ra, __atomic_compare_exchange ++; CHECK-NEXT: lu32i.d $ra, __atomic_compare_exchange ++; CHECK-NEXT: lu52i.d $ra, $ra, __atomic_compare_exchange ++; CHECK-NEXT: jirl $ra, $ra, 0 ++; CHECK-NEXT: fld.d $f0, $sp, 16 ++; CHECK-NEXT: beqz $r4, .LBB38_1 ++; CHECK-NEXT: # %bb.2: # %atomicrmw.end ++; CHECK-NEXT: ld.d $r23, $sp, 24 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r24, $sp, 32 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r25, $sp, 40 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r26, $sp, 48 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r27, $sp, 56 # 
8-byte Folded Reload ++; CHECK-NEXT: ld.d $ra, $sp, 64 # 8-byte Folded Reload ++; CHECK-NEXT: fld.d $f24, $sp, 72 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 80 ++; CHECK-NEXT: jr $ra ++ %v = atomicrmw fmin ptr %p, double 1.0 monotonic, align 4 ++ ret double %v ++} ++ ++define double @double_fmax_monotonic(ptr %p) nounwind { ++; CHECK-LABEL: double_fmax_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $sp, $sp, -80 ++; CHECK-NEXT: fst.d $f24, $sp, 72 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $ra, $sp, 64 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r27, $sp, 56 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r26, $sp, 48 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r25, $sp, 40 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r24, $sp, 32 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r23, $sp, 24 # 8-byte Folded Spill ++; CHECK-NEXT: move $r23, $r4 ++; CHECK-NEXT: fld.d $f0, $r4, 0 ++; CHECK-NEXT: addi.d $r4, $zero, 1 ++; CHECK-NEXT: movgr2fr.d $f1, $r4 ++; CHECK-NEXT: ffint.d.l $f24, $f1 ++; CHECK-NEXT: addi.d $r24, $zero, 8 ++; CHECK-NEXT: addi.d $r25, $sp, 16 ++; CHECK-NEXT: addi.d $r26, $sp, 8 ++; CHECK-NEXT: addi.d $r27, $zero, 0 ++; CHECK-NEXT: .LBB39_1: # %atomicrmw.start ++; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: fst.d $f0, $sp, 16 ++; CHECK-NEXT: fmax.d $f0, $f0, $f0 ++; CHECK-NEXT: fmax.d $f0, $f0, $f24 ++; CHECK-NEXT: fst.d $f0, $sp, 8 ++; CHECK-NEXT: move $r4, $r24 ++; CHECK-NEXT: move $r5, $r23 ++; CHECK-NEXT: move $r6, $r25 ++; CHECK-NEXT: move $r7, $r26 ++; CHECK-NEXT: move $r8, $r27 ++; CHECK-NEXT: move $r9, $r27 ++; CHECK-NEXT: lu12i.w $ra, __atomic_compare_exchange ++; CHECK-NEXT: ori $ra, $ra, __atomic_compare_exchange ++; CHECK-NEXT: lu32i.d $ra, __atomic_compare_exchange ++; CHECK-NEXT: lu52i.d $ra, $ra, __atomic_compare_exchange ++; CHECK-NEXT: jirl $ra, $ra, 0 ++; CHECK-NEXT: fld.d $f0, $sp, 16 ++; CHECK-NEXT: beqz $r4, .LBB39_1 ++; CHECK-NEXT: # %bb.2: # %atomicrmw.end ++; CHECK-NEXT: ld.d $r23, $sp, 24 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r24, $sp, 32 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r25, $sp, 40 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r26, $sp, 48 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r27, $sp, 56 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $ra, $sp, 64 # 8-byte Folded Reload ++; CHECK-NEXT: fld.d $f24, $sp, 72 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 80 ++; CHECK-NEXT: jr $ra ++ %v = atomicrmw fmax ptr %p, double 1.0 monotonic, align 4 ++ ret double %v ++} +diff --git a/llvm/test/CodeGen/LoongArch/atomicrmw-minmax.ll b/llvm/test/CodeGen/LoongArch/atomicrmw-minmax.ll +new file mode 100644 +index 000000000..3e04fc53c +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/atomicrmw-minmax.ll +@@ -0,0 +1,1882 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --verify-machineinstrs < %s | FileCheck %s ++ ++define i8 @atomicrmw_umax_i8_acquire(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_umax_i8_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r14, $r10, $r8 ++; CHECK-NEXT: and $r5, $r5, $r8 ++; CHECK-NEXT: sltu 
$r13, $r14, $r5 ++; CHECK-NEXT: masknez $r11, $r14, $r13 ++; CHECK-NEXT: maskeqz $r13, $r5, $r13 ++; CHECK-NEXT: or $r11, $r11, $r13 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB0_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw umax ptr %a, i8 %b acquire ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_umax_i16_acquire(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_umax_i16_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r14, $r10, $r8 ++; CHECK-NEXT: and $r5, $r5, $r8 ++; CHECK-NEXT: sltu $r13, $r14, $r5 ++; CHECK-NEXT: masknez $r11, $r14, $r13 ++; CHECK-NEXT: maskeqz $r13, $r5, $r13 ++; CHECK-NEXT: or $r11, $r11, $r13 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB1_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw umax ptr %a, i16 %b acquire ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_umax_i32_acquire(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_umax_i32_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: ammax_db.wu $r5, $r6, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw umax ptr %a, i32 %b acquire ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_umax_i64_acquire(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_umax_i64_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: ammax_db.du $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw umax ptr %a, i64 %b acquire ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_umin_i8_acquire(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_umin_i8_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r14, $r10, $r8 ++; CHECK-NEXT: and $r5, $r5, $r8 ++; CHECK-NEXT: sltu $r13, $r14, $r5 ++; CHECK-NEXT: maskeqz $r11, $r14, $r13 ++; CHECK-NEXT: masknez $r13, $r5, $r13 ++; CHECK-NEXT: or $r11, $r11, $r13 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB4_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw 
umin ptr %a, i8 %b acquire ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_umin_i16_acquire(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_umin_i16_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r14, $r10, $r8 ++; CHECK-NEXT: and $r5, $r5, $r8 ++; CHECK-NEXT: sltu $r13, $r14, $r5 ++; CHECK-NEXT: maskeqz $r11, $r14, $r13 ++; CHECK-NEXT: masknez $r13, $r5, $r13 ++; CHECK-NEXT: or $r11, $r11, $r13 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB5_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw umin ptr %a, i16 %b acquire ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_umin_i32_acquire(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_umin_i32_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: ammin_db.wu $r5, $r6, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw umin ptr %a, i32 %b acquire ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_umin_i64_acquire(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_umin_i64_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: ammin_db.du $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw umin ptr %a, i64 %b acquire ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_max_i8_acquire(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_max_i8_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r14, $r10, $r8 ++; CHECK-NEXT: and $r5, $r5, $r8 ++; CHECK-NEXT: slt $r13, $r14, $r5 ++; CHECK-NEXT: masknez $r11, $r14, $r13 ++; CHECK-NEXT: maskeqz $r13, $r5, $r13 ++; CHECK-NEXT: or $r11, $r11, $r13 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB8_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw max ptr %a, i8 %b acquire ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_max_i16_acquire(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_max_i16_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor 
$r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r14, $r10, $r8 ++; CHECK-NEXT: and $r5, $r5, $r8 ++; CHECK-NEXT: slt $r13, $r14, $r5 ++; CHECK-NEXT: masknez $r11, $r14, $r13 ++; CHECK-NEXT: maskeqz $r13, $r5, $r13 ++; CHECK-NEXT: or $r11, $r11, $r13 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB9_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw max ptr %a, i16 %b acquire ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_max_i32_acquire(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_max_i32_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: ammax_db.w $r5, $r6, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw max ptr %a, i32 %b acquire ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_max_i64_acquire(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_max_i64_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: ammax_db.d $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw max ptr %a, i64 %b acquire ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_min_i8_acquire(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_min_i8_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r14, $r10, $r8 ++; CHECK-NEXT: and $r5, $r5, $r8 ++; CHECK-NEXT: slt $r13, $r14, $r5 ++; CHECK-NEXT: maskeqz $r11, $r14, $r13 ++; CHECK-NEXT: masknez $r13, $r5, $r13 ++; CHECK-NEXT: or $r11, $r11, $r13 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB12_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw min ptr %a, i8 %b acquire ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_min_i16_acquire(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_min_i16_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r14, $r10, $r8 ++; CHECK-NEXT: and $r5, $r5, $r8 ++; CHECK-NEXT: slt $r13, $r14, $r5 ++; CHECK-NEXT: maskeqz $r11, $r14, $r13 ++; CHECK-NEXT: masknez $r13, $r5, $r13 ++; CHECK-NEXT: or $r11, $r11, $r13 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; 
CHECK-NEXT: beq $r12, $zero, .LBB13_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw min ptr %a, i16 %b acquire ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_min_i32_acquire(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_min_i32_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: ammin_db.w $r5, $r6, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw min ptr %a, i32 %b acquire ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_min_i64_acquire(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_min_i64_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: ammin_db.d $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw min ptr %a, i64 %b acquire ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_umax_i8_release(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_umax_i8_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r14, $r10, $r8 ++; CHECK-NEXT: and $r5, $r5, $r8 ++; CHECK-NEXT: sltu $r13, $r14, $r5 ++; CHECK-NEXT: masknez $r11, $r14, $r13 ++; CHECK-NEXT: maskeqz $r13, $r5, $r13 ++; CHECK-NEXT: or $r11, $r11, $r13 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB16_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw umax ptr %a, i8 %b release ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_umax_i16_release(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_umax_i16_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r14, $r10, $r8 ++; CHECK-NEXT: and $r5, $r5, $r8 ++; CHECK-NEXT: sltu $r13, $r14, $r5 ++; CHECK-NEXT: masknez $r11, $r14, $r13 ++; CHECK-NEXT: maskeqz $r13, $r5, $r13 ++; CHECK-NEXT: or $r11, $r11, $r13 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB17_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw umax ptr %a, i16 %b release ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_umax_i32_release(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_umax_i32_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; 
CHECK-NEXT: ammax_db.wu $r5, $r6, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw umax ptr %a, i32 %b release ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_umax_i64_release(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_umax_i64_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: ammax_db.du $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw umax ptr %a, i64 %b release ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_umin_i8_release(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_umin_i8_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r14, $r10, $r8 ++; CHECK-NEXT: and $r5, $r5, $r8 ++; CHECK-NEXT: sltu $r13, $r14, $r5 ++; CHECK-NEXT: maskeqz $r11, $r14, $r13 ++; CHECK-NEXT: masknez $r13, $r5, $r13 ++; CHECK-NEXT: or $r11, $r11, $r13 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB20_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw umin ptr %a, i8 %b release ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_umin_i16_release(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_umin_i16_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r14, $r10, $r8 ++; CHECK-NEXT: and $r5, $r5, $r8 ++; CHECK-NEXT: sltu $r13, $r14, $r5 ++; CHECK-NEXT: maskeqz $r11, $r14, $r13 ++; CHECK-NEXT: masknez $r13, $r5, $r13 ++; CHECK-NEXT: or $r11, $r11, $r13 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB21_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw umin ptr %a, i16 %b release ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_umin_i32_release(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_umin_i32_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: ammin_db.wu $r5, $r6, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw umin ptr %a, i32 %b release ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_umin_i64_release(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_umin_i64_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: ammin_db.du $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ 
%1 = atomicrmw umin ptr %a, i64 %b release ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_max_i8_release(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_max_i8_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB24_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r14, $r10, $r8 ++; CHECK-NEXT: and $r5, $r5, $r8 ++; CHECK-NEXT: slt $r13, $r14, $r5 ++; CHECK-NEXT: masknez $r11, $r14, $r13 ++; CHECK-NEXT: maskeqz $r13, $r5, $r13 ++; CHECK-NEXT: or $r11, $r11, $r13 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB24_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw max ptr %a, i8 %b release ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_max_i16_release(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_max_i16_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB25_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r14, $r10, $r8 ++; CHECK-NEXT: and $r5, $r5, $r8 ++; CHECK-NEXT: slt $r13, $r14, $r5 ++; CHECK-NEXT: masknez $r11, $r14, $r13 ++; CHECK-NEXT: maskeqz $r13, $r5, $r13 ++; CHECK-NEXT: or $r11, $r11, $r13 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB25_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw max ptr %a, i16 %b release ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_max_i32_release(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_max_i32_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: ammax_db.w $r5, $r6, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw max ptr %a, i32 %b release ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_max_i64_release(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_max_i64_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: ammax_db.d $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw max ptr %a, i64 %b release ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_min_i8_release(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_min_i8_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; 
CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB28_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r14, $r10, $r8 ++; CHECK-NEXT: and $r5, $r5, $r8 ++; CHECK-NEXT: slt $r13, $r14, $r5 ++; CHECK-NEXT: maskeqz $r11, $r14, $r13 ++; CHECK-NEXT: masknez $r13, $r5, $r13 ++; CHECK-NEXT: or $r11, $r11, $r13 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB28_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw min ptr %a, i8 %b release ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_min_i16_release(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_min_i16_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB29_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r14, $r10, $r8 ++; CHECK-NEXT: and $r5, $r5, $r8 ++; CHECK-NEXT: slt $r13, $r14, $r5 ++; CHECK-NEXT: maskeqz $r11, $r14, $r13 ++; CHECK-NEXT: masknez $r13, $r5, $r13 ++; CHECK-NEXT: or $r11, $r11, $r13 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB29_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw min ptr %a, i16 %b release ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_min_i32_release(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_min_i32_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: ammin_db.w $r5, $r6, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw min ptr %a, i32 %b release ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_min_i64_release(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_min_i64_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: ammin_db.d $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw min ptr %a, i64 %b release ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_umax_i8_acq_rel(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_umax_i8_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB32_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r14, $r10, $r8 ++; CHECK-NEXT: and $r5, $r5, $r8 ++; CHECK-NEXT: sltu $r13, $r14, $r5 ++; CHECK-NEXT: masknez $r11, $r14, $r13 ++; CHECK-NEXT: maskeqz $r13, $r5, $r13 ++; CHECK-NEXT: or $r11, $r11, $r13 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, 
$zero, .LBB32_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw umax ptr %a, i8 %b acq_rel ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_umax_i16_acq_rel(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_umax_i16_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB33_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r14, $r10, $r8 ++; CHECK-NEXT: and $r5, $r5, $r8 ++; CHECK-NEXT: sltu $r13, $r14, $r5 ++; CHECK-NEXT: masknez $r11, $r14, $r13 ++; CHECK-NEXT: maskeqz $r13, $r5, $r13 ++; CHECK-NEXT: or $r11, $r11, $r13 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB33_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw umax ptr %a, i16 %b acq_rel ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_umax_i32_acq_rel(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_umax_i32_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: ammax_db.wu $r5, $r6, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw umax ptr %a, i32 %b acq_rel ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_umax_i64_acq_rel(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_umax_i64_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: ammax_db.du $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw umax ptr %a, i64 %b acq_rel ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_umin_i8_acq_rel(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_umin_i8_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB36_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r14, $r10, $r8 ++; CHECK-NEXT: and $r5, $r5, $r8 ++; CHECK-NEXT: sltu $r13, $r14, $r5 ++; CHECK-NEXT: maskeqz $r11, $r14, $r13 ++; CHECK-NEXT: masknez $r13, $r5, $r13 ++; CHECK-NEXT: or $r11, $r11, $r13 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB36_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw umin ptr %a, i8 %b acq_rel ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_umin_i16_acq_rel(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_umin_i16_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; 
CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB37_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r14, $r10, $r8 ++; CHECK-NEXT: and $r5, $r5, $r8 ++; CHECK-NEXT: sltu $r13, $r14, $r5 ++; CHECK-NEXT: maskeqz $r11, $r14, $r13 ++; CHECK-NEXT: masknez $r13, $r5, $r13 ++; CHECK-NEXT: or $r11, $r11, $r13 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB37_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw umin ptr %a, i16 %b acq_rel ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_umin_i32_acq_rel(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_umin_i32_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: ammin_db.wu $r5, $r6, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw umin ptr %a, i32 %b acq_rel ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_umin_i64_acq_rel(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_umin_i64_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: ammin_db.du $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw umin ptr %a, i64 %b acq_rel ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_max_i8_acq_rel(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_max_i8_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB40_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r14, $r10, $r8 ++; CHECK-NEXT: and $r5, $r5, $r8 ++; CHECK-NEXT: slt $r13, $r14, $r5 ++; CHECK-NEXT: masknez $r11, $r14, $r13 ++; CHECK-NEXT: maskeqz $r13, $r5, $r13 ++; CHECK-NEXT: or $r11, $r11, $r13 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB40_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw max ptr %a, i8 %b acq_rel ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_max_i16_acq_rel(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_max_i16_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB41_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r14, $r10, $r8 ++; CHECK-NEXT: and $r5, $r5, $r8 ++; CHECK-NEXT: slt $r13, $r14, $r5 
++; CHECK-NEXT: masknez $r11, $r14, $r13 ++; CHECK-NEXT: maskeqz $r13, $r5, $r13 ++; CHECK-NEXT: or $r11, $r11, $r13 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB41_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw max ptr %a, i16 %b acq_rel ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_max_i32_acq_rel(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_max_i32_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: ammax_db.w $r5, $r6, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw max ptr %a, i32 %b acq_rel ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_max_i64_acq_rel(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_max_i64_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: ammax_db.d $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw max ptr %a, i64 %b acq_rel ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_min_i8_acq_rel(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_min_i8_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB44_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r14, $r10, $r8 ++; CHECK-NEXT: and $r5, $r5, $r8 ++; CHECK-NEXT: slt $r13, $r14, $r5 ++; CHECK-NEXT: maskeqz $r11, $r14, $r13 ++; CHECK-NEXT: masknez $r13, $r5, $r13 ++; CHECK-NEXT: or $r11, $r11, $r13 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB44_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw min ptr %a, i8 %b acq_rel ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_min_i16_acq_rel(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_min_i16_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB45_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r14, $r10, $r8 ++; CHECK-NEXT: and $r5, $r5, $r8 ++; CHECK-NEXT: slt $r13, $r14, $r5 ++; CHECK-NEXT: maskeqz $r11, $r14, $r13 ++; CHECK-NEXT: masknez $r13, $r5, $r13 ++; CHECK-NEXT: or $r11, $r11, $r13 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB45_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw min ptr %a, i16 %b acq_rel 
++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_min_i32_acq_rel(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_min_i32_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: ammin_db.w $r5, $r6, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw min ptr %a, i32 %b acq_rel ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_min_i64_acq_rel(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_min_i64_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: ammin_db.d $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw min ptr %a, i64 %b acq_rel ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_umax_i8_seq_cst(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_umax_i8_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB48_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r14, $r10, $r8 ++; CHECK-NEXT: and $r5, $r5, $r8 ++; CHECK-NEXT: sltu $r13, $r14, $r5 ++; CHECK-NEXT: masknez $r11, $r14, $r13 ++; CHECK-NEXT: maskeqz $r13, $r5, $r13 ++; CHECK-NEXT: or $r11, $r11, $r13 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB48_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw umax ptr %a, i8 %b seq_cst ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_umax_i16_seq_cst(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_umax_i16_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB49_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r14, $r10, $r8 ++; CHECK-NEXT: and $r5, $r5, $r8 ++; CHECK-NEXT: sltu $r13, $r14, $r5 ++; CHECK-NEXT: masknez $r11, $r14, $r13 ++; CHECK-NEXT: maskeqz $r13, $r5, $r13 ++; CHECK-NEXT: or $r11, $r11, $r13 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB49_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw umax ptr %a, i16 %b seq_cst ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_umax_i32_seq_cst(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_umax_i32_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: ammax_db.wu $r5, $r6, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw umax ptr %a, i32 %b seq_cst ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_umax_i64_seq_cst(ptr %a, i64 %b) nounwind { ++; 
CHECK-LABEL: atomicrmw_umax_i64_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: ammax_db.du $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw umax ptr %a, i64 %b seq_cst ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_umin_i8_seq_cst(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_umin_i8_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB52_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r14, $r10, $r8 ++; CHECK-NEXT: and $r5, $r5, $r8 ++; CHECK-NEXT: sltu $r13, $r14, $r5 ++; CHECK-NEXT: maskeqz $r11, $r14, $r13 ++; CHECK-NEXT: masknez $r13, $r5, $r13 ++; CHECK-NEXT: or $r11, $r11, $r13 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB52_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw umin ptr %a, i8 %b seq_cst ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_umin_i16_seq_cst(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_umin_i16_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB53_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r14, $r10, $r8 ++; CHECK-NEXT: and $r5, $r5, $r8 ++; CHECK-NEXT: sltu $r13, $r14, $r5 ++; CHECK-NEXT: maskeqz $r11, $r14, $r13 ++; CHECK-NEXT: masknez $r13, $r5, $r13 ++; CHECK-NEXT: or $r11, $r11, $r13 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB53_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw umin ptr %a, i16 %b seq_cst ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_umin_i32_seq_cst(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_umin_i32_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: ammin_db.wu $r5, $r6, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw umin ptr %a, i32 %b seq_cst ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_umin_i64_seq_cst(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_umin_i64_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: ammin_db.du $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw umin ptr %a, i64 %b seq_cst ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_max_i8_seq_cst(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_max_i8_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, 
$zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB56_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r14, $r10, $r8 ++; CHECK-NEXT: and $r5, $r5, $r8 ++; CHECK-NEXT: slt $r13, $r14, $r5 ++; CHECK-NEXT: masknez $r11, $r14, $r13 ++; CHECK-NEXT: maskeqz $r13, $r5, $r13 ++; CHECK-NEXT: or $r11, $r11, $r13 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB56_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw max ptr %a, i8 %b seq_cst ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_max_i16_seq_cst(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_max_i16_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB57_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r14, $r10, $r8 ++; CHECK-NEXT: and $r5, $r5, $r8 ++; CHECK-NEXT: slt $r13, $r14, $r5 ++; CHECK-NEXT: masknez $r11, $r14, $r13 ++; CHECK-NEXT: maskeqz $r13, $r5, $r13 ++; CHECK-NEXT: or $r11, $r11, $r13 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB57_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw max ptr %a, i16 %b seq_cst ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_max_i32_seq_cst(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_max_i32_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: ammax_db.w $r5, $r6, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw max ptr %a, i32 %b seq_cst ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_max_i64_seq_cst(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_max_i64_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: ammax_db.d $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw max ptr %a, i64 %b seq_cst ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_min_i8_seq_cst(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_min_i8_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB60_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r14, $r10, $r8 ++; CHECK-NEXT: and $r5, $r5, $r8 ++; CHECK-NEXT: slt $r13, $r14, $r5 ++; CHECK-NEXT: maskeqz 
$r11, $r14, $r13 ++; CHECK-NEXT: masknez $r13, $r5, $r13 ++; CHECK-NEXT: or $r11, $r11, $r13 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB60_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw min ptr %a, i8 %b seq_cst ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_min_i16_seq_cst(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_min_i16_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB61_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r14, $r10, $r8 ++; CHECK-NEXT: and $r5, $r5, $r8 ++; CHECK-NEXT: slt $r13, $r14, $r5 ++; CHECK-NEXT: maskeqz $r11, $r14, $r13 ++; CHECK-NEXT: masknez $r13, $r5, $r13 ++; CHECK-NEXT: or $r11, $r11, $r13 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB61_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw min ptr %a, i16 %b seq_cst ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_min_i32_seq_cst(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_min_i32_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: ammin_db.w $r5, $r6, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw min ptr %a, i32 %b seq_cst ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_min_i64_seq_cst(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_min_i64_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: ammin_db.d $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw min ptr %a, i64 %b seq_cst ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_umax_i8_monotonic(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_umax_i8_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB64_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r14, $r10, $r8 ++; CHECK-NEXT: and $r5, $r5, $r8 ++; CHECK-NEXT: sltu $r13, $r14, $r5 ++; CHECK-NEXT: masknez $r11, $r14, $r13 ++; CHECK-NEXT: maskeqz $r13, $r5, $r13 ++; CHECK-NEXT: or $r11, $r11, $r13 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB64_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw umax ptr %a, i8 %b monotonic ++ ret i8 %1 
++} ++ ++define i16 @atomicrmw_umax_i16_monotonic(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_umax_i16_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB65_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r14, $r10, $r8 ++; CHECK-NEXT: and $r5, $r5, $r8 ++; CHECK-NEXT: sltu $r13, $r14, $r5 ++; CHECK-NEXT: masknez $r11, $r14, $r13 ++; CHECK-NEXT: maskeqz $r13, $r5, $r13 ++; CHECK-NEXT: or $r11, $r11, $r13 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB65_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw umax ptr %a, i16 %b monotonic ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_umax_i32_monotonic(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_umax_i32_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: ammax_db.wu $r5, $r6, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw umax ptr %a, i32 %b monotonic ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_umax_i64_monotonic(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_umax_i64_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: ammax_db.du $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw umax ptr %a, i64 %b monotonic ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_umin_i8_monotonic(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_umin_i8_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB68_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r14, $r10, $r8 ++; CHECK-NEXT: and $r5, $r5, $r8 ++; CHECK-NEXT: sltu $r13, $r14, $r5 ++; CHECK-NEXT: maskeqz $r11, $r14, $r13 ++; CHECK-NEXT: masknez $r13, $r5, $r13 ++; CHECK-NEXT: or $r11, $r11, $r13 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB68_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw umin ptr %a, i8 %b monotonic ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_umin_i16_monotonic(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_umin_i16_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, 
$zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB69_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r14, $r10, $r8 ++; CHECK-NEXT: and $r5, $r5, $r8 ++; CHECK-NEXT: sltu $r13, $r14, $r5 ++; CHECK-NEXT: maskeqz $r11, $r14, $r13 ++; CHECK-NEXT: masknez $r13, $r5, $r13 ++; CHECK-NEXT: or $r11, $r11, $r13 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB69_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw umin ptr %a, i16 %b monotonic ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_umin_i32_monotonic(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_umin_i32_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: ammin_db.wu $r5, $r6, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw umin ptr %a, i32 %b monotonic ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_umin_i64_monotonic(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_umin_i64_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: ammin_db.du $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw umin ptr %a, i64 %b monotonic ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_max_i8_monotonic(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_max_i8_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB72_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r14, $r10, $r8 ++; CHECK-NEXT: and $r5, $r5, $r8 ++; CHECK-NEXT: slt $r13, $r14, $r5 ++; CHECK-NEXT: masknez $r11, $r14, $r13 ++; CHECK-NEXT: maskeqz $r13, $r5, $r13 ++; CHECK-NEXT: or $r11, $r11, $r13 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB72_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw max ptr %a, i8 %b monotonic ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_max_i16_monotonic(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_max_i16_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB73_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r14, $r10, $r8 ++; CHECK-NEXT: and $r5, $r5, $r8 ++; CHECK-NEXT: slt $r13, $r14, $r5 ++; CHECK-NEXT: masknez $r11, $r14, $r13 ++; CHECK-NEXT: maskeqz $r13, $r5, $r13 ++; CHECK-NEXT: or $r11, $r11, $r13 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; 
CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB73_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw max ptr %a, i16 %b monotonic ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_max_i32_monotonic(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_max_i32_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: ammax_db.w $r5, $r6, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw max ptr %a, i32 %b monotonic ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_max_i64_monotonic(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_max_i64_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: ammax_db.d $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw max ptr %a, i64 %b monotonic ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_min_i8_monotonic(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_min_i8_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB76_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r14, $r10, $r8 ++; CHECK-NEXT: and $r5, $r5, $r8 ++; CHECK-NEXT: slt $r13, $r14, $r5 ++; CHECK-NEXT: maskeqz $r11, $r14, $r13 ++; CHECK-NEXT: masknez $r13, $r5, $r13 ++; CHECK-NEXT: or $r11, $r11, $r13 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB76_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw min ptr %a, i8 %b monotonic ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_min_i16_monotonic(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_min_i16_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB77_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r14, $r10, $r8 ++; CHECK-NEXT: and $r5, $r5, $r8 ++; CHECK-NEXT: slt $r13, $r14, $r5 ++; CHECK-NEXT: maskeqz $r11, $r14, $r13 ++; CHECK-NEXT: masknez $r13, $r5, $r13 ++; CHECK-NEXT: or $r11, $r11, $r13 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB77_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw min ptr %a, i16 %b monotonic ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_min_i32_monotonic(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_min_i32_monotonic: ++; CHECK: # %bb.0: ++; 
CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: ammin_db.w $r5, $r6, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw min ptr %a, i32 %b monotonic ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_min_i64_monotonic(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_min_i64_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: ammin_db.d $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw min ptr %a, i64 %b monotonic ++ ret i64 %1 ++} +diff --git a/llvm/test/CodeGen/LoongArch/atomicrmw.ll b/llvm/test/CodeGen/LoongArch/atomicrmw.ll +new file mode 100644 +index 000000000..4732ec0fa +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/atomicrmw.ll +@@ -0,0 +1,3652 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s ++ ++define i8 @atomicrmw_xchg_i8_acquire(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_xchg_i8_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r5, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB0_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xchg ptr %a, i8 %b acquire ++ ret i8 %1 ++} ++ ++define i8 @atomicrmw_xchg_0_i8_acquire(ptr %a) nounwind { ++; CHECK-LABEL: atomicrmw_xchg_0_i8_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r5, $zero, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r5, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB1_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xchg ptr %a, i8 0 acquire ++ ret i8 %1 ++} ++ ++define i8 @atomicrmw_xchg_minus_1_i8_acquire(ptr %a) nounwind { ++; CHECK-LABEL: atomicrmw_xchg_minus_1_i8_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.w $r5, $zero, -1 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r5, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 
++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB2_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xchg ptr %a, i8 -1 acquire ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_xchg_i16_acquire(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_xchg_i16_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r5, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB3_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xchg ptr %a, i16 %b acquire ++ ret i16 %1 ++} ++ ++define i16 @atomicrmw_xchg_0_i16_acquire(ptr %a) nounwind { ++; CHECK-LABEL: atomicrmw_xchg_0_i16_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r5, $zero, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r5, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB4_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xchg ptr %a, i16 0 acquire ++ ret i16 %1 ++} ++ ++define i16 @atomicrmw_xchg_minus_1_i16_acquire(ptr %a) nounwind { ++; CHECK-LABEL: atomicrmw_xchg_minus_1_i16_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.w $r5, $zero, -1 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r5, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB5_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xchg ptr %a, i16 -1 acquire ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_xchg_i32_acquire(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_xchg_i32_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amswap_db.w 
$r5, $r6, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xchg ptr %a, i32 %b acquire ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_xchg_i64_acquire(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_xchg_i64_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amswap_db.d $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xchg ptr %a, i64 %b acquire ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_add_i8_acquire(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_add_i8_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: add.w $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB8_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw add ptr %a, i8 %b acquire ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_add_i16_acquire(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_add_i16_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: add.w $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB9_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw add ptr %a, i16 %b acquire ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_add_i32_acquire(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_add_i32_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amadd_db.w $r5, $r6, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw add ptr %a, i32 %b acquire ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_add_i64_acquire(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_add_i64_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amadd_db.d $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw add ptr %a, i64 %b acquire ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_sub_i8_acquire(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_sub_i8_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; 
CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: sub.w $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB12_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw sub ptr %a, i8 %b acquire ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_sub_i16_acquire(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_sub_i16_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: sub.w $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB13_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw sub ptr %a, i16 %b acquire ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_sub_i32_acquire(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_sub_i32_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: sub.w $r7, $zero, $r6 ++; CHECK-NEXT: amadd_db.w $r5, $r7, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw sub ptr %a, i32 %b acquire ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_sub_i64_acquire(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_sub_i64_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: sub.d $r7, $zero, $r5 ++; CHECK-NEXT: amadd_db.d $r6, $r7, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw sub ptr %a, i64 %b acquire ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_nand_i8_acquire(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_nand_i8_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r10, $r5 ++; CHECK-NEXT: nor $r11, $zero, $r11 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB16_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra 
++ %1 = atomicrmw nand ptr %a, i8 %b acquire ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_nand_i16_acquire(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_nand_i16_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r10, $r5 ++; CHECK-NEXT: nor $r11, $zero, $r11 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB17_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw nand ptr %a, i16 %b acquire ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_nand_i32_acquire(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_nand_i32_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r5, $r4, 0 ++; CHECK-NEXT: and $r7, $r5, $r6 ++; CHECK-NEXT: nor $r7, $zero, $r7 ++; CHECK-NEXT: sc.w $r7, $r4, 0 ++; CHECK-NEXT: beq $r7, $zero, .LBB18_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw nand ptr %a, i32 %b acquire ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_nand_i64_acquire(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_nand_i64_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: .LBB19_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.d $r6, $r4, 0 ++; CHECK-NEXT: and $r7, $r6, $r5 ++; CHECK-NEXT: nor $r7, $zero, $r7 ++; CHECK-NEXT: sc.d $r7, $r4, 0 ++; CHECK-NEXT: beq $r7, $zero, .LBB19_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw nand ptr %a, i64 %b acquire ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_and_i8_acquire(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_and_i8_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB20_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw and ptr %a, i8 %b acquire ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_and_i16_acquire(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_and_i16_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 
15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB21_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw and ptr %a, i16 %b acquire ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_and_i32_acquire(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_and_i32_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amand_db.w $r5, $r6, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw and ptr %a, i32 %b acquire ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_and_i64_acquire(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_and_i64_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amand_db.d $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw and ptr %a, i64 %b acquire ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_or_i8_acquire(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_or_i8_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB24_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: or $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB24_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw or ptr %a, i8 %b acquire ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_or_i16_acquire(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_or_i16_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB25_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: or $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB25_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw or ptr %a, i16 %b acquire ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_or_i32_acquire(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: 
atomicrmw_or_i32_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amor_db.w $r5, $r6, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw or ptr %a, i32 %b acquire ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_or_i64_acquire(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_or_i64_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amor_db.d $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw or ptr %a, i64 %b acquire ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_xor_i8_acquire(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_xor_i8_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB28_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: xor $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB28_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xor ptr %a, i8 %b acquire ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_xor_i16_acquire(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_xor_i16_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB29_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: xor $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB29_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xor ptr %a, i16 %b acquire ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_xor_i32_acquire(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_xor_i32_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amxor_db.w $r5, $r6, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xor ptr %a, i32 %b acquire ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_xor_i64_acquire(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_xor_i64_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amxor_db.d $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xor ptr %a, i64 %b acquire ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_xchg_i8_release(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_xchg_i8_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, 
$r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB32_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r5, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB32_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xchg ptr %a, i8 %b release ++ ret i8 %1 ++} ++ ++define i8 @atomicrmw_xchg_0_i8_release(ptr %a) nounwind { ++; CHECK-LABEL: atomicrmw_xchg_0_i8_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r5, $zero, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB33_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r5, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB33_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xchg ptr %a, i8 0 release ++ ret i8 %1 ++} ++ ++define i8 @atomicrmw_xchg_minus_1_i8_release(ptr %a) nounwind { ++; CHECK-LABEL: atomicrmw_xchg_minus_1_i8_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.w $r5, $zero, -1 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB34_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r5, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB34_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xchg ptr %a, i8 -1 release ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_xchg_i16_release(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_xchg_i16_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB35_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r5, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB35_1 ++; CHECK-NEXT: # %bb.2: ++; 
CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xchg ptr %a, i16 %b release ++ ret i16 %1 ++} ++ ++define i16 @atomicrmw_xchg_0_i16_release(ptr %a) nounwind { ++; CHECK-LABEL: atomicrmw_xchg_0_i16_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r5, $zero, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB36_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r5, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB36_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xchg ptr %a, i16 0 release ++ ret i16 %1 ++} ++ ++define i16 @atomicrmw_xchg_minus_1_i16_release(ptr %a) nounwind { ++; CHECK-LABEL: atomicrmw_xchg_minus_1_i16_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.w $r5, $zero, -1 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB37_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r5, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB37_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xchg ptr %a, i16 -1 release ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_xchg_i32_release(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_xchg_i32_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amswap_db.w $r5, $r6, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xchg ptr %a, i32 %b release ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_xchg_i64_release(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_xchg_i64_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amswap_db.d $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xchg ptr %a, i64 %b release ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_add_i8_release(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_add_i8_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB40_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: add.w $r11, $r10, $r5 ++; 
CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB40_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw add ptr %a, i8 %b release ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_add_i16_release(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_add_i16_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB41_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: add.w $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB41_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw add ptr %a, i16 %b release ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_add_i32_release(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_add_i32_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amadd_db.w $r5, $r6, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw add ptr %a, i32 %b release ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_add_i64_release(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_add_i64_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amadd_db.d $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw add ptr %a, i64 %b release ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_sub_i8_release(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_sub_i8_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB44_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: sub.w $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB44_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw sub ptr %a, i8 %b release ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_sub_i16_release(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_sub_i16_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, 
$r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB45_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: sub.w $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB45_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw sub ptr %a, i16 %b release ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_sub_i32_release(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_sub_i32_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: sub.w $r7, $zero, $r6 ++; CHECK-NEXT: amadd_db.w $r5, $r7, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw sub ptr %a, i32 %b release ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_sub_i64_release(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_sub_i64_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: sub.d $r7, $zero, $r5 ++; CHECK-NEXT: amadd_db.d $r6, $r7, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw sub ptr %a, i64 %b release ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_nand_i8_release(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_nand_i8_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB48_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r10, $r5 ++; CHECK-NEXT: nor $r11, $zero, $r11 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB48_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw nand ptr %a, i8 %b release ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_nand_i16_release(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_nand_i16_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB49_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r10, $r5 ++; CHECK-NEXT: nor $r11, $zero, $r11 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB49_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw 
nand ptr %a, i16 %b release ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_nand_i32_release(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_nand_i32_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: .LBB50_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r5, $r4, 0 ++; CHECK-NEXT: and $r7, $r5, $r6 ++; CHECK-NEXT: nor $r7, $zero, $r7 ++; CHECK-NEXT: sc.w $r7, $r4, 0 ++; CHECK-NEXT: beq $r7, $zero, .LBB50_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw nand ptr %a, i32 %b release ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_nand_i64_release(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_nand_i64_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: .LBB51_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.d $r6, $r4, 0 ++; CHECK-NEXT: and $r7, $r6, $r5 ++; CHECK-NEXT: nor $r7, $zero, $r7 ++; CHECK-NEXT: sc.d $r7, $r4, 0 ++; CHECK-NEXT: beq $r7, $zero, .LBB51_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw nand ptr %a, i64 %b release ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_and_i8_release(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_and_i8_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB52_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB52_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw and ptr %a, i8 %b release ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_and_i16_release(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_and_i16_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB53_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB53_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw and ptr %a, i16 %b release ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_and_i32_release(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_and_i32_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amand_db.w $r5, $r6, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw and ptr %a, i32 %b release ++ ret i32 %1 ++} ++ ++define i64 
@atomicrmw_and_i64_release(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_and_i64_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amand_db.d $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw and ptr %a, i64 %b release ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_or_i8_release(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_or_i8_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB56_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: or $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB56_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw or ptr %a, i8 %b release ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_or_i16_release(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_or_i16_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB57_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: or $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB57_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw or ptr %a, i16 %b release ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_or_i32_release(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_or_i32_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amor_db.w $r5, $r6, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw or ptr %a, i32 %b release ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_or_i64_release(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_or_i64_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amor_db.d $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw or ptr %a, i64 %b release ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_xor_i8_release(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_xor_i8_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB60_1: # 
=>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: xor $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB60_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xor ptr %a, i8 %b release ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_xor_i16_release(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_xor_i16_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB61_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: xor $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB61_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xor ptr %a, i16 %b release ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_xor_i32_release(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_xor_i32_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amxor_db.w $r5, $r6, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xor ptr %a, i32 %b release ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_xor_i64_release(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_xor_i64_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amxor_db.d $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xor ptr %a, i64 %b release ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_xchg_i8_acq_rel(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_xchg_i8_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB64_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r5, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB64_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xchg ptr %a, i8 %b acq_rel ++ ret i8 %1 ++} ++ ++define i8 @atomicrmw_xchg_0_i8_acq_rel(ptr %a) nounwind { ++; CHECK-LABEL: atomicrmw_xchg_0_i8_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r5, $zero, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, 
$r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB65_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r5, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB65_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xchg ptr %a, i8 0 acq_rel ++ ret i8 %1 ++} ++ ++define i8 @atomicrmw_xchg_minus_1_i8_acq_rel(ptr %a) nounwind { ++; CHECK-LABEL: atomicrmw_xchg_minus_1_i8_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.w $r5, $zero, -1 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB66_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r5, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB66_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xchg ptr %a, i8 -1 acq_rel ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_xchg_i16_acq_rel(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_xchg_i16_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB67_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r5, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB67_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xchg ptr %a, i16 %b acq_rel ++ ret i16 %1 ++} ++ ++define i16 @atomicrmw_xchg_0_i16_acq_rel(ptr %a) nounwind { ++; CHECK-LABEL: atomicrmw_xchg_0_i16_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r5, $zero, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB68_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r5, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB68_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; 
CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xchg ptr %a, i16 0 acq_rel ++ ret i16 %1 ++} ++ ++define i16 @atomicrmw_xchg_minus_1_i16_acq_rel(ptr %a) nounwind { ++; CHECK-LABEL: atomicrmw_xchg_minus_1_i16_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.w $r5, $zero, -1 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB69_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r5, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB69_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xchg ptr %a, i16 -1 acq_rel ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_xchg_i32_acq_rel(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_xchg_i32_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amswap_db.w $r5, $r6, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xchg ptr %a, i32 %b acq_rel ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_xchg_i64_acq_rel(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_xchg_i64_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amswap_db.d $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xchg ptr %a, i64 %b acq_rel ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_add_i8_acq_rel(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_add_i8_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB72_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: add.w $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB72_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw add ptr %a, i8 %b acq_rel ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_add_i16_acq_rel(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_add_i16_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB73_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: add.w $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; 
CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB73_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw add ptr %a, i16 %b acq_rel ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_add_i32_acq_rel(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_add_i32_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amadd_db.w $r5, $r6, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw add ptr %a, i32 %b acq_rel ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_add_i64_acq_rel(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_add_i64_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amadd_db.d $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw add ptr %a, i64 %b acq_rel ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_sub_i8_acq_rel(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_sub_i8_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB76_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: sub.w $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB76_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw sub ptr %a, i8 %b acq_rel ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_sub_i16_acq_rel(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_sub_i16_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB77_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: sub.w $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB77_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw sub ptr %a, i16 %b acq_rel ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_sub_i32_acq_rel(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_sub_i32_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: sub.w $r7, $zero, $r6 ++; CHECK-NEXT: amadd_db.w $r5, $r7, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw sub ptr %a, i32 %b acq_rel ++ ret i32 %1 ++} ++ ++define i64 
@atomicrmw_sub_i64_acq_rel(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_sub_i64_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: sub.d $r7, $zero, $r5 ++; CHECK-NEXT: amadd_db.d $r6, $r7, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw sub ptr %a, i64 %b acq_rel ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_nand_i8_acq_rel(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_nand_i8_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB80_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r10, $r5 ++; CHECK-NEXT: nor $r11, $zero, $r11 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB80_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw nand ptr %a, i8 %b acq_rel ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_nand_i16_acq_rel(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_nand_i16_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB81_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r10, $r5 ++; CHECK-NEXT: nor $r11, $zero, $r11 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB81_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw nand ptr %a, i16 %b acq_rel ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_nand_i32_acq_rel(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_nand_i32_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: .LBB82_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r5, $r4, 0 ++; CHECK-NEXT: and $r7, $r5, $r6 ++; CHECK-NEXT: nor $r7, $zero, $r7 ++; CHECK-NEXT: sc.w $r7, $r4, 0 ++; CHECK-NEXT: beq $r7, $zero, .LBB82_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw nand ptr %a, i32 %b acq_rel ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_nand_i64_acq_rel(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_nand_i64_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: .LBB83_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.d $r6, $r4, 0 ++; CHECK-NEXT: and $r7, $r6, $r5 ++; CHECK-NEXT: nor $r7, $zero, $r7 ++; CHECK-NEXT: sc.d $r7, $r4, 0 ++; CHECK-NEXT: beq $r7, $zero, .LBB83_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw nand ptr %a, i64 %b acq_rel ++ ret i64 
%1 ++} ++ ++define i8 @atomicrmw_and_i8_acq_rel(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_and_i8_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB84_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB84_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw and ptr %a, i8 %b acq_rel ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_and_i16_acq_rel(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_and_i16_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB85_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB85_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw and ptr %a, i16 %b acq_rel ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_and_i32_acq_rel(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_and_i32_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amand_db.w $r5, $r6, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw and ptr %a, i32 %b acq_rel ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_and_i64_acq_rel(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_and_i64_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amand_db.d $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw and ptr %a, i64 %b acq_rel ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_or_i8_acq_rel(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_or_i8_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB88_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: or $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB88_1 ++; CHECK-NEXT: # 
%bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw or ptr %a, i8 %b acq_rel ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_or_i16_acq_rel(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_or_i16_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB89_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: or $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB89_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw or ptr %a, i16 %b acq_rel ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_or_i32_acq_rel(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_or_i32_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amor_db.w $r5, $r6, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw or ptr %a, i32 %b acq_rel ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_or_i64_acq_rel(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_or_i64_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amor_db.d $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw or ptr %a, i64 %b acq_rel ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_xor_i8_acq_rel(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_xor_i8_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB92_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: xor $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB92_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xor ptr %a, i8 %b acq_rel ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_xor_i16_acq_rel(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_xor_i16_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB93_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 
++; CHECK-NEXT: xor $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB93_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xor ptr %a, i16 %b acq_rel ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_xor_i32_acq_rel(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_xor_i32_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amxor_db.w $r5, $r6, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xor ptr %a, i32 %b acq_rel ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_xor_i64_acq_rel(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_xor_i64_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amxor_db.d $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xor ptr %a, i64 %b acq_rel ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_xchg_i8_seq_cst(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_xchg_i8_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB96_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r5, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB96_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xchg ptr %a, i8 %b seq_cst ++ ret i8 %1 ++} ++ ++define i8 @atomicrmw_xchg_0_i8_seq_cst(ptr %a) nounwind { ++; CHECK-LABEL: atomicrmw_xchg_0_i8_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r5, $zero, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB97_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r5, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB97_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xchg ptr %a, i8 0 seq_cst ++ ret i8 %1 ++} ++ ++define i8 @atomicrmw_xchg_minus_1_i8_seq_cst(ptr %a) nounwind { ++; CHECK-LABEL: atomicrmw_xchg_minus_1_i8_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.w $r5, $zero, -1 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; 
CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB98_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r5, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB98_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xchg ptr %a, i8 -1 seq_cst ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_xchg_i16_seq_cst(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_xchg_i16_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB99_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r5, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB99_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xchg ptr %a, i16 %b seq_cst ++ ret i16 %1 ++} ++ ++define i16 @atomicrmw_xchg_0_i16_seq_cst(ptr %a) nounwind { ++; CHECK-LABEL: atomicrmw_xchg_0_i16_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r5, $zero, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB100_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r5, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB100_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xchg ptr %a, i16 0 seq_cst ++ ret i16 %1 ++} ++ ++define i16 @atomicrmw_xchg_minus_1_i16_seq_cst(ptr %a) nounwind { ++; CHECK-LABEL: atomicrmw_xchg_minus_1_i16_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.w $r5, $zero, -1 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB101_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r5, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB101_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xchg ptr %a, i16 -1 
seq_cst ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_xchg_i32_seq_cst(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_xchg_i32_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amswap_db.w $r5, $r6, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xchg ptr %a, i32 %b seq_cst ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_xchg_i64_seq_cst(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_xchg_i64_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amswap_db.d $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xchg ptr %a, i64 %b seq_cst ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_add_i8_seq_cst(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_add_i8_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB104_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: add.w $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB104_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw add ptr %a, i8 %b seq_cst ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_add_i16_seq_cst(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_add_i16_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB105_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: add.w $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB105_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw add ptr %a, i16 %b seq_cst ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_add_i32_seq_cst(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_add_i32_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amadd_db.w $r5, $r6, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw add ptr %a, i32 %b seq_cst ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_add_i64_seq_cst(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_add_i64_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amadd_db.d $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw add ptr %a, i64 %b seq_cst ++ ret i64 %1 ++} ++ ++define i8 
@atomicrmw_sub_i8_seq_cst(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_sub_i8_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB108_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: sub.w $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB108_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw sub ptr %a, i8 %b seq_cst ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_sub_i16_seq_cst(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_sub_i16_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB109_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: sub.w $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB109_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw sub ptr %a, i16 %b seq_cst ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_sub_i32_seq_cst(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_sub_i32_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: sub.w $r7, $zero, $r6 ++; CHECK-NEXT: amadd_db.w $r5, $r7, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw sub ptr %a, i32 %b seq_cst ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_sub_i64_seq_cst(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_sub_i64_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: sub.d $r7, $zero, $r5 ++; CHECK-NEXT: amadd_db.d $r6, $r7, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw sub ptr %a, i64 %b seq_cst ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_nand_i8_seq_cst(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_nand_i8_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB112_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r10, $r5 ++; CHECK-NEXT: nor $r11, $zero, $r11 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, 
$r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB112_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw nand ptr %a, i8 %b seq_cst ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_nand_i16_seq_cst(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_nand_i16_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB113_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r10, $r5 ++; CHECK-NEXT: nor $r11, $zero, $r11 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB113_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw nand ptr %a, i16 %b seq_cst ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_nand_i32_seq_cst(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_nand_i32_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: .LBB114_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r5, $r4, 0 ++; CHECK-NEXT: and $r7, $r5, $r6 ++; CHECK-NEXT: nor $r7, $zero, $r7 ++; CHECK-NEXT: sc.w $r7, $r4, 0 ++; CHECK-NEXT: beq $r7, $zero, .LBB114_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw nand ptr %a, i32 %b seq_cst ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_nand_i64_seq_cst(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_nand_i64_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: .LBB115_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.d $r6, $r4, 0 ++; CHECK-NEXT: and $r7, $r6, $r5 ++; CHECK-NEXT: nor $r7, $zero, $r7 ++; CHECK-NEXT: sc.d $r7, $r4, 0 ++; CHECK-NEXT: beq $r7, $zero, .LBB115_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw nand ptr %a, i64 %b seq_cst ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_and_i8_seq_cst(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_and_i8_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB116_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB116_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw and ptr %a, i8 %b seq_cst ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_and_i16_seq_cst(ptr %a, i16 %b) 
nounwind { ++; CHECK-LABEL: atomicrmw_and_i16_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB117_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB117_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw and ptr %a, i16 %b seq_cst ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_and_i32_seq_cst(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_and_i32_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amand_db.w $r5, $r6, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw and ptr %a, i32 %b seq_cst ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_and_i64_seq_cst(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_and_i64_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amand_db.d $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw and ptr %a, i64 %b seq_cst ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_or_i8_seq_cst(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_or_i8_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB120_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: or $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB120_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw or ptr %a, i8 %b seq_cst ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_or_i16_seq_cst(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_or_i16_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB121_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: or $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB121_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and 
$r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw or ptr %a, i16 %b seq_cst ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_or_i32_seq_cst(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_or_i32_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amor_db.w $r5, $r6, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw or ptr %a, i32 %b seq_cst ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_or_i64_seq_cst(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_or_i64_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amor_db.d $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw or ptr %a, i64 %b seq_cst ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_xor_i8_seq_cst(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_xor_i8_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB124_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: xor $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB124_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xor ptr %a, i8 %b seq_cst ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_xor_i16_seq_cst(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_xor_i16_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB125_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: xor $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB125_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xor ptr %a, i16 %b seq_cst ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_xor_i32_seq_cst(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_xor_i32_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amxor_db.w $r5, $r6, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xor ptr %a, i32 %b seq_cst ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_xor_i64_seq_cst(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_xor_i64_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amxor_db.d $r6, $r5, $r4, 0 ++; CHECK-NEXT: 
# %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xor ptr %a, i64 %b seq_cst ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_xchg_i8_monotonic(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_xchg_i8_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB128_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r5, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB128_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xchg ptr %a, i8 %b monotonic ++ ret i8 %1 ++} ++ ++define i8 @atomicrmw_xchg_0_i8_monotonic(ptr %a) nounwind { ++; CHECK-LABEL: atomicrmw_xchg_0_i8_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r5, $zero, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB129_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r5, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB129_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xchg ptr %a, i8 0 monotonic ++ ret i8 %1 ++} ++ ++define i8 @atomicrmw_xchg_minus_1_i8_monotonic(ptr %a) nounwind { ++; CHECK-LABEL: atomicrmw_xchg_minus_1_i8_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.w $r5, $zero, -1 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB130_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r5, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB130_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xchg ptr %a, i8 -1 monotonic ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_xchg_i16_monotonic(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_xchg_i16_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 
++; CHECK-NEXT: .LBB131_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r5, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB131_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xchg ptr %a, i16 %b monotonic ++ ret i16 %1 ++} ++ ++define i16 @atomicrmw_xchg_0_i16_monotonic(ptr %a) nounwind { ++; CHECK-LABEL: atomicrmw_xchg_0_i16_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r5, $zero, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB132_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r5, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB132_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xchg ptr %a, i16 0 monotonic ++ ret i16 %1 ++} ++ ++define i16 @atomicrmw_xchg_minus_1_i16_monotonic(ptr %a) nounwind { ++; CHECK-LABEL: atomicrmw_xchg_minus_1_i16_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.w $r5, $zero, -1 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB133_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r5, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB133_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xchg ptr %a, i16 -1 monotonic ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_xchg_i32_monotonic(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_xchg_i32_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amswap_db.w $r5, $r6, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xchg ptr %a, i32 %b monotonic ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_xchg_i64_monotonic(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_xchg_i64_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amswap_db.d $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xchg ptr %a, i64 %b monotonic ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_add_i8_monotonic(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_add_i8_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; 
CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB136_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: add.w $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB136_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw add ptr %a, i8 %b monotonic ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_add_i16_monotonic(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_add_i16_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB137_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: add.w $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB137_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw add ptr %a, i16 %b monotonic ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_add_i32_monotonic(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_add_i32_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amadd_db.w $r5, $r6, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw add ptr %a, i32 %b monotonic ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_add_i64_monotonic(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_add_i64_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amadd_db.d $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw add ptr %a, i64 %b monotonic ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_sub_i8_monotonic(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_sub_i8_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB140_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: sub.w $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB140_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw sub ptr %a, i8 %b monotonic ++ 
ret i8 %1 ++} ++ ++define i16 @atomicrmw_sub_i16_monotonic(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_sub_i16_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB141_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: sub.w $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB141_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw sub ptr %a, i16 %b monotonic ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_sub_i32_monotonic(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_sub_i32_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: sub.w $r7, $zero, $r6 ++; CHECK-NEXT: amadd_db.w $r5, $r7, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw sub ptr %a, i32 %b monotonic ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_sub_i64_monotonic(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_sub_i64_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: sub.d $r7, $zero, $r5 ++; CHECK-NEXT: amadd_db.d $r6, $r7, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw sub ptr %a, i64 %b monotonic ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_nand_i8_monotonic(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_nand_i8_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB144_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r10, $r5 ++; CHECK-NEXT: nor $r11, $zero, $r11 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB144_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw nand ptr %a, i8 %b monotonic ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_nand_i16_monotonic(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_nand_i16_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB145_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r10, $r5 ++; 
CHECK-NEXT: nor $r11, $zero, $r11 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB145_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw nand ptr %a, i16 %b monotonic ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_nand_i32_monotonic(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_nand_i32_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: .LBB146_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r5, $r4, 0 ++; CHECK-NEXT: and $r7, $r5, $r6 ++; CHECK-NEXT: nor $r7, $zero, $r7 ++; CHECK-NEXT: sc.w $r7, $r4, 0 ++; CHECK-NEXT: beq $r7, $zero, .LBB146_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw nand ptr %a, i32 %b monotonic ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_nand_i64_monotonic(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_nand_i64_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: .LBB147_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.d $r6, $r4, 0 ++; CHECK-NEXT: and $r7, $r6, $r5 ++; CHECK-NEXT: nor $r7, $zero, $r7 ++; CHECK-NEXT: sc.d $r7, $r4, 0 ++; CHECK-NEXT: beq $r7, $zero, .LBB147_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw nand ptr %a, i64 %b monotonic ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_and_i8_monotonic(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_and_i8_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB148_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB148_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw and ptr %a, i8 %b monotonic ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_and_i16_monotonic(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_and_i16_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB149_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB149_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = 
atomicrmw and ptr %a, i16 %b monotonic ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_and_i32_monotonic(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_and_i32_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amand_db.w $r5, $r6, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw and ptr %a, i32 %b monotonic ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_and_i64_monotonic(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_and_i64_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amand_db.d $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw and ptr %a, i64 %b monotonic ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_or_i8_monotonic(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_or_i8_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB152_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: or $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB152_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw or ptr %a, i8 %b monotonic ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_or_i16_monotonic(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_or_i16_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB153_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: or $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB153_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw or ptr %a, i16 %b monotonic ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_or_i32_monotonic(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_or_i32_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amor_db.w $r5, $r6, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw or ptr %a, i32 %b monotonic ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_or_i64_monotonic(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_or_i64_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amor_db.d $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw or ptr %a, i64 %b monotonic ++ 
ret i64 %1 ++} ++ ++define i8 @atomicrmw_xor_i8_monotonic(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_xor_i8_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB156_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: xor $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB156_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xor ptr %a, i8 %b monotonic ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_xor_i16_monotonic(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_xor_i16_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB157_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: xor $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB157_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xor ptr %a, i16 %b monotonic ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_xor_i32_monotonic(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_xor_i32_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amxor_db.w $r5, $r6, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xor ptr %a, i32 %b monotonic ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_xor_i64_monotonic(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_xor_i64_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amxor_db.d $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xor ptr %a, i64 %b monotonic ++ ret i64 %1 ++} +diff --git a/llvm/test/CodeGen/LoongArch/fence-singlethread.ll b/llvm/test/CodeGen/LoongArch/fence-singlethread.ll +new file mode 100644 +index 000000000..f4d1a3965 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/fence-singlethread.ll +@@ -0,0 +1,11 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s ++ ++define void @fence_singlethread() { ++; CHECK-LABEL: fence_singlethread: ++; CHECK: # %bb.0: ++; CHECK-NEXT: dbar 16 ++; CHECK-NEXT: jr $ra ++ fence syncscope("singlethread") seq_cst ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/fence.ll b/llvm/test/CodeGen/LoongArch/fence.ll +new file mode 100644 +index 000000000..05e2639ca +--- 
/dev/null ++++ b/llvm/test/CodeGen/LoongArch/fence.ll +@@ -0,0 +1,38 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64 ++ ++define void @fence_acquire() nounwind { ++; LA64-LABEL: fence_acquire: ++; LA64: # %bb.0: ++; LA64-NEXT: dbar 20 ++; LA64-NEXT: jr $ra ++ fence acquire ++ ret void ++} ++ ++define void @fence_release() nounwind { ++; LA64-LABEL: fence_release: ++; LA64: # %bb.0: ++; LA64-NEXT: dbar 18 ++; LA64-NEXT: jr $ra ++ fence release ++ ret void ++} ++ ++define void @fence_acq_rel() nounwind { ++; LA64-LABEL: fence_acq_rel: ++; LA64: # %bb.0: ++; LA64-NEXT: dbar 16 ++; LA64-NEXT: jr $ra ++ fence acq_rel ++ ret void ++} ++ ++define void @fence_seq_cst() nounwind { ++; LA64-LABEL: fence_seq_cst: ++; LA64: # %bb.0: ++; LA64-NEXT: dbar 16 ++; LA64-NEXT: jr $ra ++ fence seq_cst ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/insert-lasx.ll b/llvm/test/CodeGen/LoongArch/lasx/insert-lasx.ll +index 51fa34606..6dbaa49b1 100644 +--- a/llvm/test/CodeGen/LoongArch/lasx/insert-lasx.ll ++++ b/llvm/test/CodeGen/LoongArch/lasx/insert-lasx.ll +@@ -1,6 +1,18 @@ + ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py + ; RUN: llc -march=loongarch64 -mattr=+lasx < %s | FileCheck %s + ++define <32 x i8> @lasxB(<32 x i8> %d, <16 x i8> %s1) { ++; CHECK-LABEL: lasxB: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: # kill: def $vr1 killed $vr1 def $xr1 ++; CHECK-NEXT: xvpermi.q $xr0, $xr1, 48 ++; CHECK-NEXT: jr $ra ++entry: ++ %r1 = shufflevector <16 x i8> %s1, <16 x i8> poison, <32 x i32> ++ %r2 = shufflevector <32 x i8> %r1, <32 x i8> %d, <32 x i32> ++ ret <32 x i8> %r2 ++} ++ + define <16 x i16> @lasxH(<16 x i16> %d, <8 x i16> %s1) { + ; CHECK-LABEL: lasxH: + ; CHECK: # %bb.0: # %entry +@@ -36,3 +48,51 @@ entry: + %r2 = shufflevector <4 x i64> %r1, <4 x i64> %d, <4 x i32> + ret <4 x i64> %r2 + } ++ ++define <32 x i8> @lasxB_Hi(<32 x i8> %d, <16 x i8> %s1) { ++; CHECK-LABEL: lasxB_Hi: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: # kill: def $vr1 killed $vr1 def $xr1 ++; CHECK-NEXT: xvpermi.q $xr0, $xr1, 32 ++; CHECK-NEXT: jr $ra ++entry: ++ %r1 = shufflevector <16 x i8> %s1, <16 x i8> poison, <32 x i32> ++ %r2 = shufflevector <32 x i8> %r1, <32 x i8> %d, <32 x i32> ++ ret <32 x i8> %r2 ++} ++ ++define <16 x i16> @lasxH_Hi(<16 x i16> %d, <8 x i16> %s1) { ++; CHECK-LABEL: lasxH_Hi: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: # kill: def $vr1 killed $vr1 def $xr1 ++; CHECK-NEXT: xvpermi.q $xr0, $xr1, 32 ++; CHECK-NEXT: jr $ra ++entry: ++ %r1 = shufflevector <8 x i16> %s1, <8 x i16> poison, <16 x i32> ++ %r2 = shufflevector <16 x i16> %r1, <16 x i16> %d, <16 x i32> ++ ret <16 x i16> %r2 ++} ++ ++define <8 x i32> @lasxW_Hi(<8 x i32> %d, <4 x i32> %s1) { ++; CHECK-LABEL: lasxW_Hi: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: # kill: def $vr1 killed $vr1 def $xr1 ++; CHECK-NEXT: xvpermi.q $xr0, $xr1, 32 ++; CHECK-NEXT: jr $ra ++entry: ++ %r1 = shufflevector <4 x i32> %s1, <4 x i32> poison, <8 x i32> ++ %r2 = shufflevector <8 x i32> %r1, <8 x i32> %d, <8 x i32> ++ ret <8 x i32> %r2 ++} ++ ++define <4 x i64> @lasxD_Hi(<4 x i64> %d, <2 x i64> %s1) { ++; CHECK-LABEL: lasxD_Hi: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: # kill: def $vr1 killed $vr1 def $xr1 ++; CHECK-NEXT: xvpermi.q $xr0, $xr1, 32 ++; CHECK-NEXT: jr $ra ++entry: ++ %r1 = shufflevector <2 x i64> %s1, <2 x i64> poison, <4 x i32> ++ %r2 = shufflevector <4 x i64> %r1, <4 x i64> %d, <4 x i32> ++ ret <4 x 
i64> %r2 ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/lasxvavg.ll b/llvm/test/CodeGen/LoongArch/lasx/lasxvavg.ll +new file mode 100644 +index 000000000..a0f3e6ebe +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/lasxvavg.ll +@@ -0,0 +1,106 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc -march=loongarch64 -mattr=+lasx < %s | FileCheck %s ++ ++define <32 x i8> @lsxavgr_v32i8(<32 x i8> noundef %0, <32 x i8> noundef %1) local_unnamed_addr #0 { ++; CHECK-LABEL: lsxavgr_v32i8: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvadd.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: xvldi $xr1, 1 ++; CHECK-NEXT: xvavg.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: jr $ra ++ %3 = add <32 x i8> %0, ++ %4 = add <32 x i8> %3, %1 ++ %5 = sdiv <32 x i8> %4, ++ ret <32 x i8> %5 ++} ++ ++define <16 x i16> @lsxavgr_v16i16(<16 x i16> noundef %0, <16 x i16> noundef %1) local_unnamed_addr #0 { ++; CHECK-LABEL: lsxavgr_v16i16: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvadd.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: xvldi $xr1, 1 ++; CHECK-NEXT: xvavg.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: jr $ra ++ %3 = add <16 x i16> %0, ++ %4 = add <16 x i16> %3, %1 ++ %5 = sdiv <16 x i16> %4, ++ ret <16 x i16> %5 ++} ++ ++define <8 x i32> @lsxavgr_v8i32(<8 x i32> noundef %0, <8 x i32> noundef %1) local_unnamed_addr #0 { ++; CHECK-LABEL: lsxavgr_v8i32: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvadd.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: xvldi $xr1, 1 ++; CHECK-NEXT: xvavg.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: jr $ra ++ %3 = add <8 x i32> %0, ++ %4 = add <8 x i32> %3, %1 ++ %5 = sdiv <8 x i32> %4, ++ ret <8 x i32> %5 ++} ++ ++define <4 x i64> @lsxavgr_v4i64(<4 x i64> noundef %0, <4 x i64> noundef %1) local_unnamed_addr #0 { ++; CHECK-LABEL: lsxavgr_v4i64: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvadd.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: xvldi $xr1, 1 ++; CHECK-NEXT: xvavg.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: jr $ra ++ %3 = add <4 x i64> %0, ++ %4 = add <4 x i64> %3, %1 ++ %5 = sdiv <4 x i64> %4, ++ ret <4 x i64> %5 ++} ++ ++define <32 x i8> @lsxavgr_v32u8(<32 x i8> noundef %0, <32 x i8> noundef %1) local_unnamed_addr #0 { ++; CHECK-LABEL: lsxavgr_v32u8: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvadd.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: xvldi $xr1, 1 ++; CHECK-NEXT: xvavg.bu $xr0, $xr0, $xr1 ++; CHECK-NEXT: jr $ra ++ %3 = add <32 x i8> %0, ++ %4 = add <32 x i8> %3, %1 ++ %5 = lshr <32 x i8> %4, ++ ret <32 x i8> %5 ++} ++ ++define <16 x i16> @lsxavgr_v16u16(<16 x i16> noundef %0, <16 x i16> noundef %1) local_unnamed_addr #0 { ++; CHECK-LABEL: lsxavgr_v16u16: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvadd.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: xvldi $xr1, 1 ++; CHECK-NEXT: xvavg.hu $xr0, $xr0, $xr1 ++; CHECK-NEXT: jr $ra ++ %3 = add <16 x i16> %0, ++ %4 = add <16 x i16> %3, %1 ++ %5 = lshr <16 x i16> %4, ++ ret <16 x i16> %5 ++} ++ ++define <8 x i32> @lsxavgr_v8u32(<8 x i32> noundef %0, <8 x i32> noundef %1) local_unnamed_addr #0 { ++; CHECK-LABEL: lsxavgr_v8u32: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvadd.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: xvldi $xr1, 1 ++; CHECK-NEXT: xvavg.wu $xr0, $xr0, $xr1 ++; CHECK-NEXT: jr $ra ++ %3 = add <8 x i32> %0, ++ %4 = add <8 x i32> %3, %1 ++ %5 = lshr <8 x i32> %4, ++ ret <8 x i32> %5 ++} ++ ++define <4 x i64> @lsxavgr_v4u64(<4 x i64> noundef %0, <4 x i64> noundef %1) local_unnamed_addr #0 { ++; CHECK-LABEL: lsxavgr_v4u64: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvadd.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: xvldi $xr1, 1 ++; CHECK-NEXT: xvavg.du $xr0, $xr0, $xr1 ++; CHECK-NEXT: jr $ra ++ %3 = add <4 x i64> %0, ++ %4 = add <4 x i64> %3, %1 
++ %5 = lshr <4 x i64> %4, ++ ret <4 x i64> %5 ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/v32i8-bswap.ll b/llvm/test/CodeGen/LoongArch/lasx/v32i8-bswap.ll +new file mode 100644 +index 000000000..1453dabaa +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/v32i8-bswap.ll +@@ -0,0 +1,26 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++define void @vshf_v32i8(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: vshf_v32i8: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $r5, 0 ++; CHECK-NEXT: xvpickve2gr.d $r5, $xr0, 3 ++; CHECK-NEXT: xvpickve2gr.d $r6, $xr0, 2 ++; CHECK-NEXT: xvpickve2gr.d $r7, $xr0, 0 ++; CHECK-NEXT: xvpickve2gr.d $r8, $xr0, 1 ++; CHECK-NEXT: revb.d $r8, $r8 ++; CHECK-NEXT: revb.d $r7, $r7 ++; CHECK-NEXT: xvinsgr2vr.d $xr0, $r7, 0 ++; CHECK-NEXT: xvinsgr2vr.d $xr0, $r8, 1 ++; CHECK-NEXT: revb.d $r6, $r6 ++; CHECK-NEXT: xvinsgr2vr.d $xr0, $r6, 2 ++; CHECK-NEXT: revb.d $r5, $r5 ++; CHECK-NEXT: xvinsgr2vr.d $xr0, $r5, 3 ++; CHECK-NEXT: xvst $xr0, $r4, 0 ++; CHECK-NEXT: jr $ra ++ %v1 = load <32 x i8>, ptr %a0 ++ %v2 = shufflevector <32 x i8> %v1, <32 x i8> undef, <32 x i32> ++ store <32 x i8> %v2, ptr %res ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/load-store-atomic.ll b/llvm/test/CodeGen/LoongArch/load-store-atomic.ll +new file mode 100644 +index 000000000..414d4078b +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/load-store-atomic.ll +@@ -0,0 +1,310 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s ++ ++define i8 @load_acquire_i8(ptr %ptr) { ++; CHECK-LABEL: load_acquire_i8: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ld.b $r4, $r4, 0 ++; CHECK-NEXT: dbar 20 ++; CHECK-NEXT: jr $ra ++ %val = load atomic i8, ptr %ptr acquire, align 1 ++ ret i8 %val ++} ++ ++define i16 @load_acquire_i16(ptr %ptr) { ++; CHECK-LABEL: load_acquire_i16: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ld.h $r4, $r4, 0 ++; CHECK-NEXT: dbar 20 ++; CHECK-NEXT: jr $ra ++ %val = load atomic i16, ptr %ptr acquire, align 2 ++ ret i16 %val ++} ++ ++define i32 @load_acquire_i32(ptr %ptr) { ++; CHECK-LABEL: load_acquire_i32: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ld.w $r4, $r4, 0 ++; CHECK-NEXT: dbar 20 ++; CHECK-NEXT: jr $ra ++ %val = load atomic i32, ptr %ptr acquire, align 4 ++ ret i32 %val ++} ++ ++define i64 @load_acquire_i64(ptr %ptr) { ++; CHECK-LABEL: load_acquire_i64: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ld.d $r4, $r4, 0 ++; CHECK-NEXT: dbar 20 ++; CHECK-NEXT: jr $ra ++ %val = load atomic i64, ptr %ptr acquire, align 8 ++ ret i64 %val ++} ++ ++define i8 @load_unordered_i8(ptr %ptr) { ++; CHECK-LABEL: load_unordered_i8: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ld.b $r4, $r4, 0 ++; CHECK-NEXT: jr $ra ++ %val = load atomic i8, ptr %ptr unordered, align 1 ++ ret i8 %val ++} ++ ++define i16 @load_unordered_i16(ptr %ptr) { ++; CHECK-LABEL: load_unordered_i16: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ld.h $r4, $r4, 0 ++; CHECK-NEXT: jr $ra ++ %val = load atomic i16, ptr %ptr unordered, align 2 ++ ret i16 %val ++} ++ ++define i32 @load_unordered_i32(ptr %ptr) { ++; CHECK-LABEL: load_unordered_i32: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ld.w $r4, $r4, 0 ++; CHECK-NEXT: jr $ra ++ %val = load atomic i32, ptr %ptr unordered, align 4 ++ ret i32 %val ++} ++ ++define i64 @load_unordered_i64(ptr %ptr) { ++; CHECK-LABEL: load_unordered_i64: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ld.d $r4, $r4, 0 ++; CHECK-NEXT: jr $ra ++ %val = load atomic 
i64, ptr %ptr unordered, align 8 ++ ret i64 %val ++} ++ ++define i8 @load_monotonic_i8(ptr %ptr) { ++; CHECK-LABEL: load_monotonic_i8: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ld.b $r4, $r4, 0 ++; CHECK-NEXT: jr $ra ++ %val = load atomic i8, ptr %ptr monotonic, align 1 ++ ret i8 %val ++} ++ ++define i16 @load_monotonic_i16(ptr %ptr) { ++; CHECK-LABEL: load_monotonic_i16: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ld.h $r4, $r4, 0 ++; CHECK-NEXT: jr $ra ++ %val = load atomic i16, ptr %ptr monotonic, align 2 ++ ret i16 %val ++} ++ ++define i32 @load_monotonic_i32(ptr %ptr) { ++; CHECK-LABEL: load_monotonic_i32: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ld.w $r4, $r4, 0 ++; CHECK-NEXT: jr $ra ++ %val = load atomic i32, ptr %ptr monotonic, align 4 ++ ret i32 %val ++} ++ ++define i64 @load_monotonic_i64(ptr %ptr) { ++; CHECK-LABEL: load_monotonic_i64: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ld.d $r4, $r4, 0 ++; CHECK-NEXT: jr $ra ++ %val = load atomic i64, ptr %ptr monotonic, align 8 ++ ret i64 %val ++} ++ ++define i8 @load_seq_cst_i8(ptr %ptr) { ++; CHECK-LABEL: load_seq_cst_i8: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ld.b $r4, $r4, 0 ++; CHECK-NEXT: dbar 16 ++; CHECK-NEXT: jr $ra ++ %val = load atomic i8, ptr %ptr seq_cst, align 1 ++ ret i8 %val ++} ++ ++define i16 @load_seq_cst_i16(ptr %ptr) { ++; CHECK-LABEL: load_seq_cst_i16: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ld.h $r4, $r4, 0 ++; CHECK-NEXT: dbar 16 ++; CHECK-NEXT: jr $ra ++ %val = load atomic i16, ptr %ptr seq_cst, align 2 ++ ret i16 %val ++} ++ ++define i32 @load_seq_cst_i32(ptr %ptr) { ++; CHECK-LABEL: load_seq_cst_i32: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ld.w $r4, $r4, 0 ++; CHECK-NEXT: dbar 16 ++; CHECK-NEXT: jr $ra ++ %val = load atomic i32, ptr %ptr seq_cst, align 4 ++ ret i32 %val ++} ++ ++define i64 @load_seq_cst_i64(ptr %ptr) { ++; CHECK-LABEL: load_seq_cst_i64: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ld.d $r4, $r4, 0 ++; CHECK-NEXT: dbar 16 ++; CHECK-NEXT: jr $ra ++ %val = load atomic i64, ptr %ptr seq_cst, align 8 ++ ret i64 %val ++} ++ ++define void @store_release_i8(ptr %ptr, i8 signext %v) { ++; CHECK-LABEL: store_release_i8: ++; CHECK: # %bb.0: ++; CHECK-NEXT: dbar 18 ++; CHECK-NEXT: st.b $r5, $r4, 0 ++; CHECK-NEXT: jr $ra ++ store atomic i8 %v, ptr %ptr release, align 1 ++ ret void ++} ++ ++define void @store_release_i16(ptr %ptr, i16 signext %v) { ++; CHECK-LABEL: store_release_i16: ++; CHECK: # %bb.0: ++; CHECK-NEXT: dbar 18 ++; CHECK-NEXT: st.h $r5, $r4, 0 ++; CHECK-NEXT: jr $ra ++ store atomic i16 %v, ptr %ptr release, align 2 ++ ret void ++} ++ ++define void @store_release_i32(ptr %ptr, i32 signext %v) { ++; CHECK-LABEL: store_release_i32: ++; CHECK: # %bb.0: ++; CHECK-NEXT: dbar 18 ++; CHECK-NEXT: st.w $r5, $r4, 0 ++; CHECK-NEXT: jr $ra ++ store atomic i32 %v, ptr %ptr release, align 4 ++ ret void ++} ++ ++define void @store_release_i64(ptr %ptr, i64 %v) { ++; CHECK-LABEL: store_release_i64: ++; CHECK: # %bb.0: ++; CHECK-NEXT: dbar 18 ++; CHECK-NEXT: st.d $r5, $r4, 0 ++; CHECK-NEXT: jr $ra ++ store atomic i64 %v, ptr %ptr release, align 8 ++ ret void ++} ++ ++define void @store_unordered_i8(ptr %ptr, i8 signext %v) { ++; CHECK-LABEL: store_unordered_i8: ++; CHECK: # %bb.0: ++; CHECK-NEXT: st.b $r5, $r4, 0 ++; CHECK-NEXT: jr $ra ++ store atomic i8 %v, ptr %ptr unordered, align 1 ++ ret void ++} ++ ++define void @store_unordered_i16(ptr %ptr, i16 signext %v) { ++; CHECK-LABEL: store_unordered_i16: ++; CHECK: # %bb.0: ++; CHECK-NEXT: st.h $r5, $r4, 0 ++; CHECK-NEXT: jr $ra ++ store atomic i16 %v, ptr %ptr unordered, align 2 ++ ret void ++} ++ 
++define void @store_unordered_i32(ptr %ptr, i32 signext %v) { ++; CHECK-LABEL: store_unordered_i32: ++; CHECK: # %bb.0: ++; CHECK-NEXT: st.w $r5, $r4, 0 ++; CHECK-NEXT: jr $ra ++ store atomic i32 %v, ptr %ptr unordered, align 4 ++ ret void ++} ++ ++define void @store_unordered_i64(ptr %ptr, i64 %v) { ++; CHECK-LABEL: store_unordered_i64: ++; CHECK: # %bb.0: ++; CHECK-NEXT: st.d $r5, $r4, 0 ++; CHECK-NEXT: jr $ra ++ store atomic i64 %v, ptr %ptr unordered, align 8 ++ ret void ++} ++ ++define void @store_monotonic_i8(ptr %ptr, i8 signext %v) { ++; CHECK-LABEL: store_monotonic_i8: ++; CHECK: # %bb.0: ++; CHECK-NEXT: st.b $r5, $r4, 0 ++; CHECK-NEXT: jr $ra ++ store atomic i8 %v, ptr %ptr monotonic, align 1 ++ ret void ++} ++ ++define void @store_monotonic_i16(ptr %ptr, i16 signext %v) { ++; CHECK-LABEL: store_monotonic_i16: ++; CHECK: # %bb.0: ++; CHECK-NEXT: st.h $r5, $r4, 0 ++; CHECK-NEXT: jr $ra ++ store atomic i16 %v, ptr %ptr monotonic, align 2 ++ ret void ++} ++ ++define void @store_monotonic_i32(ptr %ptr, i32 signext %v) { ++; CHECK-LABEL: store_monotonic_i32: ++; CHECK: # %bb.0: ++; CHECK-NEXT: st.w $r5, $r4, 0 ++; CHECK-NEXT: jr $ra ++ store atomic i32 %v, ptr %ptr monotonic, align 4 ++ ret void ++} ++ ++define void @store_monotonic_i64(ptr %ptr, i64 %v) { ++; CHECK-LABEL: store_monotonic_i64: ++; CHECK: # %bb.0: ++; CHECK-NEXT: st.d $r5, $r4, 0 ++; CHECK-NEXT: jr $ra ++ store atomic i64 %v, ptr %ptr monotonic, align 8 ++ ret void ++} ++ ++define void @store_seq_cst_i8(ptr %ptr, i8 signext %v) { ++; CHECK-LABEL: store_seq_cst_i8: ++; CHECK: # %bb.0: ++; CHECK-NEXT: dbar 16 ++; CHECK-NEXT: st.b $r5, $r4, 0 ++; CHECK-NEXT: dbar 16 ++; CHECK-NEXT: jr $ra ++ store atomic i8 %v, ptr %ptr seq_cst, align 1 ++ ret void ++} ++ ++define void @store_seq_cst_i16(ptr %ptr, i16 signext %v) { ++; CHECK-LABEL: store_seq_cst_i16: ++; CHECK: # %bb.0: ++; CHECK-NEXT: dbar 16 ++; CHECK-NEXT: st.h $r5, $r4, 0 ++; CHECK-NEXT: dbar 16 ++; CHECK-NEXT: jr $ra ++ store atomic i16 %v, ptr %ptr seq_cst, align 2 ++ ret void ++} ++ ++define void @store_seq_cst_i32(ptr %ptr, i32 signext %v) { ++; CHECK-LABEL: store_seq_cst_i32: ++; CHECK: # %bb.0: ++; CHECK-NEXT: dbar 16 ++; CHECK-NEXT: st.w $r5, $r4, 0 ++; CHECK-NEXT: dbar 16 ++; CHECK-NEXT: jr $ra ++ store atomic i32 %v, ptr %ptr seq_cst, align 4 ++ ret void ++} ++ ++define void @store_seq_cst_i64(ptr %ptr, i64 %v) { ++; CHECK-LABEL: store_seq_cst_i64: ++; CHECK: # %bb.0: ++; CHECK-NEXT: dbar 16 ++; CHECK-NEXT: st.d $r5, $r4, 0 ++; CHECK-NEXT: dbar 16 ++; CHECK-NEXT: jr $ra ++ store atomic i64 %v, ptr %ptr seq_cst, align 8 ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/lsxvavg.ll b/llvm/test/CodeGen/LoongArch/lsx/lsxvavg.ll +new file mode 100644 +index 000000000..8441ed1b0 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/lsxvavg.ll +@@ -0,0 +1,106 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc -march=loongarch64 -mattr=+lsx < %s | FileCheck %s ++ ++define <16 x i8> @lsxvavg_v16i8(<16 x i8> noundef %0, <16 x i8> noundef %1) local_unnamed_addr #0 { ++; CHECK-LABEL: lsxvavg_v16i8: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vadd.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: vldi $vr1, 1 ++; CHECK-NEXT: vavg.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: jr $ra ++ %3 = add <16 x i8> %0, ++ %4 = add <16 x i8> %3, %1 ++ %5 = sdiv <16 x i8> %4, ++ ret <16 x i8> %5 ++} ++ ++define <8 x i16> @lsxvavg_v8i16(<8 x i16> noundef %0, <8 x i16> noundef %1) local_unnamed_addr #0 { ++; CHECK-LABEL: lsxvavg_v8i16: ++; CHECK: # %bb.0: ++; 
CHECK-NEXT: vadd.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: vldi $vr1, 1 ++; CHECK-NEXT: vavg.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: jr $ra ++ %3 = add <8 x i16> %0, ++ %4 = add <8 x i16> %3, %1 ++ %5 = sdiv <8 x i16> %4, ++ ret <8 x i16> %5 ++} ++ ++define <4 x i32> @lsxvavg_v4i32(<4 x i32> noundef %0, <4 x i32> noundef %1) local_unnamed_addr #0 { ++; CHECK-LABEL: lsxvavg_v4i32: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vadd.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: vldi $vr1, 1 ++; CHECK-NEXT: vavg.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: jr $ra ++ %3 = add <4 x i32> %0, ++ %4 = add <4 x i32> %3, %1 ++ %5 = sdiv <4 x i32> %4, ++ ret <4 x i32> %5 ++} ++ ++define <2 x i64> @lsxvavg_v2i64(<2 x i64> noundef %0, <2 x i64> noundef %1) local_unnamed_addr #0 { ++; CHECK-LABEL: lsxvavg_v2i64: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vadd.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: vldi $vr1, 1 ++; CHECK-NEXT: vavg.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: jr $ra ++ %3 = add <2 x i64> %0, ++ %4 = add <2 x i64> %3, %1 ++ %5 = sdiv <2 x i64> %4, ++ ret <2 x i64> %5 ++} ++ ++define <16 x i8> @lsxvavg_v16u8(<16 x i8> noundef %0, <16 x i8> noundef %1) local_unnamed_addr #0 { ++; CHECK-LABEL: lsxvavg_v16u8: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vadd.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: vldi $vr1, 1 ++; CHECK-NEXT: vavg.bu $vr0, $vr0, $vr1 ++; CHECK-NEXT: jr $ra ++ %3 = add <16 x i8> %0, ++ %4 = add <16 x i8> %3, %1 ++ %5 = lshr <16 x i8> %4, ++ ret <16 x i8> %5 ++} ++ ++define <8 x i16> @lsxvavg_v8u16(<8 x i16> noundef %0, <8 x i16> noundef %1) local_unnamed_addr #0 { ++; CHECK-LABEL: lsxvavg_v8u16: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vadd.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: vldi $vr1, 1 ++; CHECK-NEXT: vavg.hu $vr0, $vr0, $vr1 ++; CHECK-NEXT: jr $ra ++ %3 = add <8 x i16> %0, ++ %4 = add <8 x i16> %3, %1 ++ %5 = lshr <8 x i16> %4, ++ ret <8 x i16> %5 ++} ++ ++define <4 x i32> @lsxvavg_v4u32(<4 x i32> noundef %0, <4 x i32> noundef %1) local_unnamed_addr #0 { ++; CHECK-LABEL: lsxvavg_v4u32: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vadd.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: vldi $vr1, 1 ++; CHECK-NEXT: vavg.wu $vr0, $vr0, $vr1 ++; CHECK-NEXT: jr $ra ++ %3 = add <4 x i32> %0, ++ %4 = add <4 x i32> %3, %1 ++ %5 = lshr <4 x i32> %4, ++ ret <4 x i32> %5 ++} ++ ++define <2 x i64> @lsxvavg_v2u64(<2 x i64> noundef %0, <2 x i64> noundef %1) local_unnamed_addr #0 { ++; CHECK-LABEL: lsxvavg_v2u64: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vadd.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: vldi $vr1, 1 ++; CHECK-NEXT: vavg.du $vr0, $vr0, $vr1 ++; CHECK-NEXT: jr $ra ++ %3 = add <2 x i64> %0, ++ %4 = add <2 x i64> %3, %1 ++ %5 = lshr <2 x i64> %4, ++ ret <2 x i64> %5 ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/v16i8-bswap.ll b/llvm/test/CodeGen/LoongArch/lsx/v16i8-bswap.ll +new file mode 100644 +index 000000000..25e4eb072 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/v16i8-bswap.ll +@@ -0,0 +1,20 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++define void @vshf_v16i8(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: vshf_v16i8: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $r5, 0 ++; CHECK-NEXT: vpickve2gr.d $r5, $vr0, 0 ++; CHECK-NEXT: vpickve2gr.d $r6, $vr0, 1 ++; CHECK-NEXT: revb.d $r6, $r6 ++; CHECK-NEXT: revb.d $r5, $r5 ++; CHECK-NEXT: vinsgr2vr.d $vr0, $r5, 0 ++; CHECK-NEXT: vinsgr2vr.d $vr0, $r6, 1 ++; CHECK-NEXT: vst $vr0, $r4, 0 ++; CHECK-NEXT: jr $ra ++ %v1 = load <16 x i8>, ptr %a0 ++ %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> ++ store <16 x i8> %v2, 
ptr %res ++ ret void ++} +diff --git a/llvm/test/CodeGen/Mips/atomic-fix-loongson3-llsc.ll b/llvm/test/CodeGen/Mips/atomic-fix-loongson3-llsc.ll +new file mode 100644 +index 000000000..a360bddb7 +--- /dev/null ++++ b/llvm/test/CodeGen/Mips/atomic-fix-loongson3-llsc.ll +@@ -0,0 +1,7548 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc -mips-fix-loongson3-llsc=1 -mtriple=mipsel-unknown-linux-gnu --disable-machine-licm -mcpu=mips32 -relocation-model=pic -verify-machineinstrs < %s | \ ++; RUN: FileCheck %s -check-prefix=MIPS32 ++; RUN: llc -mips-fix-loongson3-llsc=1 -mtriple=mipsel-unknown-linux-gnu -O0 --disable-machine-licm -mcpu=mips32 -relocation-model=pic -verify-machineinstrs < %s | \ ++; RUN: FileCheck %s -check-prefix=MIPS32O0 ++; RUN: llc -mips-fix-loongson3-llsc=1 -mtriple=mipsel-unknown-linux-gnu --disable-machine-licm -mcpu=mips32r2 -relocation-model=pic -verify-machineinstrs < %s | \ ++; RUN: FileCheck %s -check-prefix=MIPS32R2 ++; RUN: llc -mips-fix-loongson3-llsc=1 -mtriple=mipsel-unknown-linux-gnu --disable-machine-licm -mcpu=mips32r6 -relocation-model=pic -verify-machineinstrs < %s | \ ++; RUN: FileCheck %s -check-prefix=MIPS32R6 ++; RUN: llc -mips-fix-loongson3-llsc=1 -mtriple=mipsel-unknown-linux-gnu -O0 --disable-machine-licm -mcpu=mips32r6 -relocation-model=pic -verify-machineinstrs < %s | \ ++; RUN: FileCheck %s -check-prefix=MIPS32R6O0 ++; RUN: llc -mips-fix-loongson3-llsc=1 -mtriple=mips64el-unknown-linux-gnu --disable-machine-licm -mcpu=mips4 -relocation-model=pic -verify-machineinstrs < %s | \ ++; RUN: FileCheck %s -check-prefix=MIPS4 ++; RUN: llc -mips-fix-loongson3-llsc=1 -mtriple=mips64el-unknown-linux-gnu --disable-machine-licm -mcpu=mips64 -relocation-model=pic -verify-machineinstrs < %s | \ ++; RUN: FileCheck %s -check-prefix=MIPS64 ++; RUN: llc -mips-fix-loongson3-llsc=1 -mtriple=mips64el-unknown-linux-gnu --disable-machine-licm -mcpu=mips64r2 -relocation-model=pic -verify-machineinstrs < %s | \ ++; RUN: FileCheck %s -check-prefix=MIPS64R2 ++; RUN: llc -mips-fix-loongson3-llsc=1 -mtriple=mips64el-unknown-linux-gnu --disable-machine-licm -mcpu=mips64r6 -relocation-model=pic -verify-machineinstrs < %s | \ ++; RUN: FileCheck %s -check-prefix=MIPS64R6 ++; RUN: llc -mips-fix-loongson3-llsc=1 -mtriple=mips64-unknown-linux-gnu -O0 -mcpu=mips64r6 -relocation-model=pic -verify-machineinstrs -verify-machineinstrs < %s | \ ++; RUN: FileCheck %s -check-prefix=MIPS64R6O0 ++; RUN: llc -mips-fix-loongson3-llsc=1 -mtriple=mipsel-unknown-linux-gnu --disable-machine-licm -mcpu=mips32r2 -mattr=micromips -relocation-model=pic -verify-machineinstrs < %s | \ ++; RUN: FileCheck %s -check-prefix=MM32 ++ ++; We want to verify the produced code is well formed all optimization levels, the rest of the tests which ensure correctness. 
++; RUN: llc -mips-fix-loongson3-llsc=1 -mtriple=mipsel-unknown-linux-gnu -O1 --disable-machine-licm -mcpu=mips32 -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s --check-prefix=O1 ++; RUN: llc -mips-fix-loongson3-llsc=1 -mtriple=mipsel-unknown-linux-gnu -O2 --disable-machine-licm -mcpu=mips32 -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s --check-prefix=O2 ++; RUN: llc -mips-fix-loongson3-llsc=1 -mtriple=mipsel-unknown-linux-gnu -O3 --disable-machine-licm -mcpu=mips32 -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s --check-prefix=O3 ++ ++; Keep one big-endian check so that we don't reduce testing, but don't add more ++; since endianness doesn't affect the body of the atomic operations. ++; RUN: llc -mips-fix-loongson3-llsc=1 -mtriple=mips-unknown-linux-gnu --disable-machine-licm -mcpu=mips32 -relocation-model=pic -verify-machineinstrs < %s | \ ++; RUN: FileCheck %s -check-prefix=MIPS32EB ++ ++@x = common global i32 0, align 4 ++ ++define i32 @AtomicLoadAdd32(i32 signext %incr) nounwind { ++; MIPS32-LABEL: AtomicLoadAdd32: ++; MIPS32: # %bb.0: # %entry ++; MIPS32-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32-NEXT: addu $1, $2, $25 ++; MIPS32-NEXT: lw $1, %got(x)($1) ++; MIPS32-NEXT: $BB0_1: # %entry ++; MIPS32-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32-NEXT: ll $2, 0($1) ++; MIPS32-NEXT: addu $3, $2, $4 ++; MIPS32-NEXT: sc $3, 0($1) ++; MIPS32-NEXT: beqz $3, $BB0_1 ++; MIPS32-NEXT: nop ++; MIPS32-NEXT: # %bb.2: # %entry ++; MIPS32-NEXT: jr $ra ++; MIPS32-NEXT: nop ++; ++; MIPS32O0-LABEL: AtomicLoadAdd32: ++; MIPS32O0: # %bb.0: # %entry ++; MIPS32O0-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32O0-NEXT: addu $1, $2, $25 ++; MIPS32O0-NEXT: lw $3, %got(x)($1) ++; MIPS32O0-NEXT: $BB0_1: # %entry ++; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32O0-NEXT: ll $2, 0($3) ++; MIPS32O0-NEXT: addu $1, $2, $4 ++; MIPS32O0-NEXT: sc $1, 0($3) ++; MIPS32O0-NEXT: beqz $1, $BB0_1 ++; MIPS32O0-NEXT: nop ++; MIPS32O0-NEXT: # %bb.2: # %entry ++; MIPS32O0-NEXT: jr $ra ++; MIPS32O0-NEXT: nop ++; ++; MIPS32R2-LABEL: AtomicLoadAdd32: ++; MIPS32R2: # %bb.0: # %entry ++; MIPS32R2-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32R2-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32R2-NEXT: addu $1, $2, $25 ++; MIPS32R2-NEXT: lw $1, %got(x)($1) ++; MIPS32R2-NEXT: $BB0_1: # %entry ++; MIPS32R2-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32R2-NEXT: ll $2, 0($1) ++; MIPS32R2-NEXT: addu $3, $2, $4 ++; MIPS32R2-NEXT: sc $3, 0($1) ++; MIPS32R2-NEXT: beqz $3, $BB0_1 ++; MIPS32R2-NEXT: nop ++; MIPS32R2-NEXT: # %bb.2: # %entry ++; MIPS32R2-NEXT: jr $ra ++; MIPS32R2-NEXT: nop ++; ++; MIPS32R6-LABEL: AtomicLoadAdd32: ++; MIPS32R6: # %bb.0: # %entry ++; MIPS32R6-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32R6-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32R6-NEXT: addu $1, $2, $25 ++; MIPS32R6-NEXT: lw $1, %got(x)($1) ++; MIPS32R6-NEXT: $BB0_1: # %entry ++; MIPS32R6-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32R6-NEXT: ll $2, 0($1) ++; MIPS32R6-NEXT: addu $3, $2, $4 ++; MIPS32R6-NEXT: sc $3, 0($1) ++; MIPS32R6-NEXT: beqzc $3, $BB0_1 ++; MIPS32R6-NEXT: nop ++; MIPS32R6-NEXT: # %bb.2: # %entry ++; MIPS32R6-NEXT: jrc $ra ++; ++; MIPS32R6O0-LABEL: AtomicLoadAdd32: ++; MIPS32R6O0: # %bb.0: # %entry ++; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32R6O0-NEXT: addu $1, $2, $25 ++; MIPS32R6O0-NEXT: lw $3, %got(x)($1) ++; MIPS32R6O0-NEXT: $BB0_1: # 
%entry ++; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32R6O0-NEXT: ll $2, 0($3) ++; MIPS32R6O0-NEXT: addu $1, $2, $4 ++; MIPS32R6O0-NEXT: sc $1, 0($3) ++; MIPS32R6O0-NEXT: beqzc $1, $BB0_1 ++; MIPS32R6O0-NEXT: nop ++; MIPS32R6O0-NEXT: # %bb.2: # %entry ++; MIPS32R6O0-NEXT: jrc $ra ++; ++; MIPS4-LABEL: AtomicLoadAdd32: ++; MIPS4: # %bb.0: # %entry ++; MIPS4-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd32))) ++; MIPS4-NEXT: daddu $1, $1, $25 ++; MIPS4-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd32))) ++; MIPS4-NEXT: ld $1, %got_disp(x)($1) ++; MIPS4-NEXT: .LBB0_1: # %entry ++; MIPS4-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS4-NEXT: ll $2, 0($1) ++; MIPS4-NEXT: addu $3, $2, $4 ++; MIPS4-NEXT: sc $3, 0($1) ++; MIPS4-NEXT: beqz $3, .LBB0_1 ++; MIPS4-NEXT: nop ++; MIPS4-NEXT: # %bb.2: # %entry ++; MIPS4-NEXT: jr $ra ++; MIPS4-NEXT: nop ++; ++; MIPS64-LABEL: AtomicLoadAdd32: ++; MIPS64: # %bb.0: # %entry ++; MIPS64-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd32))) ++; MIPS64-NEXT: daddu $1, $1, $25 ++; MIPS64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd32))) ++; MIPS64-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64-NEXT: .LBB0_1: # %entry ++; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64-NEXT: ll $2, 0($1) ++; MIPS64-NEXT: addu $3, $2, $4 ++; MIPS64-NEXT: sc $3, 0($1) ++; MIPS64-NEXT: beqz $3, .LBB0_1 ++; MIPS64-NEXT: nop ++; MIPS64-NEXT: # %bb.2: # %entry ++; MIPS64-NEXT: jr $ra ++; MIPS64-NEXT: nop ++; ++; MIPS64R2-LABEL: AtomicLoadAdd32: ++; MIPS64R2: # %bb.0: # %entry ++; MIPS64R2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd32))) ++; MIPS64R2-NEXT: daddu $1, $1, $25 ++; MIPS64R2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd32))) ++; MIPS64R2-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64R2-NEXT: .LBB0_1: # %entry ++; MIPS64R2-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R2-NEXT: ll $2, 0($1) ++; MIPS64R2-NEXT: addu $3, $2, $4 ++; MIPS64R2-NEXT: sc $3, 0($1) ++; MIPS64R2-NEXT: beqz $3, .LBB0_1 ++; MIPS64R2-NEXT: nop ++; MIPS64R2-NEXT: # %bb.2: # %entry ++; MIPS64R2-NEXT: jr $ra ++; MIPS64R2-NEXT: nop ++; ++; MIPS64R6-LABEL: AtomicLoadAdd32: ++; MIPS64R6: # %bb.0: # %entry ++; MIPS64R6-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd32))) ++; MIPS64R6-NEXT: daddu $1, $1, $25 ++; MIPS64R6-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd32))) ++; MIPS64R6-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64R6-NEXT: .LBB0_1: # %entry ++; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6-NEXT: ll $2, 0($1) ++; MIPS64R6-NEXT: addu $3, $2, $4 ++; MIPS64R6-NEXT: sc $3, 0($1) ++; MIPS64R6-NEXT: beqzc $3, .LBB0_1 ++; MIPS64R6-NEXT: nop ++; MIPS64R6-NEXT: # %bb.2: # %entry ++; MIPS64R6-NEXT: jrc $ra ++; ++; MIPS64R6O0-LABEL: AtomicLoadAdd32: ++; MIPS64R6O0: # %bb.0: # %entry ++; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd32))) ++; MIPS64R6O0-NEXT: daddu $1, $1, $25 ++; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd32))) ++; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64 ++; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1) ++; MIPS64R6O0-NEXT: .LBB0_1: # %entry ++; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6O0-NEXT: ll $2, 0($3) ++; MIPS64R6O0-NEXT: addu $1, $2, $4 ++; MIPS64R6O0-NEXT: sc $1, 0($3) ++; MIPS64R6O0-NEXT: beqzc $1, .LBB0_1 ++; MIPS64R6O0-NEXT: nop ++; MIPS64R6O0-NEXT: # %bb.2: # %entry ++; MIPS64R6O0-NEXT: jrc $ra ++; ++; MM32-LABEL: AtomicLoadAdd32: ++; MM32: # %bb.0: # %entry ++; MM32-NEXT: lui $2, %hi(_gp_disp) ++; MM32-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MM32-NEXT: addu $2, $2, 
$25 ++; MM32-NEXT: lw $1, %got(x)($2) ++; MM32-NEXT: $BB0_1: # %entry ++; MM32-NEXT: # =>This Inner Loop Header: Depth=1 ++; MM32-NEXT: ll $2, 0($1) ++; MM32-NEXT: addu16 $3, $2, $4 ++; MM32-NEXT: sc $3, 0($1) ++; MM32-NEXT: beqzc $3, $BB0_1 ++; MM32-NEXT: # %bb.2: # %entry ++; MM32-NEXT: jrc $ra ++; ++; O1-LABEL: AtomicLoadAdd32: ++; O1: # %bb.0: # %entry ++; O1-NEXT: lui $2, %hi(_gp_disp) ++; O1-NEXT: addiu $2, $2, %lo(_gp_disp) ++; O1-NEXT: addu $1, $2, $25 ++; O1-NEXT: lw $1, %got(x)($1) ++; O1-NEXT: $BB0_1: # %entry ++; O1-NEXT: # =>This Inner Loop Header: Depth=1 ++; O1-NEXT: ll $2, 0($1) ++; O1-NEXT: addu $3, $2, $4 ++; O1-NEXT: sc $3, 0($1) ++; O1-NEXT: beqz $3, $BB0_1 ++; O1-NEXT: nop ++; O1-NEXT: # %bb.2: # %entry ++; O1-NEXT: jr $ra ++; O1-NEXT: nop ++; ++; O2-LABEL: AtomicLoadAdd32: ++; O2: # %bb.0: # %entry ++; O2-NEXT: lui $2, %hi(_gp_disp) ++; O2-NEXT: addiu $2, $2, %lo(_gp_disp) ++; O2-NEXT: addu $1, $2, $25 ++; O2-NEXT: lw $1, %got(x)($1) ++; O2-NEXT: $BB0_1: # %entry ++; O2-NEXT: # =>This Inner Loop Header: Depth=1 ++; O2-NEXT: ll $2, 0($1) ++; O2-NEXT: addu $3, $2, $4 ++; O2-NEXT: sc $3, 0($1) ++; O2-NEXT: beqz $3, $BB0_1 ++; O2-NEXT: nop ++; O2-NEXT: # %bb.2: # %entry ++; O2-NEXT: jr $ra ++; O2-NEXT: nop ++; ++; O3-LABEL: AtomicLoadAdd32: ++; O3: # %bb.0: # %entry ++; O3-NEXT: lui $2, %hi(_gp_disp) ++; O3-NEXT: addiu $2, $2, %lo(_gp_disp) ++; O3-NEXT: addu $1, $2, $25 ++; O3-NEXT: lw $1, %got(x)($1) ++; O3-NEXT: $BB0_1: # %entry ++; O3-NEXT: # =>This Inner Loop Header: Depth=1 ++; O3-NEXT: ll $2, 0($1) ++; O3-NEXT: addu $3, $2, $4 ++; O3-NEXT: sc $3, 0($1) ++; O3-NEXT: beqz $3, $BB0_1 ++; O3-NEXT: nop ++; O3-NEXT: # %bb.2: # %entry ++; O3-NEXT: jr $ra ++; O3-NEXT: nop ++; ++; MIPS32EB-LABEL: AtomicLoadAdd32: ++; MIPS32EB: # %bb.0: # %entry ++; MIPS32EB-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32EB-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32EB-NEXT: addu $1, $2, $25 ++; MIPS32EB-NEXT: lw $1, %got(x)($1) ++; MIPS32EB-NEXT: $BB0_1: # %entry ++; MIPS32EB-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32EB-NEXT: ll $2, 0($1) ++; MIPS32EB-NEXT: addu $3, $2, $4 ++; MIPS32EB-NEXT: sc $3, 0($1) ++; MIPS32EB-NEXT: beqz $3, $BB0_1 ++; MIPS32EB-NEXT: nop ++; MIPS32EB-NEXT: # %bb.2: # %entry ++; MIPS32EB-NEXT: jr $ra ++; MIPS32EB-NEXT: nop ++entry: ++ %0 = atomicrmw add i32* @x, i32 %incr monotonic ++ ret i32 %0 ++ ++} ++ ++define i32 @AtomicLoadSub32(i32 signext %incr) nounwind { ++; MIPS32-LABEL: AtomicLoadSub32: ++; MIPS32: # %bb.0: # %entry ++; MIPS32-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32-NEXT: addu $1, $2, $25 ++; MIPS32-NEXT: lw $1, %got(x)($1) ++; MIPS32-NEXT: $BB1_1: # %entry ++; MIPS32-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32-NEXT: ll $2, 0($1) ++; MIPS32-NEXT: subu $3, $2, $4 ++; MIPS32-NEXT: sc $3, 0($1) ++; MIPS32-NEXT: beqz $3, $BB1_1 ++; MIPS32-NEXT: nop ++; MIPS32-NEXT: # %bb.2: # %entry ++; MIPS32-NEXT: jr $ra ++; MIPS32-NEXT: nop ++; ++; MIPS32O0-LABEL: AtomicLoadSub32: ++; MIPS32O0: # %bb.0: # %entry ++; MIPS32O0-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32O0-NEXT: addu $1, $2, $25 ++; MIPS32O0-NEXT: lw $3, %got(x)($1) ++; MIPS32O0-NEXT: $BB1_1: # %entry ++; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32O0-NEXT: ll $2, 0($3) ++; MIPS32O0-NEXT: subu $1, $2, $4 ++; MIPS32O0-NEXT: sc $1, 0($3) ++; MIPS32O0-NEXT: beqz $1, $BB1_1 ++; MIPS32O0-NEXT: nop ++; MIPS32O0-NEXT: # %bb.2: # %entry ++; MIPS32O0-NEXT: jr $ra ++; MIPS32O0-NEXT: nop ++; ++; 
MIPS32R2-LABEL: AtomicLoadSub32: ++; MIPS32R2: # %bb.0: # %entry ++; MIPS32R2-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32R2-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32R2-NEXT: addu $1, $2, $25 ++; MIPS32R2-NEXT: lw $1, %got(x)($1) ++; MIPS32R2-NEXT: $BB1_1: # %entry ++; MIPS32R2-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32R2-NEXT: ll $2, 0($1) ++; MIPS32R2-NEXT: subu $3, $2, $4 ++; MIPS32R2-NEXT: sc $3, 0($1) ++; MIPS32R2-NEXT: beqz $3, $BB1_1 ++; MIPS32R2-NEXT: nop ++; MIPS32R2-NEXT: # %bb.2: # %entry ++; MIPS32R2-NEXT: jr $ra ++; MIPS32R2-NEXT: nop ++; ++; MIPS32R6-LABEL: AtomicLoadSub32: ++; MIPS32R6: # %bb.0: # %entry ++; MIPS32R6-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32R6-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32R6-NEXT: addu $1, $2, $25 ++; MIPS32R6-NEXT: lw $1, %got(x)($1) ++; MIPS32R6-NEXT: $BB1_1: # %entry ++; MIPS32R6-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32R6-NEXT: ll $2, 0($1) ++; MIPS32R6-NEXT: subu $3, $2, $4 ++; MIPS32R6-NEXT: sc $3, 0($1) ++; MIPS32R6-NEXT: beqzc $3, $BB1_1 ++; MIPS32R6-NEXT: nop ++; MIPS32R6-NEXT: # %bb.2: # %entry ++; MIPS32R6-NEXT: jrc $ra ++; ++; MIPS32R6O0-LABEL: AtomicLoadSub32: ++; MIPS32R6O0: # %bb.0: # %entry ++; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32R6O0-NEXT: addu $1, $2, $25 ++; MIPS32R6O0-NEXT: lw $3, %got(x)($1) ++; MIPS32R6O0-NEXT: $BB1_1: # %entry ++; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32R6O0-NEXT: ll $2, 0($3) ++; MIPS32R6O0-NEXT: subu $1, $2, $4 ++; MIPS32R6O0-NEXT: sc $1, 0($3) ++; MIPS32R6O0-NEXT: beqzc $1, $BB1_1 ++; MIPS32R6O0-NEXT: nop ++; MIPS32R6O0-NEXT: # %bb.2: # %entry ++; MIPS32R6O0-NEXT: jrc $ra ++; ++; MIPS4-LABEL: AtomicLoadSub32: ++; MIPS4: # %bb.0: # %entry ++; MIPS4-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadSub32))) ++; MIPS4-NEXT: daddu $1, $1, $25 ++; MIPS4-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadSub32))) ++; MIPS4-NEXT: ld $1, %got_disp(x)($1) ++; MIPS4-NEXT: .LBB1_1: # %entry ++; MIPS4-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS4-NEXT: ll $2, 0($1) ++; MIPS4-NEXT: subu $3, $2, $4 ++; MIPS4-NEXT: sc $3, 0($1) ++; MIPS4-NEXT: beqz $3, .LBB1_1 ++; MIPS4-NEXT: nop ++; MIPS4-NEXT: # %bb.2: # %entry ++; MIPS4-NEXT: jr $ra ++; MIPS4-NEXT: nop ++; ++; MIPS64-LABEL: AtomicLoadSub32: ++; MIPS64: # %bb.0: # %entry ++; MIPS64-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadSub32))) ++; MIPS64-NEXT: daddu $1, $1, $25 ++; MIPS64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadSub32))) ++; MIPS64-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64-NEXT: .LBB1_1: # %entry ++; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64-NEXT: ll $2, 0($1) ++; MIPS64-NEXT: subu $3, $2, $4 ++; MIPS64-NEXT: sc $3, 0($1) ++; MIPS64-NEXT: beqz $3, .LBB1_1 ++; MIPS64-NEXT: nop ++; MIPS64-NEXT: # %bb.2: # %entry ++; MIPS64-NEXT: jr $ra ++; MIPS64-NEXT: nop ++; ++; MIPS64R2-LABEL: AtomicLoadSub32: ++; MIPS64R2: # %bb.0: # %entry ++; MIPS64R2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadSub32))) ++; MIPS64R2-NEXT: daddu $1, $1, $25 ++; MIPS64R2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadSub32))) ++; MIPS64R2-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64R2-NEXT: .LBB1_1: # %entry ++; MIPS64R2-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R2-NEXT: ll $2, 0($1) ++; MIPS64R2-NEXT: subu $3, $2, $4 ++; MIPS64R2-NEXT: sc $3, 0($1) ++; MIPS64R2-NEXT: beqz $3, .LBB1_1 ++; MIPS64R2-NEXT: nop ++; MIPS64R2-NEXT: # %bb.2: # %entry ++; MIPS64R2-NEXT: jr $ra ++; MIPS64R2-NEXT: nop ++; ++; MIPS64R6-LABEL: AtomicLoadSub32: ++; MIPS64R6: # %bb.0: # %entry ++; 
MIPS64R6-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadSub32))) ++; MIPS64R6-NEXT: daddu $1, $1, $25 ++; MIPS64R6-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadSub32))) ++; MIPS64R6-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64R6-NEXT: .LBB1_1: # %entry ++; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6-NEXT: ll $2, 0($1) ++; MIPS64R6-NEXT: subu $3, $2, $4 ++; MIPS64R6-NEXT: sc $3, 0($1) ++; MIPS64R6-NEXT: beqzc $3, .LBB1_1 ++; MIPS64R6-NEXT: nop ++; MIPS64R6-NEXT: # %bb.2: # %entry ++; MIPS64R6-NEXT: jrc $ra ++; ++; MIPS64R6O0-LABEL: AtomicLoadSub32: ++; MIPS64R6O0: # %bb.0: # %entry ++; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadSub32))) ++; MIPS64R6O0-NEXT: daddu $1, $1, $25 ++; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadSub32))) ++; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64 ++; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1) ++; MIPS64R6O0-NEXT: .LBB1_1: # %entry ++; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6O0-NEXT: ll $2, 0($3) ++; MIPS64R6O0-NEXT: subu $1, $2, $4 ++; MIPS64R6O0-NEXT: sc $1, 0($3) ++; MIPS64R6O0-NEXT: beqzc $1, .LBB1_1 ++; MIPS64R6O0-NEXT: nop ++; MIPS64R6O0-NEXT: # %bb.2: # %entry ++; MIPS64R6O0-NEXT: jrc $ra ++; ++; MM32-LABEL: AtomicLoadSub32: ++; MM32: # %bb.0: # %entry ++; MM32-NEXT: lui $2, %hi(_gp_disp) ++; MM32-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MM32-NEXT: addu $2, $2, $25 ++; MM32-NEXT: lw $1, %got(x)($2) ++; MM32-NEXT: $BB1_1: # %entry ++; MM32-NEXT: # =>This Inner Loop Header: Depth=1 ++; MM32-NEXT: ll $2, 0($1) ++; MM32-NEXT: subu16 $3, $2, $4 ++; MM32-NEXT: sc $3, 0($1) ++; MM32-NEXT: beqzc $3, $BB1_1 ++; MM32-NEXT: # %bb.2: # %entry ++; MM32-NEXT: jrc $ra ++; ++; O1-LABEL: AtomicLoadSub32: ++; O1: # %bb.0: # %entry ++; O1-NEXT: lui $2, %hi(_gp_disp) ++; O1-NEXT: addiu $2, $2, %lo(_gp_disp) ++; O1-NEXT: addu $1, $2, $25 ++; O1-NEXT: lw $1, %got(x)($1) ++; O1-NEXT: $BB1_1: # %entry ++; O1-NEXT: # =>This Inner Loop Header: Depth=1 ++; O1-NEXT: ll $2, 0($1) ++; O1-NEXT: subu $3, $2, $4 ++; O1-NEXT: sc $3, 0($1) ++; O1-NEXT: beqz $3, $BB1_1 ++; O1-NEXT: nop ++; O1-NEXT: # %bb.2: # %entry ++; O1-NEXT: jr $ra ++; O1-NEXT: nop ++; ++; O2-LABEL: AtomicLoadSub32: ++; O2: # %bb.0: # %entry ++; O2-NEXT: lui $2, %hi(_gp_disp) ++; O2-NEXT: addiu $2, $2, %lo(_gp_disp) ++; O2-NEXT: addu $1, $2, $25 ++; O2-NEXT: lw $1, %got(x)($1) ++; O2-NEXT: $BB1_1: # %entry ++; O2-NEXT: # =>This Inner Loop Header: Depth=1 ++; O2-NEXT: ll $2, 0($1) ++; O2-NEXT: subu $3, $2, $4 ++; O2-NEXT: sc $3, 0($1) ++; O2-NEXT: beqz $3, $BB1_1 ++; O2-NEXT: nop ++; O2-NEXT: # %bb.2: # %entry ++; O2-NEXT: jr $ra ++; O2-NEXT: nop ++; ++; O3-LABEL: AtomicLoadSub32: ++; O3: # %bb.0: # %entry ++; O3-NEXT: lui $2, %hi(_gp_disp) ++; O3-NEXT: addiu $2, $2, %lo(_gp_disp) ++; O3-NEXT: addu $1, $2, $25 ++; O3-NEXT: lw $1, %got(x)($1) ++; O3-NEXT: $BB1_1: # %entry ++; O3-NEXT: # =>This Inner Loop Header: Depth=1 ++; O3-NEXT: ll $2, 0($1) ++; O3-NEXT: subu $3, $2, $4 ++; O3-NEXT: sc $3, 0($1) ++; O3-NEXT: beqz $3, $BB1_1 ++; O3-NEXT: nop ++; O3-NEXT: # %bb.2: # %entry ++; O3-NEXT: jr $ra ++; O3-NEXT: nop ++; ++; MIPS32EB-LABEL: AtomicLoadSub32: ++; MIPS32EB: # %bb.0: # %entry ++; MIPS32EB-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32EB-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32EB-NEXT: addu $1, $2, $25 ++; MIPS32EB-NEXT: lw $1, %got(x)($1) ++; MIPS32EB-NEXT: $BB1_1: # %entry ++; MIPS32EB-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32EB-NEXT: ll $2, 0($1) ++; MIPS32EB-NEXT: subu $3, $2, $4 ++; MIPS32EB-NEXT: sc $3, 0($1) ++; MIPS32EB-NEXT: 
beqz $3, $BB1_1 ++; MIPS32EB-NEXT: nop ++; MIPS32EB-NEXT: # %bb.2: # %entry ++; MIPS32EB-NEXT: jr $ra ++; MIPS32EB-NEXT: nop ++entry: ++ %0 = atomicrmw sub i32* @x, i32 %incr monotonic ++ ret i32 %0 ++ ++} ++ ++define i32 @AtomicLoadXor32(i32 signext %incr) nounwind { ++; MIPS32-LABEL: AtomicLoadXor32: ++; MIPS32: # %bb.0: # %entry ++; MIPS32-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32-NEXT: addu $1, $2, $25 ++; MIPS32-NEXT: lw $1, %got(x)($1) ++; MIPS32-NEXT: $BB2_1: # %entry ++; MIPS32-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32-NEXT: ll $2, 0($1) ++; MIPS32-NEXT: xor $3, $2, $4 ++; MIPS32-NEXT: sc $3, 0($1) ++; MIPS32-NEXT: beqz $3, $BB2_1 ++; MIPS32-NEXT: nop ++; MIPS32-NEXT: # %bb.2: # %entry ++; MIPS32-NEXT: jr $ra ++; MIPS32-NEXT: nop ++; ++; MIPS32O0-LABEL: AtomicLoadXor32: ++; MIPS32O0: # %bb.0: # %entry ++; MIPS32O0-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32O0-NEXT: addu $1, $2, $25 ++; MIPS32O0-NEXT: lw $3, %got(x)($1) ++; MIPS32O0-NEXT: $BB2_1: # %entry ++; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32O0-NEXT: ll $2, 0($3) ++; MIPS32O0-NEXT: xor $1, $2, $4 ++; MIPS32O0-NEXT: sc $1, 0($3) ++; MIPS32O0-NEXT: beqz $1, $BB2_1 ++; MIPS32O0-NEXT: nop ++; MIPS32O0-NEXT: # %bb.2: # %entry ++; MIPS32O0-NEXT: jr $ra ++; MIPS32O0-NEXT: nop ++; ++; MIPS32R2-LABEL: AtomicLoadXor32: ++; MIPS32R2: # %bb.0: # %entry ++; MIPS32R2-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32R2-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32R2-NEXT: addu $1, $2, $25 ++; MIPS32R2-NEXT: lw $1, %got(x)($1) ++; MIPS32R2-NEXT: $BB2_1: # %entry ++; MIPS32R2-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32R2-NEXT: ll $2, 0($1) ++; MIPS32R2-NEXT: xor $3, $2, $4 ++; MIPS32R2-NEXT: sc $3, 0($1) ++; MIPS32R2-NEXT: beqz $3, $BB2_1 ++; MIPS32R2-NEXT: nop ++; MIPS32R2-NEXT: # %bb.2: # %entry ++; MIPS32R2-NEXT: jr $ra ++; MIPS32R2-NEXT: nop ++; ++; MIPS32R6-LABEL: AtomicLoadXor32: ++; MIPS32R6: # %bb.0: # %entry ++; MIPS32R6-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32R6-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32R6-NEXT: addu $1, $2, $25 ++; MIPS32R6-NEXT: lw $1, %got(x)($1) ++; MIPS32R6-NEXT: $BB2_1: # %entry ++; MIPS32R6-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32R6-NEXT: ll $2, 0($1) ++; MIPS32R6-NEXT: xor $3, $2, $4 ++; MIPS32R6-NEXT: sc $3, 0($1) ++; MIPS32R6-NEXT: beqzc $3, $BB2_1 ++; MIPS32R6-NEXT: nop ++; MIPS32R6-NEXT: # %bb.2: # %entry ++; MIPS32R6-NEXT: jrc $ra ++; ++; MIPS32R6O0-LABEL: AtomicLoadXor32: ++; MIPS32R6O0: # %bb.0: # %entry ++; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32R6O0-NEXT: addu $1, $2, $25 ++; MIPS32R6O0-NEXT: lw $3, %got(x)($1) ++; MIPS32R6O0-NEXT: $BB2_1: # %entry ++; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32R6O0-NEXT: ll $2, 0($3) ++; MIPS32R6O0-NEXT: xor $1, $2, $4 ++; MIPS32R6O0-NEXT: sc $1, 0($3) ++; MIPS32R6O0-NEXT: beqzc $1, $BB2_1 ++; MIPS32R6O0-NEXT: nop ++; MIPS32R6O0-NEXT: # %bb.2: # %entry ++; MIPS32R6O0-NEXT: jrc $ra ++; ++; MIPS4-LABEL: AtomicLoadXor32: ++; MIPS4: # %bb.0: # %entry ++; MIPS4-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadXor32))) ++; MIPS4-NEXT: daddu $1, $1, $25 ++; MIPS4-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadXor32))) ++; MIPS4-NEXT: ld $1, %got_disp(x)($1) ++; MIPS4-NEXT: .LBB2_1: # %entry ++; MIPS4-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS4-NEXT: ll $2, 0($1) ++; MIPS4-NEXT: xor $3, $2, $4 ++; MIPS4-NEXT: sc $3, 0($1) ++; MIPS4-NEXT: beqz $3, .LBB2_1 ++; MIPS4-NEXT: 
nop ++; MIPS4-NEXT: # %bb.2: # %entry ++; MIPS4-NEXT: jr $ra ++; MIPS4-NEXT: nop ++; ++; MIPS64-LABEL: AtomicLoadXor32: ++; MIPS64: # %bb.0: # %entry ++; MIPS64-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadXor32))) ++; MIPS64-NEXT: daddu $1, $1, $25 ++; MIPS64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadXor32))) ++; MIPS64-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64-NEXT: .LBB2_1: # %entry ++; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64-NEXT: ll $2, 0($1) ++; MIPS64-NEXT: xor $3, $2, $4 ++; MIPS64-NEXT: sc $3, 0($1) ++; MIPS64-NEXT: beqz $3, .LBB2_1 ++; MIPS64-NEXT: nop ++; MIPS64-NEXT: # %bb.2: # %entry ++; MIPS64-NEXT: jr $ra ++; MIPS64-NEXT: nop ++; ++; MIPS64R2-LABEL: AtomicLoadXor32: ++; MIPS64R2: # %bb.0: # %entry ++; MIPS64R2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadXor32))) ++; MIPS64R2-NEXT: daddu $1, $1, $25 ++; MIPS64R2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadXor32))) ++; MIPS64R2-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64R2-NEXT: .LBB2_1: # %entry ++; MIPS64R2-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R2-NEXT: ll $2, 0($1) ++; MIPS64R2-NEXT: xor $3, $2, $4 ++; MIPS64R2-NEXT: sc $3, 0($1) ++; MIPS64R2-NEXT: beqz $3, .LBB2_1 ++; MIPS64R2-NEXT: nop ++; MIPS64R2-NEXT: # %bb.2: # %entry ++; MIPS64R2-NEXT: jr $ra ++; MIPS64R2-NEXT: nop ++; ++; MIPS64R6-LABEL: AtomicLoadXor32: ++; MIPS64R6: # %bb.0: # %entry ++; MIPS64R6-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadXor32))) ++; MIPS64R6-NEXT: daddu $1, $1, $25 ++; MIPS64R6-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadXor32))) ++; MIPS64R6-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64R6-NEXT: .LBB2_1: # %entry ++; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6-NEXT: ll $2, 0($1) ++; MIPS64R6-NEXT: xor $3, $2, $4 ++; MIPS64R6-NEXT: sc $3, 0($1) ++; MIPS64R6-NEXT: beqzc $3, .LBB2_1 ++; MIPS64R6-NEXT: nop ++; MIPS64R6-NEXT: # %bb.2: # %entry ++; MIPS64R6-NEXT: jrc $ra ++; ++; MIPS64R6O0-LABEL: AtomicLoadXor32: ++; MIPS64R6O0: # %bb.0: # %entry ++; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadXor32))) ++; MIPS64R6O0-NEXT: daddu $1, $1, $25 ++; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadXor32))) ++; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64 ++; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1) ++; MIPS64R6O0-NEXT: .LBB2_1: # %entry ++; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6O0-NEXT: ll $2, 0($3) ++; MIPS64R6O0-NEXT: xor $1, $2, $4 ++; MIPS64R6O0-NEXT: sc $1, 0($3) ++; MIPS64R6O0-NEXT: beqzc $1, .LBB2_1 ++; MIPS64R6O0-NEXT: nop ++; MIPS64R6O0-NEXT: # %bb.2: # %entry ++; MIPS64R6O0-NEXT: jrc $ra ++; ++; MM32-LABEL: AtomicLoadXor32: ++; MM32: # %bb.0: # %entry ++; MM32-NEXT: lui $2, %hi(_gp_disp) ++; MM32-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MM32-NEXT: addu $2, $2, $25 ++; MM32-NEXT: lw $1, %got(x)($2) ++; MM32-NEXT: $BB2_1: # %entry ++; MM32-NEXT: # =>This Inner Loop Header: Depth=1 ++; MM32-NEXT: ll $2, 0($1) ++; MM32-NEXT: xor $3, $2, $4 ++; MM32-NEXT: sc $3, 0($1) ++; MM32-NEXT: beqzc $3, $BB2_1 ++; MM32-NEXT: # %bb.2: # %entry ++; MM32-NEXT: jrc $ra ++; ++; O1-LABEL: AtomicLoadXor32: ++; O1: # %bb.0: # %entry ++; O1-NEXT: lui $2, %hi(_gp_disp) ++; O1-NEXT: addiu $2, $2, %lo(_gp_disp) ++; O1-NEXT: addu $1, $2, $25 ++; O1-NEXT: lw $1, %got(x)($1) ++; O1-NEXT: $BB2_1: # %entry ++; O1-NEXT: # =>This Inner Loop Header: Depth=1 ++; O1-NEXT: ll $2, 0($1) ++; O1-NEXT: xor $3, $2, $4 ++; O1-NEXT: sc $3, 0($1) ++; O1-NEXT: beqz $3, $BB2_1 ++; O1-NEXT: nop ++; O1-NEXT: # %bb.2: # %entry ++; O1-NEXT: jr $ra ++; O1-NEXT: nop ++; ++; O2-LABEL: 
AtomicLoadXor32: ++; O2: # %bb.0: # %entry ++; O2-NEXT: lui $2, %hi(_gp_disp) ++; O2-NEXT: addiu $2, $2, %lo(_gp_disp) ++; O2-NEXT: addu $1, $2, $25 ++; O2-NEXT: lw $1, %got(x)($1) ++; O2-NEXT: $BB2_1: # %entry ++; O2-NEXT: # =>This Inner Loop Header: Depth=1 ++; O2-NEXT: ll $2, 0($1) ++; O2-NEXT: xor $3, $2, $4 ++; O2-NEXT: sc $3, 0($1) ++; O2-NEXT: beqz $3, $BB2_1 ++; O2-NEXT: nop ++; O2-NEXT: # %bb.2: # %entry ++; O2-NEXT: jr $ra ++; O2-NEXT: nop ++; ++; O3-LABEL: AtomicLoadXor32: ++; O3: # %bb.0: # %entry ++; O3-NEXT: lui $2, %hi(_gp_disp) ++; O3-NEXT: addiu $2, $2, %lo(_gp_disp) ++; O3-NEXT: addu $1, $2, $25 ++; O3-NEXT: lw $1, %got(x)($1) ++; O3-NEXT: $BB2_1: # %entry ++; O3-NEXT: # =>This Inner Loop Header: Depth=1 ++; O3-NEXT: ll $2, 0($1) ++; O3-NEXT: xor $3, $2, $4 ++; O3-NEXT: sc $3, 0($1) ++; O3-NEXT: beqz $3, $BB2_1 ++; O3-NEXT: nop ++; O3-NEXT: # %bb.2: # %entry ++; O3-NEXT: jr $ra ++; O3-NEXT: nop ++; ++; MIPS32EB-LABEL: AtomicLoadXor32: ++; MIPS32EB: # %bb.0: # %entry ++; MIPS32EB-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32EB-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32EB-NEXT: addu $1, $2, $25 ++; MIPS32EB-NEXT: lw $1, %got(x)($1) ++; MIPS32EB-NEXT: $BB2_1: # %entry ++; MIPS32EB-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32EB-NEXT: ll $2, 0($1) ++; MIPS32EB-NEXT: xor $3, $2, $4 ++; MIPS32EB-NEXT: sc $3, 0($1) ++; MIPS32EB-NEXT: beqz $3, $BB2_1 ++; MIPS32EB-NEXT: nop ++; MIPS32EB-NEXT: # %bb.2: # %entry ++; MIPS32EB-NEXT: jr $ra ++; MIPS32EB-NEXT: nop ++entry: ++ %0 = atomicrmw xor i32* @x, i32 %incr monotonic ++ ret i32 %0 ++} ++ ++define i32 @AtomicLoadOr32(i32 signext %incr) nounwind { ++; MIPS32-LABEL: AtomicLoadOr32: ++; MIPS32: # %bb.0: # %entry ++; MIPS32-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32-NEXT: addu $1, $2, $25 ++; MIPS32-NEXT: lw $1, %got(x)($1) ++; MIPS32-NEXT: $BB3_1: # %entry ++; MIPS32-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32-NEXT: ll $2, 0($1) ++; MIPS32-NEXT: or $3, $2, $4 ++; MIPS32-NEXT: sc $3, 0($1) ++; MIPS32-NEXT: beqz $3, $BB3_1 ++; MIPS32-NEXT: nop ++; MIPS32-NEXT: # %bb.2: # %entry ++; MIPS32-NEXT: jr $ra ++; MIPS32-NEXT: nop ++; ++; MIPS32O0-LABEL: AtomicLoadOr32: ++; MIPS32O0: # %bb.0: # %entry ++; MIPS32O0-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32O0-NEXT: addu $1, $2, $25 ++; MIPS32O0-NEXT: lw $3, %got(x)($1) ++; MIPS32O0-NEXT: $BB3_1: # %entry ++; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32O0-NEXT: ll $2, 0($3) ++; MIPS32O0-NEXT: or $1, $2, $4 ++; MIPS32O0-NEXT: sc $1, 0($3) ++; MIPS32O0-NEXT: beqz $1, $BB3_1 ++; MIPS32O0-NEXT: nop ++; MIPS32O0-NEXT: # %bb.2: # %entry ++; MIPS32O0-NEXT: jr $ra ++; MIPS32O0-NEXT: nop ++; ++; MIPS32R2-LABEL: AtomicLoadOr32: ++; MIPS32R2: # %bb.0: # %entry ++; MIPS32R2-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32R2-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32R2-NEXT: addu $1, $2, $25 ++; MIPS32R2-NEXT: lw $1, %got(x)($1) ++; MIPS32R2-NEXT: $BB3_1: # %entry ++; MIPS32R2-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32R2-NEXT: ll $2, 0($1) ++; MIPS32R2-NEXT: or $3, $2, $4 ++; MIPS32R2-NEXT: sc $3, 0($1) ++; MIPS32R2-NEXT: beqz $3, $BB3_1 ++; MIPS32R2-NEXT: nop ++; MIPS32R2-NEXT: # %bb.2: # %entry ++; MIPS32R2-NEXT: jr $ra ++; MIPS32R2-NEXT: nop ++; ++; MIPS32R6-LABEL: AtomicLoadOr32: ++; MIPS32R6: # %bb.0: # %entry ++; MIPS32R6-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32R6-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32R6-NEXT: addu $1, $2, $25 ++; MIPS32R6-NEXT: lw $1, %got(x)($1) ++; MIPS32R6-NEXT: 
$BB3_1: # %entry ++; MIPS32R6-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32R6-NEXT: ll $2, 0($1) ++; MIPS32R6-NEXT: or $3, $2, $4 ++; MIPS32R6-NEXT: sc $3, 0($1) ++; MIPS32R6-NEXT: beqzc $3, $BB3_1 ++; MIPS32R6-NEXT: nop ++; MIPS32R6-NEXT: # %bb.2: # %entry ++; MIPS32R6-NEXT: jrc $ra ++; ++; MIPS32R6O0-LABEL: AtomicLoadOr32: ++; MIPS32R6O0: # %bb.0: # %entry ++; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32R6O0-NEXT: addu $1, $2, $25 ++; MIPS32R6O0-NEXT: lw $3, %got(x)($1) ++; MIPS32R6O0-NEXT: $BB3_1: # %entry ++; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32R6O0-NEXT: ll $2, 0($3) ++; MIPS32R6O0-NEXT: or $1, $2, $4 ++; MIPS32R6O0-NEXT: sc $1, 0($3) ++; MIPS32R6O0-NEXT: beqzc $1, $BB3_1 ++; MIPS32R6O0-NEXT: nop ++; MIPS32R6O0-NEXT: # %bb.2: # %entry ++; MIPS32R6O0-NEXT: jrc $ra ++; ++; MIPS4-LABEL: AtomicLoadOr32: ++; MIPS4: # %bb.0: # %entry ++; MIPS4-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadOr32))) ++; MIPS4-NEXT: daddu $1, $1, $25 ++; MIPS4-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadOr32))) ++; MIPS4-NEXT: ld $1, %got_disp(x)($1) ++; MIPS4-NEXT: .LBB3_1: # %entry ++; MIPS4-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS4-NEXT: ll $2, 0($1) ++; MIPS4-NEXT: or $3, $2, $4 ++; MIPS4-NEXT: sc $3, 0($1) ++; MIPS4-NEXT: beqz $3, .LBB3_1 ++; MIPS4-NEXT: nop ++; MIPS4-NEXT: # %bb.2: # %entry ++; MIPS4-NEXT: jr $ra ++; MIPS4-NEXT: nop ++; ++; MIPS64-LABEL: AtomicLoadOr32: ++; MIPS64: # %bb.0: # %entry ++; MIPS64-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadOr32))) ++; MIPS64-NEXT: daddu $1, $1, $25 ++; MIPS64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadOr32))) ++; MIPS64-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64-NEXT: .LBB3_1: # %entry ++; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64-NEXT: ll $2, 0($1) ++; MIPS64-NEXT: or $3, $2, $4 ++; MIPS64-NEXT: sc $3, 0($1) ++; MIPS64-NEXT: beqz $3, .LBB3_1 ++; MIPS64-NEXT: nop ++; MIPS64-NEXT: # %bb.2: # %entry ++; MIPS64-NEXT: jr $ra ++; MIPS64-NEXT: nop ++; ++; MIPS64R2-LABEL: AtomicLoadOr32: ++; MIPS64R2: # %bb.0: # %entry ++; MIPS64R2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadOr32))) ++; MIPS64R2-NEXT: daddu $1, $1, $25 ++; MIPS64R2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadOr32))) ++; MIPS64R2-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64R2-NEXT: .LBB3_1: # %entry ++; MIPS64R2-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R2-NEXT: ll $2, 0($1) ++; MIPS64R2-NEXT: or $3, $2, $4 ++; MIPS64R2-NEXT: sc $3, 0($1) ++; MIPS64R2-NEXT: beqz $3, .LBB3_1 ++; MIPS64R2-NEXT: nop ++; MIPS64R2-NEXT: # %bb.2: # %entry ++; MIPS64R2-NEXT: jr $ra ++; MIPS64R2-NEXT: nop ++; ++; MIPS64R6-LABEL: AtomicLoadOr32: ++; MIPS64R6: # %bb.0: # %entry ++; MIPS64R6-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadOr32))) ++; MIPS64R6-NEXT: daddu $1, $1, $25 ++; MIPS64R6-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadOr32))) ++; MIPS64R6-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64R6-NEXT: .LBB3_1: # %entry ++; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6-NEXT: ll $2, 0($1) ++; MIPS64R6-NEXT: or $3, $2, $4 ++; MIPS64R6-NEXT: sc $3, 0($1) ++; MIPS64R6-NEXT: beqzc $3, .LBB3_1 ++; MIPS64R6-NEXT: nop ++; MIPS64R6-NEXT: # %bb.2: # %entry ++; MIPS64R6-NEXT: jrc $ra ++; ++; MIPS64R6O0-LABEL: AtomicLoadOr32: ++; MIPS64R6O0: # %bb.0: # %entry ++; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadOr32))) ++; MIPS64R6O0-NEXT: daddu $1, $1, $25 ++; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadOr32))) ++; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64 ++; 
MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1) ++; MIPS64R6O0-NEXT: .LBB3_1: # %entry ++; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6O0-NEXT: ll $2, 0($3) ++; MIPS64R6O0-NEXT: or $1, $2, $4 ++; MIPS64R6O0-NEXT: sc $1, 0($3) ++; MIPS64R6O0-NEXT: beqzc $1, .LBB3_1 ++; MIPS64R6O0-NEXT: nop ++; MIPS64R6O0-NEXT: # %bb.2: # %entry ++; MIPS64R6O0-NEXT: jrc $ra ++; ++; MM32-LABEL: AtomicLoadOr32: ++; MM32: # %bb.0: # %entry ++; MM32-NEXT: lui $2, %hi(_gp_disp) ++; MM32-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MM32-NEXT: addu $2, $2, $25 ++; MM32-NEXT: lw $1, %got(x)($2) ++; MM32-NEXT: $BB3_1: # %entry ++; MM32-NEXT: # =>This Inner Loop Header: Depth=1 ++; MM32-NEXT: ll $2, 0($1) ++; MM32-NEXT: or $3, $2, $4 ++; MM32-NEXT: sc $3, 0($1) ++; MM32-NEXT: beqzc $3, $BB3_1 ++; MM32-NEXT: # %bb.2: # %entry ++; MM32-NEXT: jrc $ra ++; ++; O1-LABEL: AtomicLoadOr32: ++; O1: # %bb.0: # %entry ++; O1-NEXT: lui $2, %hi(_gp_disp) ++; O1-NEXT: addiu $2, $2, %lo(_gp_disp) ++; O1-NEXT: addu $1, $2, $25 ++; O1-NEXT: lw $1, %got(x)($1) ++; O1-NEXT: $BB3_1: # %entry ++; O1-NEXT: # =>This Inner Loop Header: Depth=1 ++; O1-NEXT: ll $2, 0($1) ++; O1-NEXT: or $3, $2, $4 ++; O1-NEXT: sc $3, 0($1) ++; O1-NEXT: beqz $3, $BB3_1 ++; O1-NEXT: nop ++; O1-NEXT: # %bb.2: # %entry ++; O1-NEXT: jr $ra ++; O1-NEXT: nop ++; ++; O2-LABEL: AtomicLoadOr32: ++; O2: # %bb.0: # %entry ++; O2-NEXT: lui $2, %hi(_gp_disp) ++; O2-NEXT: addiu $2, $2, %lo(_gp_disp) ++; O2-NEXT: addu $1, $2, $25 ++; O2-NEXT: lw $1, %got(x)($1) ++; O2-NEXT: $BB3_1: # %entry ++; O2-NEXT: # =>This Inner Loop Header: Depth=1 ++; O2-NEXT: ll $2, 0($1) ++; O2-NEXT: or $3, $2, $4 ++; O2-NEXT: sc $3, 0($1) ++; O2-NEXT: beqz $3, $BB3_1 ++; O2-NEXT: nop ++; O2-NEXT: # %bb.2: # %entry ++; O2-NEXT: jr $ra ++; O2-NEXT: nop ++; ++; O3-LABEL: AtomicLoadOr32: ++; O3: # %bb.0: # %entry ++; O3-NEXT: lui $2, %hi(_gp_disp) ++; O3-NEXT: addiu $2, $2, %lo(_gp_disp) ++; O3-NEXT: addu $1, $2, $25 ++; O3-NEXT: lw $1, %got(x)($1) ++; O3-NEXT: $BB3_1: # %entry ++; O3-NEXT: # =>This Inner Loop Header: Depth=1 ++; O3-NEXT: ll $2, 0($1) ++; O3-NEXT: or $3, $2, $4 ++; O3-NEXT: sc $3, 0($1) ++; O3-NEXT: beqz $3, $BB3_1 ++; O3-NEXT: nop ++; O3-NEXT: # %bb.2: # %entry ++; O3-NEXT: jr $ra ++; O3-NEXT: nop ++; ++; MIPS32EB-LABEL: AtomicLoadOr32: ++; MIPS32EB: # %bb.0: # %entry ++; MIPS32EB-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32EB-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32EB-NEXT: addu $1, $2, $25 ++; MIPS32EB-NEXT: lw $1, %got(x)($1) ++; MIPS32EB-NEXT: $BB3_1: # %entry ++; MIPS32EB-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32EB-NEXT: ll $2, 0($1) ++; MIPS32EB-NEXT: or $3, $2, $4 ++; MIPS32EB-NEXT: sc $3, 0($1) ++; MIPS32EB-NEXT: beqz $3, $BB3_1 ++; MIPS32EB-NEXT: nop ++; MIPS32EB-NEXT: # %bb.2: # %entry ++; MIPS32EB-NEXT: jr $ra ++; MIPS32EB-NEXT: nop ++entry: ++ %0 = atomicrmw or i32* @x, i32 %incr monotonic ++ ret i32 %0 ++} ++ ++define i32 @AtomicLoadAnd32(i32 signext %incr) nounwind { ++; MIPS32-LABEL: AtomicLoadAnd32: ++; MIPS32: # %bb.0: # %entry ++; MIPS32-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32-NEXT: addu $1, $2, $25 ++; MIPS32-NEXT: lw $1, %got(x)($1) ++; MIPS32-NEXT: $BB4_1: # %entry ++; MIPS32-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32-NEXT: ll $2, 0($1) ++; MIPS32-NEXT: and $3, $2, $4 ++; MIPS32-NEXT: sc $3, 0($1) ++; MIPS32-NEXT: beqz $3, $BB4_1 ++; MIPS32-NEXT: nop ++; MIPS32-NEXT: # %bb.2: # %entry ++; MIPS32-NEXT: jr $ra ++; MIPS32-NEXT: nop ++; ++; MIPS32O0-LABEL: AtomicLoadAnd32: ++; MIPS32O0: # %bb.0: # 
%entry ++; MIPS32O0-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32O0-NEXT: addu $1, $2, $25 ++; MIPS32O0-NEXT: lw $3, %got(x)($1) ++; MIPS32O0-NEXT: $BB4_1: # %entry ++; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32O0-NEXT: ll $2, 0($3) ++; MIPS32O0-NEXT: and $1, $2, $4 ++; MIPS32O0-NEXT: sc $1, 0($3) ++; MIPS32O0-NEXT: beqz $1, $BB4_1 ++; MIPS32O0-NEXT: nop ++; MIPS32O0-NEXT: # %bb.2: # %entry ++; MIPS32O0-NEXT: jr $ra ++; MIPS32O0-NEXT: nop ++; ++; MIPS32R2-LABEL: AtomicLoadAnd32: ++; MIPS32R2: # %bb.0: # %entry ++; MIPS32R2-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32R2-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32R2-NEXT: addu $1, $2, $25 ++; MIPS32R2-NEXT: lw $1, %got(x)($1) ++; MIPS32R2-NEXT: $BB4_1: # %entry ++; MIPS32R2-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32R2-NEXT: ll $2, 0($1) ++; MIPS32R2-NEXT: and $3, $2, $4 ++; MIPS32R2-NEXT: sc $3, 0($1) ++; MIPS32R2-NEXT: beqz $3, $BB4_1 ++; MIPS32R2-NEXT: nop ++; MIPS32R2-NEXT: # %bb.2: # %entry ++; MIPS32R2-NEXT: jr $ra ++; MIPS32R2-NEXT: nop ++; ++; MIPS32R6-LABEL: AtomicLoadAnd32: ++; MIPS32R6: # %bb.0: # %entry ++; MIPS32R6-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32R6-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32R6-NEXT: addu $1, $2, $25 ++; MIPS32R6-NEXT: lw $1, %got(x)($1) ++; MIPS32R6-NEXT: $BB4_1: # %entry ++; MIPS32R6-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32R6-NEXT: ll $2, 0($1) ++; MIPS32R6-NEXT: and $3, $2, $4 ++; MIPS32R6-NEXT: sc $3, 0($1) ++; MIPS32R6-NEXT: beqzc $3, $BB4_1 ++; MIPS32R6-NEXT: nop ++; MIPS32R6-NEXT: # %bb.2: # %entry ++; MIPS32R6-NEXT: jrc $ra ++; ++; MIPS32R6O0-LABEL: AtomicLoadAnd32: ++; MIPS32R6O0: # %bb.0: # %entry ++; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32R6O0-NEXT: addu $1, $2, $25 ++; MIPS32R6O0-NEXT: lw $3, %got(x)($1) ++; MIPS32R6O0-NEXT: $BB4_1: # %entry ++; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32R6O0-NEXT: ll $2, 0($3) ++; MIPS32R6O0-NEXT: and $1, $2, $4 ++; MIPS32R6O0-NEXT: sc $1, 0($3) ++; MIPS32R6O0-NEXT: beqzc $1, $BB4_1 ++; MIPS32R6O0-NEXT: nop ++; MIPS32R6O0-NEXT: # %bb.2: # %entry ++; MIPS32R6O0-NEXT: jrc $ra ++; ++; MIPS4-LABEL: AtomicLoadAnd32: ++; MIPS4: # %bb.0: # %entry ++; MIPS4-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAnd32))) ++; MIPS4-NEXT: daddu $1, $1, $25 ++; MIPS4-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAnd32))) ++; MIPS4-NEXT: ld $1, %got_disp(x)($1) ++; MIPS4-NEXT: .LBB4_1: # %entry ++; MIPS4-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS4-NEXT: ll $2, 0($1) ++; MIPS4-NEXT: and $3, $2, $4 ++; MIPS4-NEXT: sc $3, 0($1) ++; MIPS4-NEXT: beqz $3, .LBB4_1 ++; MIPS4-NEXT: nop ++; MIPS4-NEXT: # %bb.2: # %entry ++; MIPS4-NEXT: jr $ra ++; MIPS4-NEXT: nop ++; ++; MIPS64-LABEL: AtomicLoadAnd32: ++; MIPS64: # %bb.0: # %entry ++; MIPS64-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAnd32))) ++; MIPS64-NEXT: daddu $1, $1, $25 ++; MIPS64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAnd32))) ++; MIPS64-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64-NEXT: .LBB4_1: # %entry ++; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64-NEXT: ll $2, 0($1) ++; MIPS64-NEXT: and $3, $2, $4 ++; MIPS64-NEXT: sc $3, 0($1) ++; MIPS64-NEXT: beqz $3, .LBB4_1 ++; MIPS64-NEXT: nop ++; MIPS64-NEXT: # %bb.2: # %entry ++; MIPS64-NEXT: jr $ra ++; MIPS64-NEXT: nop ++; ++; MIPS64R2-LABEL: AtomicLoadAnd32: ++; MIPS64R2: # %bb.0: # %entry ++; MIPS64R2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAnd32))) ++; MIPS64R2-NEXT: daddu $1, $1, $25 ++; MIPS64R2-NEXT: daddiu 
$1, $1, %lo(%neg(%gp_rel(AtomicLoadAnd32))) ++; MIPS64R2-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64R2-NEXT: .LBB4_1: # %entry ++; MIPS64R2-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R2-NEXT: ll $2, 0($1) ++; MIPS64R2-NEXT: and $3, $2, $4 ++; MIPS64R2-NEXT: sc $3, 0($1) ++; MIPS64R2-NEXT: beqz $3, .LBB4_1 ++; MIPS64R2-NEXT: nop ++; MIPS64R2-NEXT: # %bb.2: # %entry ++; MIPS64R2-NEXT: jr $ra ++; MIPS64R2-NEXT: nop ++; ++; MIPS64R6-LABEL: AtomicLoadAnd32: ++; MIPS64R6: # %bb.0: # %entry ++; MIPS64R6-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAnd32))) ++; MIPS64R6-NEXT: daddu $1, $1, $25 ++; MIPS64R6-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAnd32))) ++; MIPS64R6-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64R6-NEXT: .LBB4_1: # %entry ++; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6-NEXT: ll $2, 0($1) ++; MIPS64R6-NEXT: and $3, $2, $4 ++; MIPS64R6-NEXT: sc $3, 0($1) ++; MIPS64R6-NEXT: beqzc $3, .LBB4_1 ++; MIPS64R6-NEXT: nop ++; MIPS64R6-NEXT: # %bb.2: # %entry ++; MIPS64R6-NEXT: jrc $ra ++; ++; MIPS64R6O0-LABEL: AtomicLoadAnd32: ++; MIPS64R6O0: # %bb.0: # %entry ++; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAnd32))) ++; MIPS64R6O0-NEXT: daddu $1, $1, $25 ++; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAnd32))) ++; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64 ++; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1) ++; MIPS64R6O0-NEXT: .LBB4_1: # %entry ++; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6O0-NEXT: ll $2, 0($3) ++; MIPS64R6O0-NEXT: and $1, $2, $4 ++; MIPS64R6O0-NEXT: sc $1, 0($3) ++; MIPS64R6O0-NEXT: beqzc $1, .LBB4_1 ++; MIPS64R6O0-NEXT: nop ++; MIPS64R6O0-NEXT: # %bb.2: # %entry ++; MIPS64R6O0-NEXT: jrc $ra ++; ++; MM32-LABEL: AtomicLoadAnd32: ++; MM32: # %bb.0: # %entry ++; MM32-NEXT: lui $2, %hi(_gp_disp) ++; MM32-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MM32-NEXT: addu $2, $2, $25 ++; MM32-NEXT: lw $1, %got(x)($2) ++; MM32-NEXT: $BB4_1: # %entry ++; MM32-NEXT: # =>This Inner Loop Header: Depth=1 ++; MM32-NEXT: ll $2, 0($1) ++; MM32-NEXT: and $3, $2, $4 ++; MM32-NEXT: sc $3, 0($1) ++; MM32-NEXT: beqzc $3, $BB4_1 ++; MM32-NEXT: # %bb.2: # %entry ++; MM32-NEXT: jrc $ra ++; ++; O1-LABEL: AtomicLoadAnd32: ++; O1: # %bb.0: # %entry ++; O1-NEXT: lui $2, %hi(_gp_disp) ++; O1-NEXT: addiu $2, $2, %lo(_gp_disp) ++; O1-NEXT: addu $1, $2, $25 ++; O1-NEXT: lw $1, %got(x)($1) ++; O1-NEXT: $BB4_1: # %entry ++; O1-NEXT: # =>This Inner Loop Header: Depth=1 ++; O1-NEXT: ll $2, 0($1) ++; O1-NEXT: and $3, $2, $4 ++; O1-NEXT: sc $3, 0($1) ++; O1-NEXT: beqz $3, $BB4_1 ++; O1-NEXT: nop ++; O1-NEXT: # %bb.2: # %entry ++; O1-NEXT: jr $ra ++; O1-NEXT: nop ++; ++; O2-LABEL: AtomicLoadAnd32: ++; O2: # %bb.0: # %entry ++; O2-NEXT: lui $2, %hi(_gp_disp) ++; O2-NEXT: addiu $2, $2, %lo(_gp_disp) ++; O2-NEXT: addu $1, $2, $25 ++; O2-NEXT: lw $1, %got(x)($1) ++; O2-NEXT: $BB4_1: # %entry ++; O2-NEXT: # =>This Inner Loop Header: Depth=1 ++; O2-NEXT: ll $2, 0($1) ++; O2-NEXT: and $3, $2, $4 ++; O2-NEXT: sc $3, 0($1) ++; O2-NEXT: beqz $3, $BB4_1 ++; O2-NEXT: nop ++; O2-NEXT: # %bb.2: # %entry ++; O2-NEXT: jr $ra ++; O2-NEXT: nop ++; ++; O3-LABEL: AtomicLoadAnd32: ++; O3: # %bb.0: # %entry ++; O3-NEXT: lui $2, %hi(_gp_disp) ++; O3-NEXT: addiu $2, $2, %lo(_gp_disp) ++; O3-NEXT: addu $1, $2, $25 ++; O3-NEXT: lw $1, %got(x)($1) ++; O3-NEXT: $BB4_1: # %entry ++; O3-NEXT: # =>This Inner Loop Header: Depth=1 ++; O3-NEXT: ll $2, 0($1) ++; O3-NEXT: and $3, $2, $4 ++; O3-NEXT: sc $3, 0($1) ++; O3-NEXT: beqz $3, $BB4_1 ++; O3-NEXT: nop ++; O3-NEXT: # %bb.2: # 
%entry ++; O3-NEXT: jr $ra ++; O3-NEXT: nop ++; ++; MIPS32EB-LABEL: AtomicLoadAnd32: ++; MIPS32EB: # %bb.0: # %entry ++; MIPS32EB-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32EB-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32EB-NEXT: addu $1, $2, $25 ++; MIPS32EB-NEXT: lw $1, %got(x)($1) ++; MIPS32EB-NEXT: $BB4_1: # %entry ++; MIPS32EB-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32EB-NEXT: ll $2, 0($1) ++; MIPS32EB-NEXT: and $3, $2, $4 ++; MIPS32EB-NEXT: sc $3, 0($1) ++; MIPS32EB-NEXT: beqz $3, $BB4_1 ++; MIPS32EB-NEXT: nop ++; MIPS32EB-NEXT: # %bb.2: # %entry ++; MIPS32EB-NEXT: jr $ra ++; MIPS32EB-NEXT: nop ++entry: ++ %0 = atomicrmw and i32* @x, i32 %incr monotonic ++ ret i32 %0 ++} ++ ++define i32 @AtomicLoadNand32(i32 signext %incr) nounwind { ++; MIPS32-LABEL: AtomicLoadNand32: ++; MIPS32: # %bb.0: # %entry ++; MIPS32-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32-NEXT: addu $1, $2, $25 ++; MIPS32-NEXT: lw $1, %got(x)($1) ++; MIPS32-NEXT: $BB5_1: # %entry ++; MIPS32-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32-NEXT: ll $2, 0($1) ++; MIPS32-NEXT: and $3, $2, $4 ++; MIPS32-NEXT: nor $3, $zero, $3 ++; MIPS32-NEXT: sc $3, 0($1) ++; MIPS32-NEXT: beqz $3, $BB5_1 ++; MIPS32-NEXT: nop ++; MIPS32-NEXT: # %bb.2: # %entry ++; MIPS32-NEXT: jr $ra ++; MIPS32-NEXT: nop ++; ++; MIPS32O0-LABEL: AtomicLoadNand32: ++; MIPS32O0: # %bb.0: # %entry ++; MIPS32O0-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32O0-NEXT: addu $1, $2, $25 ++; MIPS32O0-NEXT: lw $3, %got(x)($1) ++; MIPS32O0-NEXT: $BB5_1: # %entry ++; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32O0-NEXT: ll $2, 0($3) ++; MIPS32O0-NEXT: and $1, $2, $4 ++; MIPS32O0-NEXT: nor $1, $zero, $1 ++; MIPS32O0-NEXT: sc $1, 0($3) ++; MIPS32O0-NEXT: beqz $1, $BB5_1 ++; MIPS32O0-NEXT: nop ++; MIPS32O0-NEXT: # %bb.2: # %entry ++; MIPS32O0-NEXT: jr $ra ++; MIPS32O0-NEXT: nop ++; ++; MIPS32R2-LABEL: AtomicLoadNand32: ++; MIPS32R2: # %bb.0: # %entry ++; MIPS32R2-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32R2-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32R2-NEXT: addu $1, $2, $25 ++; MIPS32R2-NEXT: lw $1, %got(x)($1) ++; MIPS32R2-NEXT: $BB5_1: # %entry ++; MIPS32R2-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32R2-NEXT: ll $2, 0($1) ++; MIPS32R2-NEXT: and $3, $2, $4 ++; MIPS32R2-NEXT: nor $3, $zero, $3 ++; MIPS32R2-NEXT: sc $3, 0($1) ++; MIPS32R2-NEXT: beqz $3, $BB5_1 ++; MIPS32R2-NEXT: nop ++; MIPS32R2-NEXT: # %bb.2: # %entry ++; MIPS32R2-NEXT: jr $ra ++; MIPS32R2-NEXT: nop ++; ++; MIPS32R6-LABEL: AtomicLoadNand32: ++; MIPS32R6: # %bb.0: # %entry ++; MIPS32R6-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32R6-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32R6-NEXT: addu $1, $2, $25 ++; MIPS32R6-NEXT: lw $1, %got(x)($1) ++; MIPS32R6-NEXT: $BB5_1: # %entry ++; MIPS32R6-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32R6-NEXT: ll $2, 0($1) ++; MIPS32R6-NEXT: and $3, $2, $4 ++; MIPS32R6-NEXT: nor $3, $zero, $3 ++; MIPS32R6-NEXT: sc $3, 0($1) ++; MIPS32R6-NEXT: beqzc $3, $BB5_1 ++; MIPS32R6-NEXT: nop ++; MIPS32R6-NEXT: # %bb.2: # %entry ++; MIPS32R6-NEXT: jrc $ra ++; ++; MIPS32R6O0-LABEL: AtomicLoadNand32: ++; MIPS32R6O0: # %bb.0: # %entry ++; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32R6O0-NEXT: addu $1, $2, $25 ++; MIPS32R6O0-NEXT: lw $3, %got(x)($1) ++; MIPS32R6O0-NEXT: $BB5_1: # %entry ++; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32R6O0-NEXT: ll $2, 0($3) ++; MIPS32R6O0-NEXT: and $1, $2, $4 ++; MIPS32R6O0-NEXT: 
nor $1, $zero, $1 ++; MIPS32R6O0-NEXT: sc $1, 0($3) ++; MIPS32R6O0-NEXT: beqzc $1, $BB5_1 ++; MIPS32R6O0-NEXT: nop ++; MIPS32R6O0-NEXT: # %bb.2: # %entry ++; MIPS32R6O0-NEXT: jrc $ra ++; ++; MIPS4-LABEL: AtomicLoadNand32: ++; MIPS4: # %bb.0: # %entry ++; MIPS4-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadNand32))) ++; MIPS4-NEXT: daddu $1, $1, $25 ++; MIPS4-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadNand32))) ++; MIPS4-NEXT: ld $1, %got_disp(x)($1) ++; MIPS4-NEXT: .LBB5_1: # %entry ++; MIPS4-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS4-NEXT: ll $2, 0($1) ++; MIPS4-NEXT: and $3, $2, $4 ++; MIPS4-NEXT: nor $3, $zero, $3 ++; MIPS4-NEXT: sc $3, 0($1) ++; MIPS4-NEXT: beqz $3, .LBB5_1 ++; MIPS4-NEXT: nop ++; MIPS4-NEXT: # %bb.2: # %entry ++; MIPS4-NEXT: jr $ra ++; MIPS4-NEXT: nop ++; ++; MIPS64-LABEL: AtomicLoadNand32: ++; MIPS64: # %bb.0: # %entry ++; MIPS64-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadNand32))) ++; MIPS64-NEXT: daddu $1, $1, $25 ++; MIPS64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadNand32))) ++; MIPS64-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64-NEXT: .LBB5_1: # %entry ++; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64-NEXT: ll $2, 0($1) ++; MIPS64-NEXT: and $3, $2, $4 ++; MIPS64-NEXT: nor $3, $zero, $3 ++; MIPS64-NEXT: sc $3, 0($1) ++; MIPS64-NEXT: beqz $3, .LBB5_1 ++; MIPS64-NEXT: nop ++; MIPS64-NEXT: # %bb.2: # %entry ++; MIPS64-NEXT: jr $ra ++; MIPS64-NEXT: nop ++; ++; MIPS64R2-LABEL: AtomicLoadNand32: ++; MIPS64R2: # %bb.0: # %entry ++; MIPS64R2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadNand32))) ++; MIPS64R2-NEXT: daddu $1, $1, $25 ++; MIPS64R2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadNand32))) ++; MIPS64R2-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64R2-NEXT: .LBB5_1: # %entry ++; MIPS64R2-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R2-NEXT: ll $2, 0($1) ++; MIPS64R2-NEXT: and $3, $2, $4 ++; MIPS64R2-NEXT: nor $3, $zero, $3 ++; MIPS64R2-NEXT: sc $3, 0($1) ++; MIPS64R2-NEXT: beqz $3, .LBB5_1 ++; MIPS64R2-NEXT: nop ++; MIPS64R2-NEXT: # %bb.2: # %entry ++; MIPS64R2-NEXT: jr $ra ++; MIPS64R2-NEXT: nop ++; ++; MIPS64R6-LABEL: AtomicLoadNand32: ++; MIPS64R6: # %bb.0: # %entry ++; MIPS64R6-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadNand32))) ++; MIPS64R6-NEXT: daddu $1, $1, $25 ++; MIPS64R6-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadNand32))) ++; MIPS64R6-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64R6-NEXT: .LBB5_1: # %entry ++; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6-NEXT: ll $2, 0($1) ++; MIPS64R6-NEXT: and $3, $2, $4 ++; MIPS64R6-NEXT: nor $3, $zero, $3 ++; MIPS64R6-NEXT: sc $3, 0($1) ++; MIPS64R6-NEXT: beqzc $3, .LBB5_1 ++; MIPS64R6-NEXT: nop ++; MIPS64R6-NEXT: # %bb.2: # %entry ++; MIPS64R6-NEXT: jrc $ra ++; ++; MIPS64R6O0-LABEL: AtomicLoadNand32: ++; MIPS64R6O0: # %bb.0: # %entry ++; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadNand32))) ++; MIPS64R6O0-NEXT: daddu $1, $1, $25 ++; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadNand32))) ++; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64 ++; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1) ++; MIPS64R6O0-NEXT: .LBB5_1: # %entry ++; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6O0-NEXT: ll $2, 0($3) ++; MIPS64R6O0-NEXT: and $1, $2, $4 ++; MIPS64R6O0-NEXT: nor $1, $zero, $1 ++; MIPS64R6O0-NEXT: sc $1, 0($3) ++; MIPS64R6O0-NEXT: beqzc $1, .LBB5_1 ++; MIPS64R6O0-NEXT: nop ++; MIPS64R6O0-NEXT: # %bb.2: # %entry ++; MIPS64R6O0-NEXT: jrc $ra ++; ++; MM32-LABEL: AtomicLoadNand32: ++; MM32: # %bb.0: # %entry ++; MM32-NEXT: lui $2, 
%hi(_gp_disp) ++; MM32-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MM32-NEXT: addu $2, $2, $25 ++; MM32-NEXT: lw $1, %got(x)($2) ++; MM32-NEXT: $BB5_1: # %entry ++; MM32-NEXT: # =>This Inner Loop Header: Depth=1 ++; MM32-NEXT: ll $2, 0($1) ++; MM32-NEXT: and $3, $2, $4 ++; MM32-NEXT: nor $3, $zero, $3 ++; MM32-NEXT: sc $3, 0($1) ++; MM32-NEXT: beqzc $3, $BB5_1 ++; MM32-NEXT: # %bb.2: # %entry ++; MM32-NEXT: jrc $ra ++; ++; O1-LABEL: AtomicLoadNand32: ++; O1: # %bb.0: # %entry ++; O1-NEXT: lui $2, %hi(_gp_disp) ++; O1-NEXT: addiu $2, $2, %lo(_gp_disp) ++; O1-NEXT: addu $1, $2, $25 ++; O1-NEXT: lw $1, %got(x)($1) ++; O1-NEXT: $BB5_1: # %entry ++; O1-NEXT: # =>This Inner Loop Header: Depth=1 ++; O1-NEXT: ll $2, 0($1) ++; O1-NEXT: and $3, $2, $4 ++; O1-NEXT: nor $3, $zero, $3 ++; O1-NEXT: sc $3, 0($1) ++; O1-NEXT: beqz $3, $BB5_1 ++; O1-NEXT: nop ++; O1-NEXT: # %bb.2: # %entry ++; O1-NEXT: jr $ra ++; O1-NEXT: nop ++; ++; O2-LABEL: AtomicLoadNand32: ++; O2: # %bb.0: # %entry ++; O2-NEXT: lui $2, %hi(_gp_disp) ++; O2-NEXT: addiu $2, $2, %lo(_gp_disp) ++; O2-NEXT: addu $1, $2, $25 ++; O2-NEXT: lw $1, %got(x)($1) ++; O2-NEXT: $BB5_1: # %entry ++; O2-NEXT: # =>This Inner Loop Header: Depth=1 ++; O2-NEXT: ll $2, 0($1) ++; O2-NEXT: and $3, $2, $4 ++; O2-NEXT: nor $3, $zero, $3 ++; O2-NEXT: sc $3, 0($1) ++; O2-NEXT: beqz $3, $BB5_1 ++; O2-NEXT: nop ++; O2-NEXT: # %bb.2: # %entry ++; O2-NEXT: jr $ra ++; O2-NEXT: nop ++; ++; O3-LABEL: AtomicLoadNand32: ++; O3: # %bb.0: # %entry ++; O3-NEXT: lui $2, %hi(_gp_disp) ++; O3-NEXT: addiu $2, $2, %lo(_gp_disp) ++; O3-NEXT: addu $1, $2, $25 ++; O3-NEXT: lw $1, %got(x)($1) ++; O3-NEXT: $BB5_1: # %entry ++; O3-NEXT: # =>This Inner Loop Header: Depth=1 ++; O3-NEXT: ll $2, 0($1) ++; O3-NEXT: and $3, $2, $4 ++; O3-NEXT: nor $3, $zero, $3 ++; O3-NEXT: sc $3, 0($1) ++; O3-NEXT: beqz $3, $BB5_1 ++; O3-NEXT: nop ++; O3-NEXT: # %bb.2: # %entry ++; O3-NEXT: jr $ra ++; O3-NEXT: nop ++; ++; MIPS32EB-LABEL: AtomicLoadNand32: ++; MIPS32EB: # %bb.0: # %entry ++; MIPS32EB-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32EB-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32EB-NEXT: addu $1, $2, $25 ++; MIPS32EB-NEXT: lw $1, %got(x)($1) ++; MIPS32EB-NEXT: $BB5_1: # %entry ++; MIPS32EB-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32EB-NEXT: ll $2, 0($1) ++; MIPS32EB-NEXT: and $3, $2, $4 ++; MIPS32EB-NEXT: nor $3, $zero, $3 ++; MIPS32EB-NEXT: sc $3, 0($1) ++; MIPS32EB-NEXT: beqz $3, $BB5_1 ++; MIPS32EB-NEXT: nop ++; MIPS32EB-NEXT: # %bb.2: # %entry ++; MIPS32EB-NEXT: jr $ra ++; MIPS32EB-NEXT: nop ++entry: ++ %0 = atomicrmw nand i32* @x, i32 %incr monotonic ++ ret i32 %0 ++ ++} ++ ++define i32 @AtomicSwap32(i32 signext %newval) nounwind { ++; MIPS32-LABEL: AtomicSwap32: ++; MIPS32: # %bb.0: # %entry ++; MIPS32-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32-NEXT: addiu $sp, $sp, -8 ++; MIPS32-NEXT: addu $1, $2, $25 ++; MIPS32-NEXT: sw $4, 4($sp) ++; MIPS32-NEXT: lw $1, %got(x)($1) ++; MIPS32-NEXT: $BB6_1: # %entry ++; MIPS32-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32-NEXT: ll $2, 0($1) ++; MIPS32-NEXT: move $3, $4 ++; MIPS32-NEXT: sc $3, 0($1) ++; MIPS32-NEXT: beqz $3, $BB6_1 ++; MIPS32-NEXT: nop ++; MIPS32-NEXT: # %bb.2: # %entry ++; MIPS32-NEXT: jr $ra ++; MIPS32-NEXT: addiu $sp, $sp, 8 ++; ++; MIPS32O0-LABEL: AtomicSwap32: ++; MIPS32O0: # %bb.0: # %entry ++; MIPS32O0-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32O0-NEXT: addiu $sp, $sp, -8 ++; MIPS32O0-NEXT: addu $1, $2, $25 ++; MIPS32O0-NEXT: sw $4, 4($sp) ++; 
MIPS32O0-NEXT: lw $4, 4($sp) ++; MIPS32O0-NEXT: lw $3, %got(x)($1) ++; MIPS32O0-NEXT: $BB6_1: # %entry ++; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32O0-NEXT: ll $2, 0($3) ++; MIPS32O0-NEXT: move $1, $4 ++; MIPS32O0-NEXT: sc $1, 0($3) ++; MIPS32O0-NEXT: beqz $1, $BB6_1 ++; MIPS32O0-NEXT: nop ++; MIPS32O0-NEXT: # %bb.2: # %entry ++; MIPS32O0-NEXT: addiu $sp, $sp, 8 ++; MIPS32O0-NEXT: jr $ra ++; MIPS32O0-NEXT: nop ++; ++; MIPS32R2-LABEL: AtomicSwap32: ++; MIPS32R2: # %bb.0: # %entry ++; MIPS32R2-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32R2-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32R2-NEXT: addiu $sp, $sp, -8 ++; MIPS32R2-NEXT: addu $1, $2, $25 ++; MIPS32R2-NEXT: sw $4, 4($sp) ++; MIPS32R2-NEXT: lw $1, %got(x)($1) ++; MIPS32R2-NEXT: $BB6_1: # %entry ++; MIPS32R2-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32R2-NEXT: ll $2, 0($1) ++; MIPS32R2-NEXT: move $3, $4 ++; MIPS32R2-NEXT: sc $3, 0($1) ++; MIPS32R2-NEXT: beqz $3, $BB6_1 ++; MIPS32R2-NEXT: nop ++; MIPS32R2-NEXT: # %bb.2: # %entry ++; MIPS32R2-NEXT: jr $ra ++; MIPS32R2-NEXT: addiu $sp, $sp, 8 ++; ++; MIPS32R6-LABEL: AtomicSwap32: ++; MIPS32R6: # %bb.0: # %entry ++; MIPS32R6-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32R6-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32R6-NEXT: addiu $sp, $sp, -8 ++; MIPS32R6-NEXT: addu $1, $2, $25 ++; MIPS32R6-NEXT: sw $4, 4($sp) ++; MIPS32R6-NEXT: lw $1, %got(x)($1) ++; MIPS32R6-NEXT: $BB6_1: # %entry ++; MIPS32R6-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32R6-NEXT: ll $2, 0($1) ++; MIPS32R6-NEXT: move $3, $4 ++; MIPS32R6-NEXT: sc $3, 0($1) ++; MIPS32R6-NEXT: beqzc $3, $BB6_1 ++; MIPS32R6-NEXT: nop ++; MIPS32R6-NEXT: # %bb.2: # %entry ++; MIPS32R6-NEXT: jr $ra ++; MIPS32R6-NEXT: addiu $sp, $sp, 8 ++; ++; MIPS32R6O0-LABEL: AtomicSwap32: ++; MIPS32R6O0: # %bb.0: # %entry ++; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32R6O0-NEXT: addiu $sp, $sp, -8 ++; MIPS32R6O0-NEXT: addu $1, $2, $25 ++; MIPS32R6O0-NEXT: sw $4, 4($sp) ++; MIPS32R6O0-NEXT: lw $4, 4($sp) ++; MIPS32R6O0-NEXT: lw $3, %got(x)($1) ++; MIPS32R6O0-NEXT: $BB6_1: # %entry ++; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32R6O0-NEXT: ll $2, 0($3) ++; MIPS32R6O0-NEXT: move $1, $4 ++; MIPS32R6O0-NEXT: sc $1, 0($3) ++; MIPS32R6O0-NEXT: beqzc $1, $BB6_1 ++; MIPS32R6O0-NEXT: # %bb.2: # %entry ++; MIPS32R6O0-NEXT: addiu $sp, $sp, 8 ++; MIPS32R6O0-NEXT: jrc $ra ++; ++; MIPS4-LABEL: AtomicSwap32: ++; MIPS4: # %bb.0: # %entry ++; MIPS4-NEXT: daddiu $sp, $sp, -16 ++; MIPS4-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicSwap32))) ++; MIPS4-NEXT: daddu $1, $1, $25 ++; MIPS4-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicSwap32))) ++; MIPS4-NEXT: sw $4, 12($sp) ++; MIPS4-NEXT: ld $1, %got_disp(x)($1) ++; MIPS4-NEXT: .LBB6_1: # %entry ++; MIPS4-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS4-NEXT: ll $2, 0($1) ++; MIPS4-NEXT: move $3, $4 ++; MIPS4-NEXT: sc $3, 0($1) ++; MIPS4-NEXT: beqz $3, .LBB6_1 ++; MIPS4-NEXT: nop ++; MIPS4-NEXT: # %bb.2: # %entry ++; MIPS4-NEXT: jr $ra ++; MIPS4-NEXT: daddiu $sp, $sp, 16 ++; ++; MIPS64-LABEL: AtomicSwap32: ++; MIPS64: # %bb.0: # %entry ++; MIPS64-NEXT: daddiu $sp, $sp, -16 ++; MIPS64-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicSwap32))) ++; MIPS64-NEXT: daddu $1, $1, $25 ++; MIPS64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicSwap32))) ++; MIPS64-NEXT: sw $4, 12($sp) ++; MIPS64-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64-NEXT: .LBB6_1: # %entry ++; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64-NEXT: ll $2, 0($1) ++; MIPS64-NEXT: move $3, $4 ++; 
MIPS64-NEXT: sc $3, 0($1) ++; MIPS64-NEXT: beqz $3, .LBB6_1 ++; MIPS64-NEXT: nop ++; MIPS64-NEXT: # %bb.2: # %entry ++; MIPS64-NEXT: jr $ra ++; MIPS64-NEXT: daddiu $sp, $sp, 16 ++; ++; MIPS64R2-LABEL: AtomicSwap32: ++; MIPS64R2: # %bb.0: # %entry ++; MIPS64R2-NEXT: daddiu $sp, $sp, -16 ++; MIPS64R2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicSwap32))) ++; MIPS64R2-NEXT: daddu $1, $1, $25 ++; MIPS64R2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicSwap32))) ++; MIPS64R2-NEXT: sw $4, 12($sp) ++; MIPS64R2-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64R2-NEXT: .LBB6_1: # %entry ++; MIPS64R2-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R2-NEXT: ll $2, 0($1) ++; MIPS64R2-NEXT: move $3, $4 ++; MIPS64R2-NEXT: sc $3, 0($1) ++; MIPS64R2-NEXT: beqz $3, .LBB6_1 ++; MIPS64R2-NEXT: nop ++; MIPS64R2-NEXT: # %bb.2: # %entry ++; MIPS64R2-NEXT: jr $ra ++; MIPS64R2-NEXT: daddiu $sp, $sp, 16 ++; ++; MIPS64R6-LABEL: AtomicSwap32: ++; MIPS64R6: # %bb.0: # %entry ++; MIPS64R6-NEXT: daddiu $sp, $sp, -16 ++; MIPS64R6-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicSwap32))) ++; MIPS64R6-NEXT: daddu $1, $1, $25 ++; MIPS64R6-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicSwap32))) ++; MIPS64R6-NEXT: sw $4, 12($sp) ++; MIPS64R6-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64R6-NEXT: .LBB6_1: # %entry ++; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6-NEXT: ll $2, 0($1) ++; MIPS64R6-NEXT: move $3, $4 ++; MIPS64R6-NEXT: sc $3, 0($1) ++; MIPS64R6-NEXT: beqzc $3, .LBB6_1 ++; MIPS64R6-NEXT: nop ++; MIPS64R6-NEXT: # %bb.2: # %entry ++; MIPS64R6-NEXT: jr $ra ++; MIPS64R6-NEXT: daddiu $sp, $sp, 16 ++; ++; MIPS64R6O0-LABEL: AtomicSwap32: ++; MIPS64R6O0: # %bb.0: # %entry ++; MIPS64R6O0-NEXT: daddiu $sp, $sp, -16 ++; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicSwap32))) ++; MIPS64R6O0-NEXT: daddu $1, $1, $25 ++; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicSwap32))) ++; MIPS64R6O0-NEXT: move $2, $4 ++; MIPS64R6O0-NEXT: sw $2, 12($sp) ++; MIPS64R6O0-NEXT: lw $4, 12($sp) ++; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1) ++; MIPS64R6O0-NEXT: .LBB6_1: # %entry ++; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6O0-NEXT: ll $2, 0($3) ++; MIPS64R6O0-NEXT: move $1, $4 ++; MIPS64R6O0-NEXT: sc $1, 0($3) ++; MIPS64R6O0-NEXT: beqzc $1, .LBB6_1 ++; MIPS64R6O0-NEXT: # %bb.2: # %entry ++; MIPS64R6O0-NEXT: daddiu $sp, $sp, 16 ++; MIPS64R6O0-NEXT: jrc $ra ++; ++; MM32-LABEL: AtomicSwap32: ++; MM32: # %bb.0: # %entry ++; MM32-NEXT: lui $2, %hi(_gp_disp) ++; MM32-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MM32-NEXT: addiu $sp, $sp, -8 ++; MM32-NEXT: addu $2, $2, $25 ++; MM32-NEXT: sw $4, 4($sp) ++; MM32-NEXT: lw $1, %got(x)($2) ++; MM32-NEXT: $BB6_1: # %entry ++; MM32-NEXT: # =>This Inner Loop Header: Depth=1 ++; MM32-NEXT: ll $2, 0($1) ++; MM32-NEXT: or $3, $4, $zero ++; MM32-NEXT: sc $3, 0($1) ++; MM32-NEXT: beqzc $3, $BB6_1 ++; MM32-NEXT: # %bb.2: # %entry ++; MM32-NEXT: addiusp 8 ++; MM32-NEXT: jrc $ra ++; ++; O1-LABEL: AtomicSwap32: ++; O1: # %bb.0: # %entry ++; O1-NEXT: lui $2, %hi(_gp_disp) ++; O1-NEXT: addiu $2, $2, %lo(_gp_disp) ++; O1-NEXT: addiu $sp, $sp, -8 ++; O1-NEXT: addu $1, $2, $25 ++; O1-NEXT: sw $4, 4($sp) ++; O1-NEXT: lw $1, %got(x)($1) ++; O1-NEXT: $BB6_1: # %entry ++; O1-NEXT: # =>This Inner Loop Header: Depth=1 ++; O1-NEXT: ll $2, 0($1) ++; O1-NEXT: move $3, $4 ++; O1-NEXT: sc $3, 0($1) ++; O1-NEXT: beqz $3, $BB6_1 ++; O1-NEXT: nop ++; O1-NEXT: # %bb.2: # %entry ++; O1-NEXT: jr $ra ++; O1-NEXT: addiu $sp, $sp, 8 ++; ++; O2-LABEL: AtomicSwap32: ++; O2: # %bb.0: # %entry ++; O2-NEXT: lui $2, %hi(_gp_disp) ++; 
O2-NEXT: addiu $2, $2, %lo(_gp_disp) ++; O2-NEXT: addiu $sp, $sp, -8 ++; O2-NEXT: addu $1, $2, $25 ++; O2-NEXT: sw $4, 4($sp) ++; O2-NEXT: lw $1, %got(x)($1) ++; O2-NEXT: $BB6_1: # %entry ++; O2-NEXT: # =>This Inner Loop Header: Depth=1 ++; O2-NEXT: ll $2, 0($1) ++; O2-NEXT: move $3, $4 ++; O2-NEXT: sc $3, 0($1) ++; O2-NEXT: beqz $3, $BB6_1 ++; O2-NEXT: nop ++; O2-NEXT: # %bb.2: # %entry ++; O2-NEXT: jr $ra ++; O2-NEXT: addiu $sp, $sp, 8 ++; ++; O3-LABEL: AtomicSwap32: ++; O3: # %bb.0: # %entry ++; O3-NEXT: lui $2, %hi(_gp_disp) ++; O3-NEXT: addiu $2, $2, %lo(_gp_disp) ++; O3-NEXT: addiu $sp, $sp, -8 ++; O3-NEXT: addu $1, $2, $25 ++; O3-NEXT: sw $4, 4($sp) ++; O3-NEXT: lw $1, %got(x)($1) ++; O3-NEXT: $BB6_1: # %entry ++; O3-NEXT: # =>This Inner Loop Header: Depth=1 ++; O3-NEXT: ll $2, 0($1) ++; O3-NEXT: move $3, $4 ++; O3-NEXT: sc $3, 0($1) ++; O3-NEXT: beqz $3, $BB6_1 ++; O3-NEXT: nop ++; O3-NEXT: # %bb.2: # %entry ++; O3-NEXT: jr $ra ++; O3-NEXT: addiu $sp, $sp, 8 ++; ++; MIPS32EB-LABEL: AtomicSwap32: ++; MIPS32EB: # %bb.0: # %entry ++; MIPS32EB-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32EB-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32EB-NEXT: addiu $sp, $sp, -8 ++; MIPS32EB-NEXT: addu $1, $2, $25 ++; MIPS32EB-NEXT: sw $4, 4($sp) ++; MIPS32EB-NEXT: lw $1, %got(x)($1) ++; MIPS32EB-NEXT: $BB6_1: # %entry ++; MIPS32EB-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32EB-NEXT: ll $2, 0($1) ++; MIPS32EB-NEXT: move $3, $4 ++; MIPS32EB-NEXT: sc $3, 0($1) ++; MIPS32EB-NEXT: beqz $3, $BB6_1 ++; MIPS32EB-NEXT: nop ++; MIPS32EB-NEXT: # %bb.2: # %entry ++; MIPS32EB-NEXT: jr $ra ++; MIPS32EB-NEXT: addiu $sp, $sp, 8 ++entry: ++ %newval.addr = alloca i32, align 4 ++ store i32 %newval, i32* %newval.addr, align 4 ++ %tmp = load i32, i32* %newval.addr, align 4 ++ %0 = atomicrmw xchg i32* @x, i32 %tmp monotonic ++ ret i32 %0 ++ ++} ++ ++define i32 @AtomicCmpSwap32(i32 signext %oldval, i32 signext %newval) nounwind { ++; MIPS32-LABEL: AtomicCmpSwap32: ++; MIPS32: # %bb.0: # %entry ++; MIPS32-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32-NEXT: addiu $sp, $sp, -8 ++; MIPS32-NEXT: addu $1, $2, $25 ++; MIPS32-NEXT: sw $5, 4($sp) ++; MIPS32-NEXT: lw $1, %got(x)($1) ++; MIPS32-NEXT: $BB7_1: # %entry ++; MIPS32-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32-NEXT: ll $2, 0($1) ++; MIPS32-NEXT: bne $2, $4, $BB7_3 ++; MIPS32-NEXT: nop ++; MIPS32-NEXT: # %bb.2: # %entry ++; MIPS32-NEXT: # in Loop: Header=BB7_1 Depth=1 ++; MIPS32-NEXT: move $3, $5 ++; MIPS32-NEXT: sc $3, 0($1) ++; MIPS32-NEXT: beqz $3, $BB7_1 ++; MIPS32-NEXT: nop ++; MIPS32-NEXT: $BB7_3: # %entry ++; MIPS32-NEXT: sync ++; MIPS32-NEXT: jr $ra ++; MIPS32-NEXT: addiu $sp, $sp, 8 ++; ++; MIPS32O0-LABEL: AtomicCmpSwap32: ++; MIPS32O0: # %bb.0: # %entry ++; MIPS32O0-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32O0-NEXT: addiu $sp, $sp, -8 ++; MIPS32O0-NEXT: addu $1, $2, $25 ++; MIPS32O0-NEXT: sw $5, 4($sp) ++; MIPS32O0-NEXT: lw $6, 4($sp) ++; MIPS32O0-NEXT: lw $3, %got(x)($1) ++; MIPS32O0-NEXT: move $5, $4 ++; MIPS32O0-NEXT: $BB7_1: # %entry ++; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32O0-NEXT: ll $2, 0($3) ++; MIPS32O0-NEXT: bne $2, $5, $BB7_3 ++; MIPS32O0-NEXT: nop ++; MIPS32O0-NEXT: # %bb.2: # %entry ++; MIPS32O0-NEXT: # in Loop: Header=BB7_1 Depth=1 ++; MIPS32O0-NEXT: move $1, $6 ++; MIPS32O0-NEXT: sc $1, 0($3) ++; MIPS32O0-NEXT: beqz $1, $BB7_1 ++; MIPS32O0-NEXT: nop ++; MIPS32O0-NEXT: $BB7_3: # %entry ++; MIPS32O0-NEXT: sync ++; MIPS32O0-NEXT: xor $1, $2, 
$4 ++; MIPS32O0-NEXT: sltiu $1, $1, 1 ++; MIPS32O0-NEXT: addiu $sp, $sp, 8 ++; MIPS32O0-NEXT: jr $ra ++; MIPS32O0-NEXT: nop ++; ++; MIPS32R2-LABEL: AtomicCmpSwap32: ++; MIPS32R2: # %bb.0: # %entry ++; MIPS32R2-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32R2-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32R2-NEXT: addiu $sp, $sp, -8 ++; MIPS32R2-NEXT: addu $1, $2, $25 ++; MIPS32R2-NEXT: sw $5, 4($sp) ++; MIPS32R2-NEXT: lw $1, %got(x)($1) ++; MIPS32R2-NEXT: $BB7_1: # %entry ++; MIPS32R2-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32R2-NEXT: ll $2, 0($1) ++; MIPS32R2-NEXT: bne $2, $4, $BB7_3 ++; MIPS32R2-NEXT: nop ++; MIPS32R2-NEXT: # %bb.2: # %entry ++; MIPS32R2-NEXT: # in Loop: Header=BB7_1 Depth=1 ++; MIPS32R2-NEXT: move $3, $5 ++; MIPS32R2-NEXT: sc $3, 0($1) ++; MIPS32R2-NEXT: beqz $3, $BB7_1 ++; MIPS32R2-NEXT: nop ++; MIPS32R2-NEXT: $BB7_3: # %entry ++; MIPS32R2-NEXT: sync ++; MIPS32R2-NEXT: jr $ra ++; MIPS32R2-NEXT: addiu $sp, $sp, 8 ++; ++; MIPS32R6-LABEL: AtomicCmpSwap32: ++; MIPS32R6: # %bb.0: # %entry ++; MIPS32R6-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32R6-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32R6-NEXT: addiu $sp, $sp, -8 ++; MIPS32R6-NEXT: addu $1, $2, $25 ++; MIPS32R6-NEXT: sw $5, 4($sp) ++; MIPS32R6-NEXT: lw $1, %got(x)($1) ++; MIPS32R6-NEXT: $BB7_1: # %entry ++; MIPS32R6-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32R6-NEXT: ll $2, 0($1) ++; MIPS32R6-NEXT: bnec $2, $4, $BB7_3 ++; MIPS32R6-NEXT: # %bb.2: # %entry ++; MIPS32R6-NEXT: # in Loop: Header=BB7_1 Depth=1 ++; MIPS32R6-NEXT: move $3, $5 ++; MIPS32R6-NEXT: sc $3, 0($1) ++; MIPS32R6-NEXT: beqzc $3, $BB7_1 ++; MIPS32R6-NEXT: $BB7_3: # %entry ++; MIPS32R6-NEXT: sync ++; MIPS32R6-NEXT: jr $ra ++; MIPS32R6-NEXT: addiu $sp, $sp, 8 ++; ++; MIPS32R6O0-LABEL: AtomicCmpSwap32: ++; MIPS32R6O0: # %bb.0: # %entry ++; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32R6O0-NEXT: addiu $sp, $sp, -8 ++; MIPS32R6O0-NEXT: addu $1, $2, $25 ++; MIPS32R6O0-NEXT: sw $5, 4($sp) ++; MIPS32R6O0-NEXT: lw $5, 4($sp) ++; MIPS32R6O0-NEXT: lw $3, %got(x)($1) ++; MIPS32R6O0-NEXT: $BB7_1: # %entry ++; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32R6O0-NEXT: ll $2, 0($3) ++; MIPS32R6O0-NEXT: bnec $2, $4, $BB7_3 ++; MIPS32R6O0-NEXT: # %bb.2: # %entry ++; MIPS32R6O0-NEXT: # in Loop: Header=BB7_1 Depth=1 ++; MIPS32R6O0-NEXT: move $1, $5 ++; MIPS32R6O0-NEXT: sc $1, 0($3) ++; MIPS32R6O0-NEXT: beqzc $1, $BB7_1 ++; MIPS32R6O0-NEXT: $BB7_3: # %entry ++; MIPS32R6O0-NEXT: sync ++; MIPS32R6O0-NEXT: addiu $sp, $sp, 8 ++; MIPS32R6O0-NEXT: jrc $ra ++; ++; MIPS4-LABEL: AtomicCmpSwap32: ++; MIPS4: # %bb.0: # %entry ++; MIPS4-NEXT: daddiu $sp, $sp, -16 ++; MIPS4-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicCmpSwap32))) ++; MIPS4-NEXT: daddu $1, $1, $25 ++; MIPS4-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicCmpSwap32))) ++; MIPS4-NEXT: sw $5, 12($sp) ++; MIPS4-NEXT: ld $1, %got_disp(x)($1) ++; MIPS4-NEXT: .LBB7_1: # %entry ++; MIPS4-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS4-NEXT: ll $2, 0($1) ++; MIPS4-NEXT: bne $2, $4, .LBB7_3 ++; MIPS4-NEXT: nop ++; MIPS4-NEXT: # %bb.2: # %entry ++; MIPS4-NEXT: # in Loop: Header=BB7_1 Depth=1 ++; MIPS4-NEXT: move $3, $5 ++; MIPS4-NEXT: sc $3, 0($1) ++; MIPS4-NEXT: beqz $3, .LBB7_1 ++; MIPS4-NEXT: nop ++; MIPS4-NEXT: .LBB7_3: # %entry ++; MIPS4-NEXT: sync ++; MIPS4-NEXT: jr $ra ++; MIPS4-NEXT: daddiu $sp, $sp, 16 ++; ++; MIPS64-LABEL: AtomicCmpSwap32: ++; MIPS64: # %bb.0: # %entry ++; MIPS64-NEXT: daddiu $sp, $sp, -16 ++; MIPS64-NEXT: lui $1, 
%hi(%neg(%gp_rel(AtomicCmpSwap32))) ++; MIPS64-NEXT: daddu $1, $1, $25 ++; MIPS64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicCmpSwap32))) ++; MIPS64-NEXT: sw $5, 12($sp) ++; MIPS64-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64-NEXT: .LBB7_1: # %entry ++; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64-NEXT: ll $2, 0($1) ++; MIPS64-NEXT: bne $2, $4, .LBB7_3 ++; MIPS64-NEXT: nop ++; MIPS64-NEXT: # %bb.2: # %entry ++; MIPS64-NEXT: # in Loop: Header=BB7_1 Depth=1 ++; MIPS64-NEXT: move $3, $5 ++; MIPS64-NEXT: sc $3, 0($1) ++; MIPS64-NEXT: beqz $3, .LBB7_1 ++; MIPS64-NEXT: nop ++; MIPS64-NEXT: .LBB7_3: # %entry ++; MIPS64-NEXT: sync ++; MIPS64-NEXT: jr $ra ++; MIPS64-NEXT: daddiu $sp, $sp, 16 ++; ++; MIPS64R2-LABEL: AtomicCmpSwap32: ++; MIPS64R2: # %bb.0: # %entry ++; MIPS64R2-NEXT: daddiu $sp, $sp, -16 ++; MIPS64R2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicCmpSwap32))) ++; MIPS64R2-NEXT: daddu $1, $1, $25 ++; MIPS64R2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicCmpSwap32))) ++; MIPS64R2-NEXT: sw $5, 12($sp) ++; MIPS64R2-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64R2-NEXT: .LBB7_1: # %entry ++; MIPS64R2-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R2-NEXT: ll $2, 0($1) ++; MIPS64R2-NEXT: bne $2, $4, .LBB7_3 ++; MIPS64R2-NEXT: nop ++; MIPS64R2-NEXT: # %bb.2: # %entry ++; MIPS64R2-NEXT: # in Loop: Header=BB7_1 Depth=1 ++; MIPS64R2-NEXT: move $3, $5 ++; MIPS64R2-NEXT: sc $3, 0($1) ++; MIPS64R2-NEXT: beqz $3, .LBB7_1 ++; MIPS64R2-NEXT: nop ++; MIPS64R2-NEXT: .LBB7_3: # %entry ++; MIPS64R2-NEXT: sync ++; MIPS64R2-NEXT: jr $ra ++; MIPS64R2-NEXT: daddiu $sp, $sp, 16 ++; ++; MIPS64R6-LABEL: AtomicCmpSwap32: ++; MIPS64R6: # %bb.0: # %entry ++; MIPS64R6-NEXT: daddiu $sp, $sp, -16 ++; MIPS64R6-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicCmpSwap32))) ++; MIPS64R6-NEXT: daddu $1, $1, $25 ++; MIPS64R6-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicCmpSwap32))) ++; MIPS64R6-NEXT: sw $5, 12($sp) ++; MIPS64R6-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64R6-NEXT: .LBB7_1: # %entry ++; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6-NEXT: ll $2, 0($1) ++; MIPS64R6-NEXT: bnec $2, $4, .LBB7_3 ++; MIPS64R6-NEXT: # %bb.2: # %entry ++; MIPS64R6-NEXT: # in Loop: Header=BB7_1 Depth=1 ++; MIPS64R6-NEXT: move $3, $5 ++; MIPS64R6-NEXT: sc $3, 0($1) ++; MIPS64R6-NEXT: beqzc $3, .LBB7_1 ++; MIPS64R6-NEXT: .LBB7_3: # %entry ++; MIPS64R6-NEXT: sync ++; MIPS64R6-NEXT: jr $ra ++; MIPS64R6-NEXT: daddiu $sp, $sp, 16 ++; ++; MIPS64R6O0-LABEL: AtomicCmpSwap32: ++; MIPS64R6O0: # %bb.0: # %entry ++; MIPS64R6O0-NEXT: daddiu $sp, $sp, -16 ++; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicCmpSwap32))) ++; MIPS64R6O0-NEXT: daddu $1, $1, $25 ++; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicCmpSwap32))) ++; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64 ++; MIPS64R6O0-NEXT: move $2, $5 ++; MIPS64R6O0-NEXT: sw $2, 12($sp) ++; MIPS64R6O0-NEXT: lw $5, 12($sp) ++; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1) ++; MIPS64R6O0-NEXT: .LBB7_1: # %entry ++; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6O0-NEXT: ll $2, 0($3) ++; MIPS64R6O0-NEXT: bnec $2, $4, .LBB7_3 ++; MIPS64R6O0-NEXT: # %bb.2: # %entry ++; MIPS64R6O0-NEXT: # in Loop: Header=BB7_1 Depth=1 ++; MIPS64R6O0-NEXT: move $1, $5 ++; MIPS64R6O0-NEXT: sc $1, 0($3) ++; MIPS64R6O0-NEXT: beqzc $1, .LBB7_1 ++; MIPS64R6O0-NEXT: .LBB7_3: # %entry ++; MIPS64R6O0-NEXT: sync ++; MIPS64R6O0-NEXT: daddiu $sp, $sp, 16 ++; MIPS64R6O0-NEXT: jrc $ra ++; ++; MM32-LABEL: AtomicCmpSwap32: ++; MM32: # %bb.0: # %entry ++; MM32-NEXT: lui $2, %hi(_gp_disp) ++; MM32-NEXT: 
addiu $2, $2, %lo(_gp_disp) ++; MM32-NEXT: addiu $sp, $sp, -8 ++; MM32-NEXT: addu $2, $2, $25 ++; MM32-NEXT: sw $5, 4($sp) ++; MM32-NEXT: lw $1, %got(x)($2) ++; MM32-NEXT: $BB7_1: # %entry ++; MM32-NEXT: # =>This Inner Loop Header: Depth=1 ++; MM32-NEXT: ll $2, 0($1) ++; MM32-NEXT: bne $2, $4, $BB7_3 ++; MM32-NEXT: nop ++; MM32-NEXT: # %bb.2: # %entry ++; MM32-NEXT: # in Loop: Header=BB7_1 Depth=1 ++; MM32-NEXT: move $3, $5 ++; MM32-NEXT: sc $3, 0($1) ++; MM32-NEXT: beqzc $3, $BB7_1 ++; MM32-NEXT: $BB7_3: # %entry ++; MM32-NEXT: sync ++; MM32-NEXT: addiusp 8 ++; MM32-NEXT: jrc $ra ++; ++; O1-LABEL: AtomicCmpSwap32: ++; O1: # %bb.0: # %entry ++; O1-NEXT: lui $2, %hi(_gp_disp) ++; O1-NEXT: addiu $2, $2, %lo(_gp_disp) ++; O1-NEXT: addiu $sp, $sp, -8 ++; O1-NEXT: addu $1, $2, $25 ++; O1-NEXT: sw $5, 4($sp) ++; O1-NEXT: lw $1, %got(x)($1) ++; O1-NEXT: $BB7_1: # %entry ++; O1-NEXT: # =>This Inner Loop Header: Depth=1 ++; O1-NEXT: ll $2, 0($1) ++; O1-NEXT: bne $2, $4, $BB7_3 ++; O1-NEXT: nop ++; O1-NEXT: # %bb.2: # %entry ++; O1-NEXT: # in Loop: Header=BB7_1 Depth=1 ++; O1-NEXT: move $3, $5 ++; O1-NEXT: sc $3, 0($1) ++; O1-NEXT: beqz $3, $BB7_1 ++; O1-NEXT: nop ++; O1-NEXT: $BB7_3: # %entry ++; O1-NEXT: sync ++; O1-NEXT: jr $ra ++; O1-NEXT: addiu $sp, $sp, 8 ++; ++; O2-LABEL: AtomicCmpSwap32: ++; O2: # %bb.0: # %entry ++; O2-NEXT: lui $2, %hi(_gp_disp) ++; O2-NEXT: addiu $2, $2, %lo(_gp_disp) ++; O2-NEXT: addiu $sp, $sp, -8 ++; O2-NEXT: addu $1, $2, $25 ++; O2-NEXT: sw $5, 4($sp) ++; O2-NEXT: lw $1, %got(x)($1) ++; O2-NEXT: $BB7_1: # %entry ++; O2-NEXT: # =>This Inner Loop Header: Depth=1 ++; O2-NEXT: ll $2, 0($1) ++; O2-NEXT: bne $2, $4, $BB7_3 ++; O2-NEXT: nop ++; O2-NEXT: # %bb.2: # %entry ++; O2-NEXT: # in Loop: Header=BB7_1 Depth=1 ++; O2-NEXT: move $3, $5 ++; O2-NEXT: sc $3, 0($1) ++; O2-NEXT: beqz $3, $BB7_1 ++; O2-NEXT: nop ++; O2-NEXT: $BB7_3: # %entry ++; O2-NEXT: sync ++; O2-NEXT: jr $ra ++; O2-NEXT: addiu $sp, $sp, 8 ++; ++; O3-LABEL: AtomicCmpSwap32: ++; O3: # %bb.0: # %entry ++; O3-NEXT: lui $2, %hi(_gp_disp) ++; O3-NEXT: addiu $2, $2, %lo(_gp_disp) ++; O3-NEXT: addiu $sp, $sp, -8 ++; O3-NEXT: addu $1, $2, $25 ++; O3-NEXT: sw $5, 4($sp) ++; O3-NEXT: lw $1, %got(x)($1) ++; O3-NEXT: $BB7_1: # %entry ++; O3-NEXT: # =>This Inner Loop Header: Depth=1 ++; O3-NEXT: ll $2, 0($1) ++; O3-NEXT: bne $2, $4, $BB7_3 ++; O3-NEXT: nop ++; O3-NEXT: # %bb.2: # %entry ++; O3-NEXT: # in Loop: Header=BB7_1 Depth=1 ++; O3-NEXT: move $3, $5 ++; O3-NEXT: sc $3, 0($1) ++; O3-NEXT: beqz $3, $BB7_1 ++; O3-NEXT: nop ++; O3-NEXT: $BB7_3: # %entry ++; O3-NEXT: sync ++; O3-NEXT: jr $ra ++; O3-NEXT: addiu $sp, $sp, 8 ++; ++; MIPS32EB-LABEL: AtomicCmpSwap32: ++; MIPS32EB: # %bb.0: # %entry ++; MIPS32EB-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32EB-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32EB-NEXT: addiu $sp, $sp, -8 ++; MIPS32EB-NEXT: addu $1, $2, $25 ++; MIPS32EB-NEXT: sw $5, 4($sp) ++; MIPS32EB-NEXT: lw $1, %got(x)($1) ++; MIPS32EB-NEXT: $BB7_1: # %entry ++; MIPS32EB-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32EB-NEXT: ll $2, 0($1) ++; MIPS32EB-NEXT: bne $2, $4, $BB7_3 ++; MIPS32EB-NEXT: nop ++; MIPS32EB-NEXT: # %bb.2: # %entry ++; MIPS32EB-NEXT: # in Loop: Header=BB7_1 Depth=1 ++; MIPS32EB-NEXT: move $3, $5 ++; MIPS32EB-NEXT: sc $3, 0($1) ++; MIPS32EB-NEXT: beqz $3, $BB7_1 ++; MIPS32EB-NEXT: nop ++; MIPS32EB-NEXT: $BB7_3: # %entry ++; MIPS32EB-NEXT: sync ++; MIPS32EB-NEXT: jr $ra ++; MIPS32EB-NEXT: addiu $sp, $sp, 8 ++entry: ++ %newval.addr = alloca i32, align 4 ++ store i32 %newval, i32* %newval.addr, 
align 4 ++ %tmp = load i32, i32* %newval.addr, align 4 ++ %0 = cmpxchg i32* @x, i32 %oldval, i32 %tmp monotonic monotonic ++ %1 = extractvalue { i32, i1 } %0, 0 ++ ret i32 %1 ++ ++} ++ ++@y = common global i8 0, align 1 ++ ++define signext i8 @AtomicLoadAdd8(i8 signext %incr) nounwind { ++; MIPS32-LABEL: AtomicLoadAdd8: ++; MIPS32: # %bb.0: # %entry ++; MIPS32-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32-NEXT: addu $1, $2, $25 ++; MIPS32-NEXT: lw $1, %got(y)($1) ++; MIPS32-NEXT: addiu $2, $zero, -4 ++; MIPS32-NEXT: and $3, $1, $2 ++; MIPS32-NEXT: andi $1, $1, 3 ++; MIPS32-NEXT: sll $1, $1, 3 ++; MIPS32-NEXT: ori $2, $zero, 255 ++; MIPS32-NEXT: sllv $5, $2, $1 ++; MIPS32-NEXT: nor $6, $zero, $5 ++; MIPS32-NEXT: sllv $4, $4, $1 ++; MIPS32-NEXT: $BB8_1: # %entry ++; MIPS32-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32-NEXT: ll $7, 0($3) ++; MIPS32-NEXT: addu $8, $7, $4 ++; MIPS32-NEXT: and $8, $8, $5 ++; MIPS32-NEXT: and $9, $7, $6 ++; MIPS32-NEXT: or $9, $9, $8 ++; MIPS32-NEXT: sc $9, 0($3) ++; MIPS32-NEXT: beqz $9, $BB8_1 ++; MIPS32-NEXT: nop ++; MIPS32-NEXT: # %bb.2: # %entry ++; MIPS32-NEXT: and $2, $7, $5 ++; MIPS32-NEXT: srlv $2, $2, $1 ++; MIPS32-NEXT: sll $2, $2, 24 ++; MIPS32-NEXT: sra $2, $2, 24 ++; MIPS32-NEXT: # %bb.3: # %entry ++; MIPS32-NEXT: jr $ra ++; MIPS32-NEXT: nop ++; ++; MIPS32O0-LABEL: AtomicLoadAdd8: ++; MIPS32O0: # %bb.0: # %entry ++; MIPS32O0-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32O0-NEXT: addiu $sp, $sp, -8 ++; MIPS32O0-NEXT: addu $1, $2, $25 ++; MIPS32O0-NEXT: lw $1, %got(y)($1) ++; MIPS32O0-NEXT: addiu $2, $zero, -4 ++; MIPS32O0-NEXT: and $5, $1, $2 ++; MIPS32O0-NEXT: andi $1, $1, 3 ++; MIPS32O0-NEXT: sll $9, $1, 3 ++; MIPS32O0-NEXT: ori $1, $zero, 255 ++; MIPS32O0-NEXT: sllv $7, $1, $9 ++; MIPS32O0-NEXT: nor $8, $zero, $7 ++; MIPS32O0-NEXT: sllv $6, $4, $9 ++; MIPS32O0-NEXT: $BB8_1: # %entry ++; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32O0-NEXT: ll $2, 0($5) ++; MIPS32O0-NEXT: addu $3, $2, $6 ++; MIPS32O0-NEXT: and $3, $3, $7 ++; MIPS32O0-NEXT: and $4, $2, $8 ++; MIPS32O0-NEXT: or $4, $4, $3 ++; MIPS32O0-NEXT: sc $4, 0($5) ++; MIPS32O0-NEXT: beqz $4, $BB8_1 ++; MIPS32O0-NEXT: nop ++; MIPS32O0-NEXT: # %bb.2: # %entry ++; MIPS32O0-NEXT: and $1, $2, $7 ++; MIPS32O0-NEXT: srlv $1, $1, $9 ++; MIPS32O0-NEXT: sll $1, $1, 24 ++; MIPS32O0-NEXT: sra $1, $1, 24 ++; MIPS32O0-NEXT: # %bb.3: # %entry ++; MIPS32O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ++; MIPS32O0-NEXT: # %bb.4: # %entry ++; MIPS32O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload ++; MIPS32O0-NEXT: sll $1, $1, 24 ++; MIPS32O0-NEXT: sra $2, $1, 24 ++; MIPS32O0-NEXT: addiu $sp, $sp, 8 ++; MIPS32O0-NEXT: jr $ra ++; MIPS32O0-NEXT: nop ++; ++; MIPS32R2-LABEL: AtomicLoadAdd8: ++; MIPS32R2: # %bb.0: # %entry ++; MIPS32R2-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32R2-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32R2-NEXT: addu $1, $2, $25 ++; MIPS32R2-NEXT: lw $1, %got(y)($1) ++; MIPS32R2-NEXT: addiu $2, $zero, -4 ++; MIPS32R2-NEXT: and $3, $1, $2 ++; MIPS32R2-NEXT: andi $1, $1, 3 ++; MIPS32R2-NEXT: sll $1, $1, 3 ++; MIPS32R2-NEXT: ori $2, $zero, 255 ++; MIPS32R2-NEXT: sllv $5, $2, $1 ++; MIPS32R2-NEXT: nor $6, $zero, $5 ++; MIPS32R2-NEXT: sllv $4, $4, $1 ++; MIPS32R2-NEXT: $BB8_1: # %entry ++; MIPS32R2-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32R2-NEXT: ll $7, 0($3) ++; MIPS32R2-NEXT: addu $8, $7, $4 ++; MIPS32R2-NEXT: and $8, $8, $5 ++; MIPS32R2-NEXT: and $9, $7, $6 ++; MIPS32R2-NEXT: or $9, $9, $8 ++; 
MIPS32R2-NEXT: sc $9, 0($3) ++; MIPS32R2-NEXT: beqz $9, $BB8_1 ++; MIPS32R2-NEXT: nop ++; MIPS32R2-NEXT: # %bb.2: # %entry ++; MIPS32R2-NEXT: and $2, $7, $5 ++; MIPS32R2-NEXT: srlv $2, $2, $1 ++; MIPS32R2-NEXT: seb $2, $2 ++; MIPS32R2-NEXT: # %bb.3: # %entry ++; MIPS32R2-NEXT: jr $ra ++; MIPS32R2-NEXT: nop ++; ++; MIPS32R6-LABEL: AtomicLoadAdd8: ++; MIPS32R6: # %bb.0: # %entry ++; MIPS32R6-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32R6-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32R6-NEXT: addu $1, $2, $25 ++; MIPS32R6-NEXT: lw $1, %got(y)($1) ++; MIPS32R6-NEXT: addiu $2, $zero, -4 ++; MIPS32R6-NEXT: and $3, $1, $2 ++; MIPS32R6-NEXT: andi $1, $1, 3 ++; MIPS32R6-NEXT: sll $1, $1, 3 ++; MIPS32R6-NEXT: ori $2, $zero, 255 ++; MIPS32R6-NEXT: sllv $5, $2, $1 ++; MIPS32R6-NEXT: nor $6, $zero, $5 ++; MIPS32R6-NEXT: sllv $4, $4, $1 ++; MIPS32R6-NEXT: $BB8_1: # %entry ++; MIPS32R6-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32R6-NEXT: ll $7, 0($3) ++; MIPS32R6-NEXT: addu $8, $7, $4 ++; MIPS32R6-NEXT: and $8, $8, $5 ++; MIPS32R6-NEXT: and $9, $7, $6 ++; MIPS32R6-NEXT: or $9, $9, $8 ++; MIPS32R6-NEXT: sc $9, 0($3) ++; MIPS32R6-NEXT: beqzc $9, $BB8_1 ++; MIPS32R6-NEXT: # %bb.2: # %entry ++; MIPS32R6-NEXT: and $2, $7, $5 ++; MIPS32R6-NEXT: srlv $2, $2, $1 ++; MIPS32R6-NEXT: seb $2, $2 ++; MIPS32R6-NEXT: # %bb.3: # %entry ++; MIPS32R6-NEXT: jrc $ra ++; ++; MIPS32R6O0-LABEL: AtomicLoadAdd8: ++; MIPS32R6O0: # %bb.0: # %entry ++; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32R6O0-NEXT: addiu $sp, $sp, -8 ++; MIPS32R6O0-NEXT: addu $1, $2, $25 ++; MIPS32R6O0-NEXT: # kill: def $v0 killed $a0 ++; MIPS32R6O0-NEXT: lw $1, %got(y)($1) ++; MIPS32R6O0-NEXT: addiu $2, $zero, -4 ++; MIPS32R6O0-NEXT: and $5, $1, $2 ++; MIPS32R6O0-NEXT: andi $1, $1, 3 ++; MIPS32R6O0-NEXT: sll $9, $1, 3 ++; MIPS32R6O0-NEXT: ori $1, $zero, 255 ++; MIPS32R6O0-NEXT: sllv $7, $1, $9 ++; MIPS32R6O0-NEXT: nor $8, $zero, $7 ++; MIPS32R6O0-NEXT: sllv $6, $4, $9 ++; MIPS32R6O0-NEXT: $BB8_1: # %entry ++; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32R6O0-NEXT: ll $2, 0($5) ++; MIPS32R6O0-NEXT: addu $3, $2, $6 ++; MIPS32R6O0-NEXT: and $3, $3, $7 ++; MIPS32R6O0-NEXT: and $4, $2, $8 ++; MIPS32R6O0-NEXT: or $4, $4, $3 ++; MIPS32R6O0-NEXT: sc $4, 0($5) ++; MIPS32R6O0-NEXT: beqzc $4, $BB8_1 ++; MIPS32R6O0-NEXT: # %bb.2: # %entry ++; MIPS32R6O0-NEXT: and $1, $2, $7 ++; MIPS32R6O0-NEXT: srlv $1, $1, $9 ++; MIPS32R6O0-NEXT: seb $1, $1 ++; MIPS32R6O0-NEXT: # %bb.3: # %entry ++; MIPS32R6O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ++; MIPS32R6O0-NEXT: # %bb.4: # %entry ++; MIPS32R6O0-NEXT: lw $2, 4($sp) # 4-byte Folded Reload ++; MIPS32R6O0-NEXT: addiu $sp, $sp, 8 ++; MIPS32R6O0-NEXT: jrc $ra ++; ++; MIPS4-LABEL: AtomicLoadAdd8: ++; MIPS4: # %bb.0: # %entry ++; MIPS4-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd8))) ++; MIPS4-NEXT: daddu $1, $1, $25 ++; MIPS4-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd8))) ++; MIPS4-NEXT: ld $1, %got_disp(y)($1) ++; MIPS4-NEXT: daddiu $2, $zero, -4 ++; MIPS4-NEXT: and $3, $1, $2 ++; MIPS4-NEXT: andi $1, $1, 3 ++; MIPS4-NEXT: sll $1, $1, 3 ++; MIPS4-NEXT: ori $2, $zero, 255 ++; MIPS4-NEXT: sllv $5, $2, $1 ++; MIPS4-NEXT: nor $6, $zero, $5 ++; MIPS4-NEXT: sllv $4, $4, $1 ++; MIPS4-NEXT: .LBB8_1: # %entry ++; MIPS4-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS4-NEXT: ll $7, 0($3) ++; MIPS4-NEXT: addu $8, $7, $4 ++; MIPS4-NEXT: and $8, $8, $5 ++; MIPS4-NEXT: and $9, $7, $6 ++; MIPS4-NEXT: or $9, $9, $8 ++; MIPS4-NEXT: sc $9, 0($3) ++; MIPS4-NEXT: beqz 
$9, .LBB8_1 ++; MIPS4-NEXT: nop ++; MIPS4-NEXT: # %bb.2: # %entry ++; MIPS4-NEXT: and $2, $7, $5 ++; MIPS4-NEXT: srlv $2, $2, $1 ++; MIPS4-NEXT: sll $2, $2, 24 ++; MIPS4-NEXT: sra $2, $2, 24 ++; MIPS4-NEXT: # %bb.3: # %entry ++; MIPS4-NEXT: jr $ra ++; MIPS4-NEXT: nop ++; ++; MIPS64-LABEL: AtomicLoadAdd8: ++; MIPS64: # %bb.0: # %entry ++; MIPS64-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd8))) ++; MIPS64-NEXT: daddu $1, $1, $25 ++; MIPS64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd8))) ++; MIPS64-NEXT: ld $1, %got_disp(y)($1) ++; MIPS64-NEXT: daddiu $2, $zero, -4 ++; MIPS64-NEXT: and $3, $1, $2 ++; MIPS64-NEXT: andi $1, $1, 3 ++; MIPS64-NEXT: sll $1, $1, 3 ++; MIPS64-NEXT: ori $2, $zero, 255 ++; MIPS64-NEXT: sllv $5, $2, $1 ++; MIPS64-NEXT: nor $6, $zero, $5 ++; MIPS64-NEXT: sllv $4, $4, $1 ++; MIPS64-NEXT: .LBB8_1: # %entry ++; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64-NEXT: ll $7, 0($3) ++; MIPS64-NEXT: addu $8, $7, $4 ++; MIPS64-NEXT: and $8, $8, $5 ++; MIPS64-NEXT: and $9, $7, $6 ++; MIPS64-NEXT: or $9, $9, $8 ++; MIPS64-NEXT: sc $9, 0($3) ++; MIPS64-NEXT: beqz $9, .LBB8_1 ++; MIPS64-NEXT: nop ++; MIPS64-NEXT: # %bb.2: # %entry ++; MIPS64-NEXT: and $2, $7, $5 ++; MIPS64-NEXT: srlv $2, $2, $1 ++; MIPS64-NEXT: sll $2, $2, 24 ++; MIPS64-NEXT: sra $2, $2, 24 ++; MIPS64-NEXT: # %bb.3: # %entry ++; MIPS64-NEXT: jr $ra ++; MIPS64-NEXT: nop ++; ++; MIPS64R2-LABEL: AtomicLoadAdd8: ++; MIPS64R2: # %bb.0: # %entry ++; MIPS64R2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd8))) ++; MIPS64R2-NEXT: daddu $1, $1, $25 ++; MIPS64R2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd8))) ++; MIPS64R2-NEXT: ld $1, %got_disp(y)($1) ++; MIPS64R2-NEXT: daddiu $2, $zero, -4 ++; MIPS64R2-NEXT: and $3, $1, $2 ++; MIPS64R2-NEXT: andi $1, $1, 3 ++; MIPS64R2-NEXT: sll $1, $1, 3 ++; MIPS64R2-NEXT: ori $2, $zero, 255 ++; MIPS64R2-NEXT: sllv $5, $2, $1 ++; MIPS64R2-NEXT: nor $6, $zero, $5 ++; MIPS64R2-NEXT: sllv $4, $4, $1 ++; MIPS64R2-NEXT: .LBB8_1: # %entry ++; MIPS64R2-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R2-NEXT: ll $7, 0($3) ++; MIPS64R2-NEXT: addu $8, $7, $4 ++; MIPS64R2-NEXT: and $8, $8, $5 ++; MIPS64R2-NEXT: and $9, $7, $6 ++; MIPS64R2-NEXT: or $9, $9, $8 ++; MIPS64R2-NEXT: sc $9, 0($3) ++; MIPS64R2-NEXT: beqz $9, .LBB8_1 ++; MIPS64R2-NEXT: nop ++; MIPS64R2-NEXT: # %bb.2: # %entry ++; MIPS64R2-NEXT: and $2, $7, $5 ++; MIPS64R2-NEXT: srlv $2, $2, $1 ++; MIPS64R2-NEXT: seb $2, $2 ++; MIPS64R2-NEXT: # %bb.3: # %entry ++; MIPS64R2-NEXT: jr $ra ++; MIPS64R2-NEXT: nop ++; ++; MIPS64R6-LABEL: AtomicLoadAdd8: ++; MIPS64R6: # %bb.0: # %entry ++; MIPS64R6-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd8))) ++; MIPS64R6-NEXT: daddu $1, $1, $25 ++; MIPS64R6-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd8))) ++; MIPS64R6-NEXT: ld $1, %got_disp(y)($1) ++; MIPS64R6-NEXT: daddiu $2, $zero, -4 ++; MIPS64R6-NEXT: and $3, $1, $2 ++; MIPS64R6-NEXT: andi $1, $1, 3 ++; MIPS64R6-NEXT: sll $1, $1, 3 ++; MIPS64R6-NEXT: ori $2, $zero, 255 ++; MIPS64R6-NEXT: sllv $5, $2, $1 ++; MIPS64R6-NEXT: nor $6, $zero, $5 ++; MIPS64R6-NEXT: sllv $4, $4, $1 ++; MIPS64R6-NEXT: .LBB8_1: # %entry ++; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6-NEXT: ll $7, 0($3) ++; MIPS64R6-NEXT: addu $8, $7, $4 ++; MIPS64R6-NEXT: and $8, $8, $5 ++; MIPS64R6-NEXT: and $9, $7, $6 ++; MIPS64R6-NEXT: or $9, $9, $8 ++; MIPS64R6-NEXT: sc $9, 0($3) ++; MIPS64R6-NEXT: beqzc $9, .LBB8_1 ++; MIPS64R6-NEXT: # %bb.2: # %entry ++; MIPS64R6-NEXT: and $2, $7, $5 ++; MIPS64R6-NEXT: srlv $2, $2, $1 ++; MIPS64R6-NEXT: seb 
$2, $2 ++; MIPS64R6-NEXT: # %bb.3: # %entry ++; MIPS64R6-NEXT: jrc $ra ++; ++; MIPS64R6O0-LABEL: AtomicLoadAdd8: ++; MIPS64R6O0: # %bb.0: # %entry ++; MIPS64R6O0-NEXT: daddiu $sp, $sp, -16 ++; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd8))) ++; MIPS64R6O0-NEXT: daddu $1, $1, $25 ++; MIPS64R6O0-NEXT: daddiu $2, $1, %lo(%neg(%gp_rel(AtomicLoadAdd8))) ++; MIPS64R6O0-NEXT: move $1, $4 ++; MIPS64R6O0-NEXT: ld $2, %got_disp(y)($2) ++; MIPS64R6O0-NEXT: daddiu $3, $zero, -4 ++; MIPS64R6O0-NEXT: and $5, $2, $3 ++; MIPS64R6O0-NEXT: andi $2, $2, 3 ++; MIPS64R6O0-NEXT: xori $2, $2, 3 ++; MIPS64R6O0-NEXT: sll $9, $2, 3 ++; MIPS64R6O0-NEXT: ori $2, $zero, 255 ++; MIPS64R6O0-NEXT: sllv $7, $2, $9 ++; MIPS64R6O0-NEXT: nor $8, $zero, $7 ++; MIPS64R6O0-NEXT: sllv $6, $1, $9 ++; MIPS64R6O0-NEXT: .LBB8_1: # %entry ++; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6O0-NEXT: ll $2, 0($5) ++; MIPS64R6O0-NEXT: addu $3, $2, $6 ++; MIPS64R6O0-NEXT: and $3, $3, $7 ++; MIPS64R6O0-NEXT: and $4, $2, $8 ++; MIPS64R6O0-NEXT: or $4, $4, $3 ++; MIPS64R6O0-NEXT: sc $4, 0($5) ++; MIPS64R6O0-NEXT: beqzc $4, .LBB8_1 ++; MIPS64R6O0-NEXT: # %bb.2: # %entry ++; MIPS64R6O0-NEXT: and $1, $2, $7 ++; MIPS64R6O0-NEXT: srlv $1, $1, $9 ++; MIPS64R6O0-NEXT: seb $1, $1 ++; MIPS64R6O0-NEXT: # %bb.3: # %entry ++; MIPS64R6O0-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ++; MIPS64R6O0-NEXT: # %bb.4: # %entry ++; MIPS64R6O0-NEXT: lw $2, 12($sp) # 4-byte Folded Reload ++; MIPS64R6O0-NEXT: daddiu $sp, $sp, 16 ++; MIPS64R6O0-NEXT: jrc $ra ++; ++; MM32-LABEL: AtomicLoadAdd8: ++; MM32: # %bb.0: # %entry ++; MM32-NEXT: lui $2, %hi(_gp_disp) ++; MM32-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MM32-NEXT: addu $2, $2, $25 ++; MM32-NEXT: lw $1, %got(y)($2) ++; MM32-NEXT: addiu $2, $zero, -4 ++; MM32-NEXT: and $3, $1, $2 ++; MM32-NEXT: andi $1, $1, 3 ++; MM32-NEXT: sll $1, $1, 3 ++; MM32-NEXT: ori $2, $zero, 255 ++; MM32-NEXT: sllv $5, $2, $1 ++; MM32-NEXT: nor $6, $zero, $5 ++; MM32-NEXT: sllv $4, $4, $1 ++; MM32-NEXT: $BB8_1: # %entry ++; MM32-NEXT: # =>This Inner Loop Header: Depth=1 ++; MM32-NEXT: ll $7, 0($3) ++; MM32-NEXT: addu $8, $7, $4 ++; MM32-NEXT: and $8, $8, $5 ++; MM32-NEXT: and $9, $7, $6 ++; MM32-NEXT: or $9, $9, $8 ++; MM32-NEXT: sc $9, 0($3) ++; MM32-NEXT: beqzc $9, $BB8_1 ++; MM32-NEXT: # %bb.2: # %entry ++; MM32-NEXT: and $2, $7, $5 ++; MM32-NEXT: srlv $2, $2, $1 ++; MM32-NEXT: seb $2, $2 ++; MM32-NEXT: # %bb.3: # %entry ++; MM32-NEXT: jrc $ra ++; ++; O1-LABEL: AtomicLoadAdd8: ++; O1: # %bb.0: # %entry ++; O1-NEXT: lui $2, %hi(_gp_disp) ++; O1-NEXT: addiu $2, $2, %lo(_gp_disp) ++; O1-NEXT: addu $1, $2, $25 ++; O1-NEXT: lw $1, %got(y)($1) ++; O1-NEXT: addiu $2, $zero, -4 ++; O1-NEXT: and $3, $1, $2 ++; O1-NEXT: andi $1, $1, 3 ++; O1-NEXT: sll $1, $1, 3 ++; O1-NEXT: ori $2, $zero, 255 ++; O1-NEXT: sllv $5, $2, $1 ++; O1-NEXT: nor $6, $zero, $5 ++; O1-NEXT: sllv $4, $4, $1 ++; O1-NEXT: $BB8_1: # %entry ++; O1-NEXT: # =>This Inner Loop Header: Depth=1 ++; O1-NEXT: ll $7, 0($3) ++; O1-NEXT: addu $8, $7, $4 ++; O1-NEXT: and $8, $8, $5 ++; O1-NEXT: and $9, $7, $6 ++; O1-NEXT: or $9, $9, $8 ++; O1-NEXT: sc $9, 0($3) ++; O1-NEXT: beqz $9, $BB8_1 ++; O1-NEXT: nop ++; O1-NEXT: # %bb.2: # %entry ++; O1-NEXT: and $2, $7, $5 ++; O1-NEXT: srlv $2, $2, $1 ++; O1-NEXT: sll $2, $2, 24 ++; O1-NEXT: sra $2, $2, 24 ++; O1-NEXT: # %bb.3: # %entry ++; O1-NEXT: jr $ra ++; O1-NEXT: nop ++; ++; O2-LABEL: AtomicLoadAdd8: ++; O2: # %bb.0: # %entry ++; O2-NEXT: lui $2, %hi(_gp_disp) ++; O2-NEXT: addiu $2, $2, %lo(_gp_disp) ++; O2-NEXT: addu 
$1, $2, $25 ++; O2-NEXT: lw $1, %got(y)($1) ++; O2-NEXT: addiu $2, $zero, -4 ++; O2-NEXT: and $3, $1, $2 ++; O2-NEXT: andi $1, $1, 3 ++; O2-NEXT: sll $1, $1, 3 ++; O2-NEXT: ori $2, $zero, 255 ++; O2-NEXT: sllv $5, $2, $1 ++; O2-NEXT: nor $6, $zero, $5 ++; O2-NEXT: sllv $4, $4, $1 ++; O2-NEXT: $BB8_1: # %entry ++; O2-NEXT: # =>This Inner Loop Header: Depth=1 ++; O2-NEXT: ll $7, 0($3) ++; O2-NEXT: addu $8, $7, $4 ++; O2-NEXT: and $8, $8, $5 ++; O2-NEXT: and $9, $7, $6 ++; O2-NEXT: or $9, $9, $8 ++; O2-NEXT: sc $9, 0($3) ++; O2-NEXT: beqz $9, $BB8_1 ++; O2-NEXT: nop ++; O2-NEXT: # %bb.2: # %entry ++; O2-NEXT: and $2, $7, $5 ++; O2-NEXT: srlv $2, $2, $1 ++; O2-NEXT: sll $2, $2, 24 ++; O2-NEXT: sra $2, $2, 24 ++; O2-NEXT: # %bb.3: # %entry ++; O2-NEXT: jr $ra ++; O2-NEXT: nop ++; ++; O3-LABEL: AtomicLoadAdd8: ++; O3: # %bb.0: # %entry ++; O3-NEXT: lui $2, %hi(_gp_disp) ++; O3-NEXT: addiu $2, $2, %lo(_gp_disp) ++; O3-NEXT: addu $1, $2, $25 ++; O3-NEXT: addiu $2, $zero, -4 ++; O3-NEXT: lw $1, %got(y)($1) ++; O3-NEXT: and $3, $1, $2 ++; O3-NEXT: andi $1, $1, 3 ++; O3-NEXT: ori $2, $zero, 255 ++; O3-NEXT: sll $1, $1, 3 ++; O3-NEXT: sllv $5, $2, $1 ++; O3-NEXT: sllv $4, $4, $1 ++; O3-NEXT: nor $6, $zero, $5 ++; O3-NEXT: $BB8_1: # %entry ++; O3-NEXT: # =>This Inner Loop Header: Depth=1 ++; O3-NEXT: ll $7, 0($3) ++; O3-NEXT: addu $8, $7, $4 ++; O3-NEXT: and $8, $8, $5 ++; O3-NEXT: and $9, $7, $6 ++; O3-NEXT: or $9, $9, $8 ++; O3-NEXT: sc $9, 0($3) ++; O3-NEXT: beqz $9, $BB8_1 ++; O3-NEXT: nop ++; O3-NEXT: # %bb.2: # %entry ++; O3-NEXT: and $2, $7, $5 ++; O3-NEXT: srlv $2, $2, $1 ++; O3-NEXT: sll $2, $2, 24 ++; O3-NEXT: sra $2, $2, 24 ++; O3-NEXT: # %bb.3: # %entry ++; O3-NEXT: jr $ra ++; O3-NEXT: nop ++; ++; MIPS32EB-LABEL: AtomicLoadAdd8: ++; MIPS32EB: # %bb.0: # %entry ++; MIPS32EB-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32EB-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32EB-NEXT: addu $1, $2, $25 ++; MIPS32EB-NEXT: lw $1, %got(y)($1) ++; MIPS32EB-NEXT: addiu $2, $zero, -4 ++; MIPS32EB-NEXT: and $3, $1, $2 ++; MIPS32EB-NEXT: andi $1, $1, 3 ++; MIPS32EB-NEXT: xori $1, $1, 3 ++; MIPS32EB-NEXT: sll $1, $1, 3 ++; MIPS32EB-NEXT: ori $2, $zero, 255 ++; MIPS32EB-NEXT: sllv $5, $2, $1 ++; MIPS32EB-NEXT: nor $6, $zero, $5 ++; MIPS32EB-NEXT: sllv $4, $4, $1 ++; MIPS32EB-NEXT: $BB8_1: # %entry ++; MIPS32EB-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32EB-NEXT: ll $7, 0($3) ++; MIPS32EB-NEXT: addu $8, $7, $4 ++; MIPS32EB-NEXT: and $8, $8, $5 ++; MIPS32EB-NEXT: and $9, $7, $6 ++; MIPS32EB-NEXT: or $9, $9, $8 ++; MIPS32EB-NEXT: sc $9, 0($3) ++; MIPS32EB-NEXT: beqz $9, $BB8_1 ++; MIPS32EB-NEXT: nop ++; MIPS32EB-NEXT: # %bb.2: # %entry ++; MIPS32EB-NEXT: and $2, $7, $5 ++; MIPS32EB-NEXT: srlv $2, $2, $1 ++; MIPS32EB-NEXT: sll $2, $2, 24 ++; MIPS32EB-NEXT: sra $2, $2, 24 ++; MIPS32EB-NEXT: # %bb.3: # %entry ++; MIPS32EB-NEXT: jr $ra ++; MIPS32EB-NEXT: nop ++entry: ++ %0 = atomicrmw add i8* @y, i8 %incr monotonic ++ ret i8 %0 ++} ++ ++define signext i8 @AtomicLoadSub8(i8 signext %incr) nounwind { ++; MIPS32-LABEL: AtomicLoadSub8: ++; MIPS32: # %bb.0: # %entry ++; MIPS32-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32-NEXT: addu $1, $2, $25 ++; MIPS32-NEXT: lw $1, %got(y)($1) ++; MIPS32-NEXT: addiu $2, $zero, -4 ++; MIPS32-NEXT: and $3, $1, $2 ++; MIPS32-NEXT: andi $1, $1, 3 ++; MIPS32-NEXT: sll $1, $1, 3 ++; MIPS32-NEXT: ori $2, $zero, 255 ++; MIPS32-NEXT: sllv $5, $2, $1 ++; MIPS32-NEXT: nor $6, $zero, $5 ++; MIPS32-NEXT: sllv $4, $4, $1 ++; MIPS32-NEXT: $BB9_1: # %entry ++; 
MIPS32-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32-NEXT: ll $7, 0($3) ++; MIPS32-NEXT: subu $8, $7, $4 ++; MIPS32-NEXT: and $8, $8, $5 ++; MIPS32-NEXT: and $9, $7, $6 ++; MIPS32-NEXT: or $9, $9, $8 ++; MIPS32-NEXT: sc $9, 0($3) ++; MIPS32-NEXT: beqz $9, $BB9_1 ++; MIPS32-NEXT: nop ++; MIPS32-NEXT: # %bb.2: # %entry ++; MIPS32-NEXT: and $2, $7, $5 ++; MIPS32-NEXT: srlv $2, $2, $1 ++; MIPS32-NEXT: sll $2, $2, 24 ++; MIPS32-NEXT: sra $2, $2, 24 ++; MIPS32-NEXT: # %bb.3: # %entry ++; MIPS32-NEXT: jr $ra ++; MIPS32-NEXT: nop ++; ++; MIPS32O0-LABEL: AtomicLoadSub8: ++; MIPS32O0: # %bb.0: # %entry ++; MIPS32O0-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32O0-NEXT: addiu $sp, $sp, -8 ++; MIPS32O0-NEXT: addu $1, $2, $25 ++; MIPS32O0-NEXT: lw $1, %got(y)($1) ++; MIPS32O0-NEXT: addiu $2, $zero, -4 ++; MIPS32O0-NEXT: and $5, $1, $2 ++; MIPS32O0-NEXT: andi $1, $1, 3 ++; MIPS32O0-NEXT: sll $9, $1, 3 ++; MIPS32O0-NEXT: ori $1, $zero, 255 ++; MIPS32O0-NEXT: sllv $7, $1, $9 ++; MIPS32O0-NEXT: nor $8, $zero, $7 ++; MIPS32O0-NEXT: sllv $6, $4, $9 ++; MIPS32O0-NEXT: $BB9_1: # %entry ++; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32O0-NEXT: ll $2, 0($5) ++; MIPS32O0-NEXT: subu $3, $2, $6 ++; MIPS32O0-NEXT: and $3, $3, $7 ++; MIPS32O0-NEXT: and $4, $2, $8 ++; MIPS32O0-NEXT: or $4, $4, $3 ++; MIPS32O0-NEXT: sc $4, 0($5) ++; MIPS32O0-NEXT: beqz $4, $BB9_1 ++; MIPS32O0-NEXT: nop ++; MIPS32O0-NEXT: # %bb.2: # %entry ++; MIPS32O0-NEXT: and $1, $2, $7 ++; MIPS32O0-NEXT: srlv $1, $1, $9 ++; MIPS32O0-NEXT: sll $1, $1, 24 ++; MIPS32O0-NEXT: sra $1, $1, 24 ++; MIPS32O0-NEXT: # %bb.3: # %entry ++; MIPS32O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ++; MIPS32O0-NEXT: # %bb.4: # %entry ++; MIPS32O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload ++; MIPS32O0-NEXT: sll $1, $1, 24 ++; MIPS32O0-NEXT: sra $2, $1, 24 ++; MIPS32O0-NEXT: addiu $sp, $sp, 8 ++; MIPS32O0-NEXT: jr $ra ++; MIPS32O0-NEXT: nop ++; ++; MIPS32R2-LABEL: AtomicLoadSub8: ++; MIPS32R2: # %bb.0: # %entry ++; MIPS32R2-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32R2-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32R2-NEXT: addu $1, $2, $25 ++; MIPS32R2-NEXT: lw $1, %got(y)($1) ++; MIPS32R2-NEXT: addiu $2, $zero, -4 ++; MIPS32R2-NEXT: and $3, $1, $2 ++; MIPS32R2-NEXT: andi $1, $1, 3 ++; MIPS32R2-NEXT: sll $1, $1, 3 ++; MIPS32R2-NEXT: ori $2, $zero, 255 ++; MIPS32R2-NEXT: sllv $5, $2, $1 ++; MIPS32R2-NEXT: nor $6, $zero, $5 ++; MIPS32R2-NEXT: sllv $4, $4, $1 ++; MIPS32R2-NEXT: $BB9_1: # %entry ++; MIPS32R2-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32R2-NEXT: ll $7, 0($3) ++; MIPS32R2-NEXT: subu $8, $7, $4 ++; MIPS32R2-NEXT: and $8, $8, $5 ++; MIPS32R2-NEXT: and $9, $7, $6 ++; MIPS32R2-NEXT: or $9, $9, $8 ++; MIPS32R2-NEXT: sc $9, 0($3) ++; MIPS32R2-NEXT: beqz $9, $BB9_1 ++; MIPS32R2-NEXT: nop ++; MIPS32R2-NEXT: # %bb.2: # %entry ++; MIPS32R2-NEXT: and $2, $7, $5 ++; MIPS32R2-NEXT: srlv $2, $2, $1 ++; MIPS32R2-NEXT: seb $2, $2 ++; MIPS32R2-NEXT: # %bb.3: # %entry ++; MIPS32R2-NEXT: jr $ra ++; MIPS32R2-NEXT: nop ++; ++; MIPS32R6-LABEL: AtomicLoadSub8: ++; MIPS32R6: # %bb.0: # %entry ++; MIPS32R6-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32R6-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32R6-NEXT: addu $1, $2, $25 ++; MIPS32R6-NEXT: lw $1, %got(y)($1) ++; MIPS32R6-NEXT: addiu $2, $zero, -4 ++; MIPS32R6-NEXT: and $3, $1, $2 ++; MIPS32R6-NEXT: andi $1, $1, 3 ++; MIPS32R6-NEXT: sll $1, $1, 3 ++; MIPS32R6-NEXT: ori $2, $zero, 255 ++; MIPS32R6-NEXT: sllv $5, $2, $1 ++; MIPS32R6-NEXT: nor $6, $zero, $5 ++; MIPS32R6-NEXT: sllv 
$4, $4, $1 ++; MIPS32R6-NEXT: $BB9_1: # %entry ++; MIPS32R6-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32R6-NEXT: ll $7, 0($3) ++; MIPS32R6-NEXT: subu $8, $7, $4 ++; MIPS32R6-NEXT: and $8, $8, $5 ++; MIPS32R6-NEXT: and $9, $7, $6 ++; MIPS32R6-NEXT: or $9, $9, $8 ++; MIPS32R6-NEXT: sc $9, 0($3) ++; MIPS32R6-NEXT: beqzc $9, $BB9_1 ++; MIPS32R6-NEXT: # %bb.2: # %entry ++; MIPS32R6-NEXT: and $2, $7, $5 ++; MIPS32R6-NEXT: srlv $2, $2, $1 ++; MIPS32R6-NEXT: seb $2, $2 ++; MIPS32R6-NEXT: # %bb.3: # %entry ++; MIPS32R6-NEXT: jrc $ra ++; ++; MIPS32R6O0-LABEL: AtomicLoadSub8: ++; MIPS32R6O0: # %bb.0: # %entry ++; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32R6O0-NEXT: addiu $sp, $sp, -8 ++; MIPS32R6O0-NEXT: addu $1, $2, $25 ++; MIPS32R6O0-NEXT: # kill: def $v0 killed $a0 ++; MIPS32R6O0-NEXT: lw $1, %got(y)($1) ++; MIPS32R6O0-NEXT: addiu $2, $zero, -4 ++; MIPS32R6O0-NEXT: and $5, $1, $2 ++; MIPS32R6O0-NEXT: andi $1, $1, 3 ++; MIPS32R6O0-NEXT: sll $9, $1, 3 ++; MIPS32R6O0-NEXT: ori $1, $zero, 255 ++; MIPS32R6O0-NEXT: sllv $7, $1, $9 ++; MIPS32R6O0-NEXT: nor $8, $zero, $7 ++; MIPS32R6O0-NEXT: sllv $6, $4, $9 ++; MIPS32R6O0-NEXT: $BB9_1: # %entry ++; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32R6O0-NEXT: ll $2, 0($5) ++; MIPS32R6O0-NEXT: subu $3, $2, $6 ++; MIPS32R6O0-NEXT: and $3, $3, $7 ++; MIPS32R6O0-NEXT: and $4, $2, $8 ++; MIPS32R6O0-NEXT: or $4, $4, $3 ++; MIPS32R6O0-NEXT: sc $4, 0($5) ++; MIPS32R6O0-NEXT: beqzc $4, $BB9_1 ++; MIPS32R6O0-NEXT: # %bb.2: # %entry ++; MIPS32R6O0-NEXT: and $1, $2, $7 ++; MIPS32R6O0-NEXT: srlv $1, $1, $9 ++; MIPS32R6O0-NEXT: seb $1, $1 ++; MIPS32R6O0-NEXT: # %bb.3: # %entry ++; MIPS32R6O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ++; MIPS32R6O0-NEXT: # %bb.4: # %entry ++; MIPS32R6O0-NEXT: lw $2, 4($sp) # 4-byte Folded Reload ++; MIPS32R6O0-NEXT: addiu $sp, $sp, 8 ++; MIPS32R6O0-NEXT: jrc $ra ++; ++; MIPS4-LABEL: AtomicLoadSub8: ++; MIPS4: # %bb.0: # %entry ++; MIPS4-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadSub8))) ++; MIPS4-NEXT: daddu $1, $1, $25 ++; MIPS4-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadSub8))) ++; MIPS4-NEXT: ld $1, %got_disp(y)($1) ++; MIPS4-NEXT: daddiu $2, $zero, -4 ++; MIPS4-NEXT: and $3, $1, $2 ++; MIPS4-NEXT: andi $1, $1, 3 ++; MIPS4-NEXT: sll $1, $1, 3 ++; MIPS4-NEXT: ori $2, $zero, 255 ++; MIPS4-NEXT: sllv $5, $2, $1 ++; MIPS4-NEXT: nor $6, $zero, $5 ++; MIPS4-NEXT: sllv $4, $4, $1 ++; MIPS4-NEXT: .LBB9_1: # %entry ++; MIPS4-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS4-NEXT: ll $7, 0($3) ++; MIPS4-NEXT: subu $8, $7, $4 ++; MIPS4-NEXT: and $8, $8, $5 ++; MIPS4-NEXT: and $9, $7, $6 ++; MIPS4-NEXT: or $9, $9, $8 ++; MIPS4-NEXT: sc $9, 0($3) ++; MIPS4-NEXT: beqz $9, .LBB9_1 ++; MIPS4-NEXT: nop ++; MIPS4-NEXT: # %bb.2: # %entry ++; MIPS4-NEXT: and $2, $7, $5 ++; MIPS4-NEXT: srlv $2, $2, $1 ++; MIPS4-NEXT: sll $2, $2, 24 ++; MIPS4-NEXT: sra $2, $2, 24 ++; MIPS4-NEXT: # %bb.3: # %entry ++; MIPS4-NEXT: jr $ra ++; MIPS4-NEXT: nop ++; ++; MIPS64-LABEL: AtomicLoadSub8: ++; MIPS64: # %bb.0: # %entry ++; MIPS64-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadSub8))) ++; MIPS64-NEXT: daddu $1, $1, $25 ++; MIPS64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadSub8))) ++; MIPS64-NEXT: ld $1, %got_disp(y)($1) ++; MIPS64-NEXT: daddiu $2, $zero, -4 ++; MIPS64-NEXT: and $3, $1, $2 ++; MIPS64-NEXT: andi $1, $1, 3 ++; MIPS64-NEXT: sll $1, $1, 3 ++; MIPS64-NEXT: ori $2, $zero, 255 ++; MIPS64-NEXT: sllv $5, $2, $1 ++; MIPS64-NEXT: nor $6, $zero, $5 ++; MIPS64-NEXT: sllv $4, $4, $1 ++; 
MIPS64-NEXT: .LBB9_1: # %entry ++; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64-NEXT: ll $7, 0($3) ++; MIPS64-NEXT: subu $8, $7, $4 ++; MIPS64-NEXT: and $8, $8, $5 ++; MIPS64-NEXT: and $9, $7, $6 ++; MIPS64-NEXT: or $9, $9, $8 ++; MIPS64-NEXT: sc $9, 0($3) ++; MIPS64-NEXT: beqz $9, .LBB9_1 ++; MIPS64-NEXT: nop ++; MIPS64-NEXT: # %bb.2: # %entry ++; MIPS64-NEXT: and $2, $7, $5 ++; MIPS64-NEXT: srlv $2, $2, $1 ++; MIPS64-NEXT: sll $2, $2, 24 ++; MIPS64-NEXT: sra $2, $2, 24 ++; MIPS64-NEXT: # %bb.3: # %entry ++; MIPS64-NEXT: jr $ra ++; MIPS64-NEXT: nop ++; ++; MIPS64R2-LABEL: AtomicLoadSub8: ++; MIPS64R2: # %bb.0: # %entry ++; MIPS64R2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadSub8))) ++; MIPS64R2-NEXT: daddu $1, $1, $25 ++; MIPS64R2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadSub8))) ++; MIPS64R2-NEXT: ld $1, %got_disp(y)($1) ++; MIPS64R2-NEXT: daddiu $2, $zero, -4 ++; MIPS64R2-NEXT: and $3, $1, $2 ++; MIPS64R2-NEXT: andi $1, $1, 3 ++; MIPS64R2-NEXT: sll $1, $1, 3 ++; MIPS64R2-NEXT: ori $2, $zero, 255 ++; MIPS64R2-NEXT: sllv $5, $2, $1 ++; MIPS64R2-NEXT: nor $6, $zero, $5 ++; MIPS64R2-NEXT: sllv $4, $4, $1 ++; MIPS64R2-NEXT: .LBB9_1: # %entry ++; MIPS64R2-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R2-NEXT: ll $7, 0($3) ++; MIPS64R2-NEXT: subu $8, $7, $4 ++; MIPS64R2-NEXT: and $8, $8, $5 ++; MIPS64R2-NEXT: and $9, $7, $6 ++; MIPS64R2-NEXT: or $9, $9, $8 ++; MIPS64R2-NEXT: sc $9, 0($3) ++; MIPS64R2-NEXT: beqz $9, .LBB9_1 ++; MIPS64R2-NEXT: nop ++; MIPS64R2-NEXT: # %bb.2: # %entry ++; MIPS64R2-NEXT: and $2, $7, $5 ++; MIPS64R2-NEXT: srlv $2, $2, $1 ++; MIPS64R2-NEXT: seb $2, $2 ++; MIPS64R2-NEXT: # %bb.3: # %entry ++; MIPS64R2-NEXT: jr $ra ++; MIPS64R2-NEXT: nop ++; ++; MIPS64R6-LABEL: AtomicLoadSub8: ++; MIPS64R6: # %bb.0: # %entry ++; MIPS64R6-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadSub8))) ++; MIPS64R6-NEXT: daddu $1, $1, $25 ++; MIPS64R6-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadSub8))) ++; MIPS64R6-NEXT: ld $1, %got_disp(y)($1) ++; MIPS64R6-NEXT: daddiu $2, $zero, -4 ++; MIPS64R6-NEXT: and $3, $1, $2 ++; MIPS64R6-NEXT: andi $1, $1, 3 ++; MIPS64R6-NEXT: sll $1, $1, 3 ++; MIPS64R6-NEXT: ori $2, $zero, 255 ++; MIPS64R6-NEXT: sllv $5, $2, $1 ++; MIPS64R6-NEXT: nor $6, $zero, $5 ++; MIPS64R6-NEXT: sllv $4, $4, $1 ++; MIPS64R6-NEXT: .LBB9_1: # %entry ++; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6-NEXT: ll $7, 0($3) ++; MIPS64R6-NEXT: subu $8, $7, $4 ++; MIPS64R6-NEXT: and $8, $8, $5 ++; MIPS64R6-NEXT: and $9, $7, $6 ++; MIPS64R6-NEXT: or $9, $9, $8 ++; MIPS64R6-NEXT: sc $9, 0($3) ++; MIPS64R6-NEXT: beqzc $9, .LBB9_1 ++; MIPS64R6-NEXT: # %bb.2: # %entry ++; MIPS64R6-NEXT: and $2, $7, $5 ++; MIPS64R6-NEXT: srlv $2, $2, $1 ++; MIPS64R6-NEXT: seb $2, $2 ++; MIPS64R6-NEXT: # %bb.3: # %entry ++; MIPS64R6-NEXT: jrc $ra ++; ++; MIPS64R6O0-LABEL: AtomicLoadSub8: ++; MIPS64R6O0: # %bb.0: # %entry ++; MIPS64R6O0-NEXT: daddiu $sp, $sp, -16 ++; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadSub8))) ++; MIPS64R6O0-NEXT: daddu $1, $1, $25 ++; MIPS64R6O0-NEXT: daddiu $2, $1, %lo(%neg(%gp_rel(AtomicLoadSub8))) ++; MIPS64R6O0-NEXT: move $1, $4 ++; MIPS64R6O0-NEXT: ld $2, %got_disp(y)($2) ++; MIPS64R6O0-NEXT: daddiu $3, $zero, -4 ++; MIPS64R6O0-NEXT: and $5, $2, $3 ++; MIPS64R6O0-NEXT: andi $2, $2, 3 ++; MIPS64R6O0-NEXT: xori $2, $2, 3 ++; MIPS64R6O0-NEXT: sll $9, $2, 3 ++; MIPS64R6O0-NEXT: ori $2, $zero, 255 ++; MIPS64R6O0-NEXT: sllv $7, $2, $9 ++; MIPS64R6O0-NEXT: nor $8, $zero, $7 ++; MIPS64R6O0-NEXT: sllv $6, $1, $9 ++; MIPS64R6O0-NEXT: .LBB9_1: # 
%entry ++; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6O0-NEXT: ll $2, 0($5) ++; MIPS64R6O0-NEXT: subu $3, $2, $6 ++; MIPS64R6O0-NEXT: and $3, $3, $7 ++; MIPS64R6O0-NEXT: and $4, $2, $8 ++; MIPS64R6O0-NEXT: or $4, $4, $3 ++; MIPS64R6O0-NEXT: sc $4, 0($5) ++; MIPS64R6O0-NEXT: beqzc $4, .LBB9_1 ++; MIPS64R6O0-NEXT: # %bb.2: # %entry ++; MIPS64R6O0-NEXT: and $1, $2, $7 ++; MIPS64R6O0-NEXT: srlv $1, $1, $9 ++; MIPS64R6O0-NEXT: seb $1, $1 ++; MIPS64R6O0-NEXT: # %bb.3: # %entry ++; MIPS64R6O0-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ++; MIPS64R6O0-NEXT: # %bb.4: # %entry ++; MIPS64R6O0-NEXT: lw $2, 12($sp) # 4-byte Folded Reload ++; MIPS64R6O0-NEXT: daddiu $sp, $sp, 16 ++; MIPS64R6O0-NEXT: jrc $ra ++; ++; MM32-LABEL: AtomicLoadSub8: ++; MM32: # %bb.0: # %entry ++; MM32-NEXT: lui $2, %hi(_gp_disp) ++; MM32-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MM32-NEXT: addu $2, $2, $25 ++; MM32-NEXT: lw $1, %got(y)($2) ++; MM32-NEXT: addiu $2, $zero, -4 ++; MM32-NEXT: and $3, $1, $2 ++; MM32-NEXT: andi $1, $1, 3 ++; MM32-NEXT: sll $1, $1, 3 ++; MM32-NEXT: ori $2, $zero, 255 ++; MM32-NEXT: sllv $5, $2, $1 ++; MM32-NEXT: nor $6, $zero, $5 ++; MM32-NEXT: sllv $4, $4, $1 ++; MM32-NEXT: $BB9_1: # %entry ++; MM32-NEXT: # =>This Inner Loop Header: Depth=1 ++; MM32-NEXT: ll $7, 0($3) ++; MM32-NEXT: subu $8, $7, $4 ++; MM32-NEXT: and $8, $8, $5 ++; MM32-NEXT: and $9, $7, $6 ++; MM32-NEXT: or $9, $9, $8 ++; MM32-NEXT: sc $9, 0($3) ++; MM32-NEXT: beqzc $9, $BB9_1 ++; MM32-NEXT: # %bb.2: # %entry ++; MM32-NEXT: and $2, $7, $5 ++; MM32-NEXT: srlv $2, $2, $1 ++; MM32-NEXT: seb $2, $2 ++; MM32-NEXT: # %bb.3: # %entry ++; MM32-NEXT: jrc $ra ++; ++; O1-LABEL: AtomicLoadSub8: ++; O1: # %bb.0: # %entry ++; O1-NEXT: lui $2, %hi(_gp_disp) ++; O1-NEXT: addiu $2, $2, %lo(_gp_disp) ++; O1-NEXT: addu $1, $2, $25 ++; O1-NEXT: lw $1, %got(y)($1) ++; O1-NEXT: addiu $2, $zero, -4 ++; O1-NEXT: and $3, $1, $2 ++; O1-NEXT: andi $1, $1, 3 ++; O1-NEXT: sll $1, $1, 3 ++; O1-NEXT: ori $2, $zero, 255 ++; O1-NEXT: sllv $5, $2, $1 ++; O1-NEXT: nor $6, $zero, $5 ++; O1-NEXT: sllv $4, $4, $1 ++; O1-NEXT: $BB9_1: # %entry ++; O1-NEXT: # =>This Inner Loop Header: Depth=1 ++; O1-NEXT: ll $7, 0($3) ++; O1-NEXT: subu $8, $7, $4 ++; O1-NEXT: and $8, $8, $5 ++; O1-NEXT: and $9, $7, $6 ++; O1-NEXT: or $9, $9, $8 ++; O1-NEXT: sc $9, 0($3) ++; O1-NEXT: beqz $9, $BB9_1 ++; O1-NEXT: nop ++; O1-NEXT: # %bb.2: # %entry ++; O1-NEXT: and $2, $7, $5 ++; O1-NEXT: srlv $2, $2, $1 ++; O1-NEXT: sll $2, $2, 24 ++; O1-NEXT: sra $2, $2, 24 ++; O1-NEXT: # %bb.3: # %entry ++; O1-NEXT: jr $ra ++; O1-NEXT: nop ++; ++; O2-LABEL: AtomicLoadSub8: ++; O2: # %bb.0: # %entry ++; O2-NEXT: lui $2, %hi(_gp_disp) ++; O2-NEXT: addiu $2, $2, %lo(_gp_disp) ++; O2-NEXT: addu $1, $2, $25 ++; O2-NEXT: lw $1, %got(y)($1) ++; O2-NEXT: addiu $2, $zero, -4 ++; O2-NEXT: and $3, $1, $2 ++; O2-NEXT: andi $1, $1, 3 ++; O2-NEXT: sll $1, $1, 3 ++; O2-NEXT: ori $2, $zero, 255 ++; O2-NEXT: sllv $5, $2, $1 ++; O2-NEXT: nor $6, $zero, $5 ++; O2-NEXT: sllv $4, $4, $1 ++; O2-NEXT: $BB9_1: # %entry ++; O2-NEXT: # =>This Inner Loop Header: Depth=1 ++; O2-NEXT: ll $7, 0($3) ++; O2-NEXT: subu $8, $7, $4 ++; O2-NEXT: and $8, $8, $5 ++; O2-NEXT: and $9, $7, $6 ++; O2-NEXT: or $9, $9, $8 ++; O2-NEXT: sc $9, 0($3) ++; O2-NEXT: beqz $9, $BB9_1 ++; O2-NEXT: nop ++; O2-NEXT: # %bb.2: # %entry ++; O2-NEXT: and $2, $7, $5 ++; O2-NEXT: srlv $2, $2, $1 ++; O2-NEXT: sll $2, $2, 24 ++; O2-NEXT: sra $2, $2, 24 ++; O2-NEXT: # %bb.3: # %entry ++; O2-NEXT: jr $ra ++; O2-NEXT: nop ++; ++; O3-LABEL: 
AtomicLoadSub8: ++; O3: # %bb.0: # %entry ++; O3-NEXT: lui $2, %hi(_gp_disp) ++; O3-NEXT: addiu $2, $2, %lo(_gp_disp) ++; O3-NEXT: addu $1, $2, $25 ++; O3-NEXT: addiu $2, $zero, -4 ++; O3-NEXT: lw $1, %got(y)($1) ++; O3-NEXT: and $3, $1, $2 ++; O3-NEXT: andi $1, $1, 3 ++; O3-NEXT: ori $2, $zero, 255 ++; O3-NEXT: sll $1, $1, 3 ++; O3-NEXT: sllv $5, $2, $1 ++; O3-NEXT: sllv $4, $4, $1 ++; O3-NEXT: nor $6, $zero, $5 ++; O3-NEXT: $BB9_1: # %entry ++; O3-NEXT: # =>This Inner Loop Header: Depth=1 ++; O3-NEXT: ll $7, 0($3) ++; O3-NEXT: subu $8, $7, $4 ++; O3-NEXT: and $8, $8, $5 ++; O3-NEXT: and $9, $7, $6 ++; O3-NEXT: or $9, $9, $8 ++; O3-NEXT: sc $9, 0($3) ++; O3-NEXT: beqz $9, $BB9_1 ++; O3-NEXT: nop ++; O3-NEXT: # %bb.2: # %entry ++; O3-NEXT: and $2, $7, $5 ++; O3-NEXT: srlv $2, $2, $1 ++; O3-NEXT: sll $2, $2, 24 ++; O3-NEXT: sra $2, $2, 24 ++; O3-NEXT: # %bb.3: # %entry ++; O3-NEXT: jr $ra ++; O3-NEXT: nop ++; ++; MIPS32EB-LABEL: AtomicLoadSub8: ++; MIPS32EB: # %bb.0: # %entry ++; MIPS32EB-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32EB-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32EB-NEXT: addu $1, $2, $25 ++; MIPS32EB-NEXT: lw $1, %got(y)($1) ++; MIPS32EB-NEXT: addiu $2, $zero, -4 ++; MIPS32EB-NEXT: and $3, $1, $2 ++; MIPS32EB-NEXT: andi $1, $1, 3 ++; MIPS32EB-NEXT: xori $1, $1, 3 ++; MIPS32EB-NEXT: sll $1, $1, 3 ++; MIPS32EB-NEXT: ori $2, $zero, 255 ++; MIPS32EB-NEXT: sllv $5, $2, $1 ++; MIPS32EB-NEXT: nor $6, $zero, $5 ++; MIPS32EB-NEXT: sllv $4, $4, $1 ++; MIPS32EB-NEXT: $BB9_1: # %entry ++; MIPS32EB-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32EB-NEXT: ll $7, 0($3) ++; MIPS32EB-NEXT: subu $8, $7, $4 ++; MIPS32EB-NEXT: and $8, $8, $5 ++; MIPS32EB-NEXT: and $9, $7, $6 ++; MIPS32EB-NEXT: or $9, $9, $8 ++; MIPS32EB-NEXT: sc $9, 0($3) ++; MIPS32EB-NEXT: beqz $9, $BB9_1 ++; MIPS32EB-NEXT: nop ++; MIPS32EB-NEXT: # %bb.2: # %entry ++; MIPS32EB-NEXT: and $2, $7, $5 ++; MIPS32EB-NEXT: srlv $2, $2, $1 ++; MIPS32EB-NEXT: sll $2, $2, 24 ++; MIPS32EB-NEXT: sra $2, $2, 24 ++; MIPS32EB-NEXT: # %bb.3: # %entry ++; MIPS32EB-NEXT: jr $ra ++; MIPS32EB-NEXT: nop ++entry: ++ %0 = atomicrmw sub i8* @y, i8 %incr monotonic ++ ret i8 %0 ++ ++} ++ ++define signext i8 @AtomicLoadNand8(i8 signext %incr) nounwind { ++; MIPS32-LABEL: AtomicLoadNand8: ++; MIPS32: # %bb.0: # %entry ++; MIPS32-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32-NEXT: addu $1, $2, $25 ++; MIPS32-NEXT: lw $1, %got(y)($1) ++; MIPS32-NEXT: addiu $2, $zero, -4 ++; MIPS32-NEXT: and $3, $1, $2 ++; MIPS32-NEXT: andi $1, $1, 3 ++; MIPS32-NEXT: sll $1, $1, 3 ++; MIPS32-NEXT: ori $2, $zero, 255 ++; MIPS32-NEXT: sllv $5, $2, $1 ++; MIPS32-NEXT: nor $6, $zero, $5 ++; MIPS32-NEXT: sllv $4, $4, $1 ++; MIPS32-NEXT: $BB10_1: # %entry ++; MIPS32-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32-NEXT: ll $7, 0($3) ++; MIPS32-NEXT: and $8, $7, $4 ++; MIPS32-NEXT: nor $8, $zero, $8 ++; MIPS32-NEXT: and $8, $8, $5 ++; MIPS32-NEXT: and $9, $7, $6 ++; MIPS32-NEXT: or $9, $9, $8 ++; MIPS32-NEXT: sc $9, 0($3) ++; MIPS32-NEXT: beqz $9, $BB10_1 ++; MIPS32-NEXT: nop ++; MIPS32-NEXT: # %bb.2: # %entry ++; MIPS32-NEXT: and $2, $7, $5 ++; MIPS32-NEXT: srlv $2, $2, $1 ++; MIPS32-NEXT: sll $2, $2, 24 ++; MIPS32-NEXT: sra $2, $2, 24 ++; MIPS32-NEXT: # %bb.3: # %entry ++; MIPS32-NEXT: jr $ra ++; MIPS32-NEXT: nop ++; ++; MIPS32O0-LABEL: AtomicLoadNand8: ++; MIPS32O0: # %bb.0: # %entry ++; MIPS32O0-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32O0-NEXT: addiu $sp, $sp, -8 ++; MIPS32O0-NEXT: addu $1, $2, 
$25 ++; MIPS32O0-NEXT: lw $1, %got(y)($1) ++; MIPS32O0-NEXT: addiu $2, $zero, -4 ++; MIPS32O0-NEXT: and $5, $1, $2 ++; MIPS32O0-NEXT: andi $1, $1, 3 ++; MIPS32O0-NEXT: sll $9, $1, 3 ++; MIPS32O0-NEXT: ori $1, $zero, 255 ++; MIPS32O0-NEXT: sllv $7, $1, $9 ++; MIPS32O0-NEXT: nor $8, $zero, $7 ++; MIPS32O0-NEXT: sllv $6, $4, $9 ++; MIPS32O0-NEXT: $BB10_1: # %entry ++; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32O0-NEXT: ll $2, 0($5) ++; MIPS32O0-NEXT: and $3, $2, $6 ++; MIPS32O0-NEXT: nor $3, $zero, $3 ++; MIPS32O0-NEXT: and $3, $3, $7 ++; MIPS32O0-NEXT: and $4, $2, $8 ++; MIPS32O0-NEXT: or $4, $4, $3 ++; MIPS32O0-NEXT: sc $4, 0($5) ++; MIPS32O0-NEXT: beqz $4, $BB10_1 ++; MIPS32O0-NEXT: nop ++; MIPS32O0-NEXT: # %bb.2: # %entry ++; MIPS32O0-NEXT: and $1, $2, $7 ++; MIPS32O0-NEXT: srlv $1, $1, $9 ++; MIPS32O0-NEXT: sll $1, $1, 24 ++; MIPS32O0-NEXT: sra $1, $1, 24 ++; MIPS32O0-NEXT: # %bb.3: # %entry ++; MIPS32O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ++; MIPS32O0-NEXT: # %bb.4: # %entry ++; MIPS32O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload ++; MIPS32O0-NEXT: sll $1, $1, 24 ++; MIPS32O0-NEXT: sra $2, $1, 24 ++; MIPS32O0-NEXT: addiu $sp, $sp, 8 ++; MIPS32O0-NEXT: jr $ra ++; MIPS32O0-NEXT: nop ++; ++; MIPS32R2-LABEL: AtomicLoadNand8: ++; MIPS32R2: # %bb.0: # %entry ++; MIPS32R2-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32R2-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32R2-NEXT: addu $1, $2, $25 ++; MIPS32R2-NEXT: lw $1, %got(y)($1) ++; MIPS32R2-NEXT: addiu $2, $zero, -4 ++; MIPS32R2-NEXT: and $3, $1, $2 ++; MIPS32R2-NEXT: andi $1, $1, 3 ++; MIPS32R2-NEXT: sll $1, $1, 3 ++; MIPS32R2-NEXT: ori $2, $zero, 255 ++; MIPS32R2-NEXT: sllv $5, $2, $1 ++; MIPS32R2-NEXT: nor $6, $zero, $5 ++; MIPS32R2-NEXT: sllv $4, $4, $1 ++; MIPS32R2-NEXT: $BB10_1: # %entry ++; MIPS32R2-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32R2-NEXT: ll $7, 0($3) ++; MIPS32R2-NEXT: and $8, $7, $4 ++; MIPS32R2-NEXT: nor $8, $zero, $8 ++; MIPS32R2-NEXT: and $8, $8, $5 ++; MIPS32R2-NEXT: and $9, $7, $6 ++; MIPS32R2-NEXT: or $9, $9, $8 ++; MIPS32R2-NEXT: sc $9, 0($3) ++; MIPS32R2-NEXT: beqz $9, $BB10_1 ++; MIPS32R2-NEXT: nop ++; MIPS32R2-NEXT: # %bb.2: # %entry ++; MIPS32R2-NEXT: and $2, $7, $5 ++; MIPS32R2-NEXT: srlv $2, $2, $1 ++; MIPS32R2-NEXT: seb $2, $2 ++; MIPS32R2-NEXT: # %bb.3: # %entry ++; MIPS32R2-NEXT: jr $ra ++; MIPS32R2-NEXT: nop ++; ++; MIPS32R6-LABEL: AtomicLoadNand8: ++; MIPS32R6: # %bb.0: # %entry ++; MIPS32R6-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32R6-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32R6-NEXT: addu $1, $2, $25 ++; MIPS32R6-NEXT: lw $1, %got(y)($1) ++; MIPS32R6-NEXT: addiu $2, $zero, -4 ++; MIPS32R6-NEXT: and $3, $1, $2 ++; MIPS32R6-NEXT: andi $1, $1, 3 ++; MIPS32R6-NEXT: sll $1, $1, 3 ++; MIPS32R6-NEXT: ori $2, $zero, 255 ++; MIPS32R6-NEXT: sllv $5, $2, $1 ++; MIPS32R6-NEXT: nor $6, $zero, $5 ++; MIPS32R6-NEXT: sllv $4, $4, $1 ++; MIPS32R6-NEXT: $BB10_1: # %entry ++; MIPS32R6-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32R6-NEXT: ll $7, 0($3) ++; MIPS32R6-NEXT: and $8, $7, $4 ++; MIPS32R6-NEXT: nor $8, $zero, $8 ++; MIPS32R6-NEXT: and $8, $8, $5 ++; MIPS32R6-NEXT: and $9, $7, $6 ++; MIPS32R6-NEXT: or $9, $9, $8 ++; MIPS32R6-NEXT: sc $9, 0($3) ++; MIPS32R6-NEXT: beqzc $9, $BB10_1 ++; MIPS32R6-NEXT: # %bb.2: # %entry ++; MIPS32R6-NEXT: and $2, $7, $5 ++; MIPS32R6-NEXT: srlv $2, $2, $1 ++; MIPS32R6-NEXT: seb $2, $2 ++; MIPS32R6-NEXT: # %bb.3: # %entry ++; MIPS32R6-NEXT: jrc $ra ++; ++; MIPS32R6O0-LABEL: AtomicLoadNand8: ++; MIPS32R6O0: # %bb.0: # %entry ++; MIPS32R6O0-NEXT: lui $2, 
%hi(_gp_disp) ++; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32R6O0-NEXT: addiu $sp, $sp, -8 ++; MIPS32R6O0-NEXT: addu $1, $2, $25 ++; MIPS32R6O0-NEXT: # kill: def $v0 killed $a0 ++; MIPS32R6O0-NEXT: lw $1, %got(y)($1) ++; MIPS32R6O0-NEXT: addiu $2, $zero, -4 ++; MIPS32R6O0-NEXT: and $5, $1, $2 ++; MIPS32R6O0-NEXT: andi $1, $1, 3 ++; MIPS32R6O0-NEXT: sll $9, $1, 3 ++; MIPS32R6O0-NEXT: ori $1, $zero, 255 ++; MIPS32R6O0-NEXT: sllv $7, $1, $9 ++; MIPS32R6O0-NEXT: nor $8, $zero, $7 ++; MIPS32R6O0-NEXT: sllv $6, $4, $9 ++; MIPS32R6O0-NEXT: $BB10_1: # %entry ++; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32R6O0-NEXT: ll $2, 0($5) ++; MIPS32R6O0-NEXT: and $3, $2, $6 ++; MIPS32R6O0-NEXT: nor $3, $zero, $3 ++; MIPS32R6O0-NEXT: and $3, $3, $7 ++; MIPS32R6O0-NEXT: and $4, $2, $8 ++; MIPS32R6O0-NEXT: or $4, $4, $3 ++; MIPS32R6O0-NEXT: sc $4, 0($5) ++; MIPS32R6O0-NEXT: beqzc $4, $BB10_1 ++; MIPS32R6O0-NEXT: # %bb.2: # %entry ++; MIPS32R6O0-NEXT: and $1, $2, $7 ++; MIPS32R6O0-NEXT: srlv $1, $1, $9 ++; MIPS32R6O0-NEXT: seb $1, $1 ++; MIPS32R6O0-NEXT: # %bb.3: # %entry ++; MIPS32R6O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ++; MIPS32R6O0-NEXT: # %bb.4: # %entry ++; MIPS32R6O0-NEXT: lw $2, 4($sp) # 4-byte Folded Reload ++; MIPS32R6O0-NEXT: addiu $sp, $sp, 8 ++; MIPS32R6O0-NEXT: jrc $ra ++; ++; MIPS4-LABEL: AtomicLoadNand8: ++; MIPS4: # %bb.0: # %entry ++; MIPS4-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadNand8))) ++; MIPS4-NEXT: daddu $1, $1, $25 ++; MIPS4-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadNand8))) ++; MIPS4-NEXT: ld $1, %got_disp(y)($1) ++; MIPS4-NEXT: daddiu $2, $zero, -4 ++; MIPS4-NEXT: and $3, $1, $2 ++; MIPS4-NEXT: andi $1, $1, 3 ++; MIPS4-NEXT: sll $1, $1, 3 ++; MIPS4-NEXT: ori $2, $zero, 255 ++; MIPS4-NEXT: sllv $5, $2, $1 ++; MIPS4-NEXT: nor $6, $zero, $5 ++; MIPS4-NEXT: sllv $4, $4, $1 ++; MIPS4-NEXT: .LBB10_1: # %entry ++; MIPS4-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS4-NEXT: ll $7, 0($3) ++; MIPS4-NEXT: and $8, $7, $4 ++; MIPS4-NEXT: nor $8, $zero, $8 ++; MIPS4-NEXT: and $8, $8, $5 ++; MIPS4-NEXT: and $9, $7, $6 ++; MIPS4-NEXT: or $9, $9, $8 ++; MIPS4-NEXT: sc $9, 0($3) ++; MIPS4-NEXT: beqz $9, .LBB10_1 ++; MIPS4-NEXT: nop ++; MIPS4-NEXT: # %bb.2: # %entry ++; MIPS4-NEXT: and $2, $7, $5 ++; MIPS4-NEXT: srlv $2, $2, $1 ++; MIPS4-NEXT: sll $2, $2, 24 ++; MIPS4-NEXT: sra $2, $2, 24 ++; MIPS4-NEXT: # %bb.3: # %entry ++; MIPS4-NEXT: jr $ra ++; MIPS4-NEXT: nop ++; ++; MIPS64-LABEL: AtomicLoadNand8: ++; MIPS64: # %bb.0: # %entry ++; MIPS64-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadNand8))) ++; MIPS64-NEXT: daddu $1, $1, $25 ++; MIPS64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadNand8))) ++; MIPS64-NEXT: ld $1, %got_disp(y)($1) ++; MIPS64-NEXT: daddiu $2, $zero, -4 ++; MIPS64-NEXT: and $3, $1, $2 ++; MIPS64-NEXT: andi $1, $1, 3 ++; MIPS64-NEXT: sll $1, $1, 3 ++; MIPS64-NEXT: ori $2, $zero, 255 ++; MIPS64-NEXT: sllv $5, $2, $1 ++; MIPS64-NEXT: nor $6, $zero, $5 ++; MIPS64-NEXT: sllv $4, $4, $1 ++; MIPS64-NEXT: .LBB10_1: # %entry ++; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64-NEXT: ll $7, 0($3) ++; MIPS64-NEXT: and $8, $7, $4 ++; MIPS64-NEXT: nor $8, $zero, $8 ++; MIPS64-NEXT: and $8, $8, $5 ++; MIPS64-NEXT: and $9, $7, $6 ++; MIPS64-NEXT: or $9, $9, $8 ++; MIPS64-NEXT: sc $9, 0($3) ++; MIPS64-NEXT: beqz $9, .LBB10_1 ++; MIPS64-NEXT: nop ++; MIPS64-NEXT: # %bb.2: # %entry ++; MIPS64-NEXT: and $2, $7, $5 ++; MIPS64-NEXT: srlv $2, $2, $1 ++; MIPS64-NEXT: sll $2, $2, 24 ++; MIPS64-NEXT: sra $2, $2, 24 ++; MIPS64-NEXT: # %bb.3: # %entry 
++; MIPS64-NEXT: jr $ra ++; MIPS64-NEXT: nop ++; ++; MIPS64R2-LABEL: AtomicLoadNand8: ++; MIPS64R2: # %bb.0: # %entry ++; MIPS64R2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadNand8))) ++; MIPS64R2-NEXT: daddu $1, $1, $25 ++; MIPS64R2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadNand8))) ++; MIPS64R2-NEXT: ld $1, %got_disp(y)($1) ++; MIPS64R2-NEXT: daddiu $2, $zero, -4 ++; MIPS64R2-NEXT: and $3, $1, $2 ++; MIPS64R2-NEXT: andi $1, $1, 3 ++; MIPS64R2-NEXT: sll $1, $1, 3 ++; MIPS64R2-NEXT: ori $2, $zero, 255 ++; MIPS64R2-NEXT: sllv $5, $2, $1 ++; MIPS64R2-NEXT: nor $6, $zero, $5 ++; MIPS64R2-NEXT: sllv $4, $4, $1 ++; MIPS64R2-NEXT: .LBB10_1: # %entry ++; MIPS64R2-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R2-NEXT: ll $7, 0($3) ++; MIPS64R2-NEXT: and $8, $7, $4 ++; MIPS64R2-NEXT: nor $8, $zero, $8 ++; MIPS64R2-NEXT: and $8, $8, $5 ++; MIPS64R2-NEXT: and $9, $7, $6 ++; MIPS64R2-NEXT: or $9, $9, $8 ++; MIPS64R2-NEXT: sc $9, 0($3) ++; MIPS64R2-NEXT: beqz $9, .LBB10_1 ++; MIPS64R2-NEXT: nop ++; MIPS64R2-NEXT: # %bb.2: # %entry ++; MIPS64R2-NEXT: and $2, $7, $5 ++; MIPS64R2-NEXT: srlv $2, $2, $1 ++; MIPS64R2-NEXT: seb $2, $2 ++; MIPS64R2-NEXT: # %bb.3: # %entry ++; MIPS64R2-NEXT: jr $ra ++; MIPS64R2-NEXT: nop ++; ++; MIPS64R6-LABEL: AtomicLoadNand8: ++; MIPS64R6: # %bb.0: # %entry ++; MIPS64R6-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadNand8))) ++; MIPS64R6-NEXT: daddu $1, $1, $25 ++; MIPS64R6-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadNand8))) ++; MIPS64R6-NEXT: ld $1, %got_disp(y)($1) ++; MIPS64R6-NEXT: daddiu $2, $zero, -4 ++; MIPS64R6-NEXT: and $3, $1, $2 ++; MIPS64R6-NEXT: andi $1, $1, 3 ++; MIPS64R6-NEXT: sll $1, $1, 3 ++; MIPS64R6-NEXT: ori $2, $zero, 255 ++; MIPS64R6-NEXT: sllv $5, $2, $1 ++; MIPS64R6-NEXT: nor $6, $zero, $5 ++; MIPS64R6-NEXT: sllv $4, $4, $1 ++; MIPS64R6-NEXT: .LBB10_1: # %entry ++; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6-NEXT: ll $7, 0($3) ++; MIPS64R6-NEXT: and $8, $7, $4 ++; MIPS64R6-NEXT: nor $8, $zero, $8 ++; MIPS64R6-NEXT: and $8, $8, $5 ++; MIPS64R6-NEXT: and $9, $7, $6 ++; MIPS64R6-NEXT: or $9, $9, $8 ++; MIPS64R6-NEXT: sc $9, 0($3) ++; MIPS64R6-NEXT: beqzc $9, .LBB10_1 ++; MIPS64R6-NEXT: # %bb.2: # %entry ++; MIPS64R6-NEXT: and $2, $7, $5 ++; MIPS64R6-NEXT: srlv $2, $2, $1 ++; MIPS64R6-NEXT: seb $2, $2 ++; MIPS64R6-NEXT: # %bb.3: # %entry ++; MIPS64R6-NEXT: jrc $ra ++; ++; MIPS64R6O0-LABEL: AtomicLoadNand8: ++; MIPS64R6O0: # %bb.0: # %entry ++; MIPS64R6O0-NEXT: daddiu $sp, $sp, -16 ++; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadNand8))) ++; MIPS64R6O0-NEXT: daddu $1, $1, $25 ++; MIPS64R6O0-NEXT: daddiu $2, $1, %lo(%neg(%gp_rel(AtomicLoadNand8))) ++; MIPS64R6O0-NEXT: move $1, $4 ++; MIPS64R6O0-NEXT: ld $2, %got_disp(y)($2) ++; MIPS64R6O0-NEXT: daddiu $3, $zero, -4 ++; MIPS64R6O0-NEXT: and $5, $2, $3 ++; MIPS64R6O0-NEXT: andi $2, $2, 3 ++; MIPS64R6O0-NEXT: xori $2, $2, 3 ++; MIPS64R6O0-NEXT: sll $9, $2, 3 ++; MIPS64R6O0-NEXT: ori $2, $zero, 255 ++; MIPS64R6O0-NEXT: sllv $7, $2, $9 ++; MIPS64R6O0-NEXT: nor $8, $zero, $7 ++; MIPS64R6O0-NEXT: sllv $6, $1, $9 ++; MIPS64R6O0-NEXT: .LBB10_1: # %entry ++; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6O0-NEXT: ll $2, 0($5) ++; MIPS64R6O0-NEXT: and $3, $2, $6 ++; MIPS64R6O0-NEXT: nor $3, $zero, $3 ++; MIPS64R6O0-NEXT: and $3, $3, $7 ++; MIPS64R6O0-NEXT: and $4, $2, $8 ++; MIPS64R6O0-NEXT: or $4, $4, $3 ++; MIPS64R6O0-NEXT: sc $4, 0($5) ++; MIPS64R6O0-NEXT: beqzc $4, .LBB10_1 ++; MIPS64R6O0-NEXT: # %bb.2: # %entry ++; MIPS64R6O0-NEXT: and $1, $2, $7 ++; 
MIPS64R6O0-NEXT: srlv $1, $1, $9 ++; MIPS64R6O0-NEXT: seb $1, $1 ++; MIPS64R6O0-NEXT: # %bb.3: # %entry ++; MIPS64R6O0-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ++; MIPS64R6O0-NEXT: # %bb.4: # %entry ++; MIPS64R6O0-NEXT: lw $2, 12($sp) # 4-byte Folded Reload ++; MIPS64R6O0-NEXT: daddiu $sp, $sp, 16 ++; MIPS64R6O0-NEXT: jrc $ra ++; ++; MM32-LABEL: AtomicLoadNand8: ++; MM32: # %bb.0: # %entry ++; MM32-NEXT: lui $2, %hi(_gp_disp) ++; MM32-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MM32-NEXT: addu $2, $2, $25 ++; MM32-NEXT: lw $1, %got(y)($2) ++; MM32-NEXT: addiu $2, $zero, -4 ++; MM32-NEXT: and $3, $1, $2 ++; MM32-NEXT: andi $1, $1, 3 ++; MM32-NEXT: sll $1, $1, 3 ++; MM32-NEXT: ori $2, $zero, 255 ++; MM32-NEXT: sllv $5, $2, $1 ++; MM32-NEXT: nor $6, $zero, $5 ++; MM32-NEXT: sllv $4, $4, $1 ++; MM32-NEXT: $BB10_1: # %entry ++; MM32-NEXT: # =>This Inner Loop Header: Depth=1 ++; MM32-NEXT: ll $7, 0($3) ++; MM32-NEXT: and $8, $7, $4 ++; MM32-NEXT: nor $8, $zero, $8 ++; MM32-NEXT: and $8, $8, $5 ++; MM32-NEXT: and $9, $7, $6 ++; MM32-NEXT: or $9, $9, $8 ++; MM32-NEXT: sc $9, 0($3) ++; MM32-NEXT: beqzc $9, $BB10_1 ++; MM32-NEXT: # %bb.2: # %entry ++; MM32-NEXT: and $2, $7, $5 ++; MM32-NEXT: srlv $2, $2, $1 ++; MM32-NEXT: seb $2, $2 ++; MM32-NEXT: # %bb.3: # %entry ++; MM32-NEXT: jrc $ra ++; ++; O1-LABEL: AtomicLoadNand8: ++; O1: # %bb.0: # %entry ++; O1-NEXT: lui $2, %hi(_gp_disp) ++; O1-NEXT: addiu $2, $2, %lo(_gp_disp) ++; O1-NEXT: addu $1, $2, $25 ++; O1-NEXT: lw $1, %got(y)($1) ++; O1-NEXT: addiu $2, $zero, -4 ++; O1-NEXT: and $3, $1, $2 ++; O1-NEXT: andi $1, $1, 3 ++; O1-NEXT: sll $1, $1, 3 ++; O1-NEXT: ori $2, $zero, 255 ++; O1-NEXT: sllv $5, $2, $1 ++; O1-NEXT: nor $6, $zero, $5 ++; O1-NEXT: sllv $4, $4, $1 ++; O1-NEXT: $BB10_1: # %entry ++; O1-NEXT: # =>This Inner Loop Header: Depth=1 ++; O1-NEXT: ll $7, 0($3) ++; O1-NEXT: and $8, $7, $4 ++; O1-NEXT: nor $8, $zero, $8 ++; O1-NEXT: and $8, $8, $5 ++; O1-NEXT: and $9, $7, $6 ++; O1-NEXT: or $9, $9, $8 ++; O1-NEXT: sc $9, 0($3) ++; O1-NEXT: beqz $9, $BB10_1 ++; O1-NEXT: nop ++; O1-NEXT: # %bb.2: # %entry ++; O1-NEXT: and $2, $7, $5 ++; O1-NEXT: srlv $2, $2, $1 ++; O1-NEXT: sll $2, $2, 24 ++; O1-NEXT: sra $2, $2, 24 ++; O1-NEXT: # %bb.3: # %entry ++; O1-NEXT: jr $ra ++; O1-NEXT: nop ++; ++; O2-LABEL: AtomicLoadNand8: ++; O2: # %bb.0: # %entry ++; O2-NEXT: lui $2, %hi(_gp_disp) ++; O2-NEXT: addiu $2, $2, %lo(_gp_disp) ++; O2-NEXT: addu $1, $2, $25 ++; O2-NEXT: lw $1, %got(y)($1) ++; O2-NEXT: addiu $2, $zero, -4 ++; O2-NEXT: and $3, $1, $2 ++; O2-NEXT: andi $1, $1, 3 ++; O2-NEXT: sll $1, $1, 3 ++; O2-NEXT: ori $2, $zero, 255 ++; O2-NEXT: sllv $5, $2, $1 ++; O2-NEXT: nor $6, $zero, $5 ++; O2-NEXT: sllv $4, $4, $1 ++; O2-NEXT: $BB10_1: # %entry ++; O2-NEXT: # =>This Inner Loop Header: Depth=1 ++; O2-NEXT: ll $7, 0($3) ++; O2-NEXT: and $8, $7, $4 ++; O2-NEXT: nor $8, $zero, $8 ++; O2-NEXT: and $8, $8, $5 ++; O2-NEXT: and $9, $7, $6 ++; O2-NEXT: or $9, $9, $8 ++; O2-NEXT: sc $9, 0($3) ++; O2-NEXT: beqz $9, $BB10_1 ++; O2-NEXT: nop ++; O2-NEXT: # %bb.2: # %entry ++; O2-NEXT: and $2, $7, $5 ++; O2-NEXT: srlv $2, $2, $1 ++; O2-NEXT: sll $2, $2, 24 ++; O2-NEXT: sra $2, $2, 24 ++; O2-NEXT: # %bb.3: # %entry ++; O2-NEXT: jr $ra ++; O2-NEXT: nop ++; ++; O3-LABEL: AtomicLoadNand8: ++; O3: # %bb.0: # %entry ++; O3-NEXT: lui $2, %hi(_gp_disp) ++; O3-NEXT: addiu $2, $2, %lo(_gp_disp) ++; O3-NEXT: addu $1, $2, $25 ++; O3-NEXT: addiu $2, $zero, -4 ++; O3-NEXT: lw $1, %got(y)($1) ++; O3-NEXT: and $3, $1, $2 ++; O3-NEXT: andi $1, $1, 3 ++; O3-NEXT: ori $2, $zero, 255 
++; O3-NEXT: sll $1, $1, 3 ++; O3-NEXT: sllv $5, $2, $1 ++; O3-NEXT: sllv $4, $4, $1 ++; O3-NEXT: nor $6, $zero, $5 ++; O3-NEXT: $BB10_1: # %entry ++; O3-NEXT: # =>This Inner Loop Header: Depth=1 ++; O3-NEXT: ll $7, 0($3) ++; O3-NEXT: and $8, $7, $4 ++; O3-NEXT: nor $8, $zero, $8 ++; O3-NEXT: and $8, $8, $5 ++; O3-NEXT: and $9, $7, $6 ++; O3-NEXT: or $9, $9, $8 ++; O3-NEXT: sc $9, 0($3) ++; O3-NEXT: beqz $9, $BB10_1 ++; O3-NEXT: nop ++; O3-NEXT: # %bb.2: # %entry ++; O3-NEXT: and $2, $7, $5 ++; O3-NEXT: srlv $2, $2, $1 ++; O3-NEXT: sll $2, $2, 24 ++; O3-NEXT: sra $2, $2, 24 ++; O3-NEXT: # %bb.3: # %entry ++; O3-NEXT: jr $ra ++; O3-NEXT: nop ++; ++; MIPS32EB-LABEL: AtomicLoadNand8: ++; MIPS32EB: # %bb.0: # %entry ++; MIPS32EB-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32EB-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32EB-NEXT: addu $1, $2, $25 ++; MIPS32EB-NEXT: lw $1, %got(y)($1) ++; MIPS32EB-NEXT: addiu $2, $zero, -4 ++; MIPS32EB-NEXT: and $3, $1, $2 ++; MIPS32EB-NEXT: andi $1, $1, 3 ++; MIPS32EB-NEXT: xori $1, $1, 3 ++; MIPS32EB-NEXT: sll $1, $1, 3 ++; MIPS32EB-NEXT: ori $2, $zero, 255 ++; MIPS32EB-NEXT: sllv $5, $2, $1 ++; MIPS32EB-NEXT: nor $6, $zero, $5 ++; MIPS32EB-NEXT: sllv $4, $4, $1 ++; MIPS32EB-NEXT: $BB10_1: # %entry ++; MIPS32EB-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32EB-NEXT: ll $7, 0($3) ++; MIPS32EB-NEXT: and $8, $7, $4 ++; MIPS32EB-NEXT: nor $8, $zero, $8 ++; MIPS32EB-NEXT: and $8, $8, $5 ++; MIPS32EB-NEXT: and $9, $7, $6 ++; MIPS32EB-NEXT: or $9, $9, $8 ++; MIPS32EB-NEXT: sc $9, 0($3) ++; MIPS32EB-NEXT: beqz $9, $BB10_1 ++; MIPS32EB-NEXT: nop ++; MIPS32EB-NEXT: # %bb.2: # %entry ++; MIPS32EB-NEXT: and $2, $7, $5 ++; MIPS32EB-NEXT: srlv $2, $2, $1 ++; MIPS32EB-NEXT: sll $2, $2, 24 ++; MIPS32EB-NEXT: sra $2, $2, 24 ++; MIPS32EB-NEXT: # %bb.3: # %entry ++; MIPS32EB-NEXT: jr $ra ++; MIPS32EB-NEXT: nop ++entry: ++ %0 = atomicrmw nand i8* @y, i8 %incr monotonic ++ ret i8 %0 ++ ++} ++ ++define signext i8 @AtomicSwap8(i8 signext %newval) nounwind { ++; MIPS32-LABEL: AtomicSwap8: ++; MIPS32: # %bb.0: # %entry ++; MIPS32-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32-NEXT: addu $1, $2, $25 ++; MIPS32-NEXT: lw $1, %got(y)($1) ++; MIPS32-NEXT: addiu $2, $zero, -4 ++; MIPS32-NEXT: and $3, $1, $2 ++; MIPS32-NEXT: andi $1, $1, 3 ++; MIPS32-NEXT: sll $1, $1, 3 ++; MIPS32-NEXT: ori $2, $zero, 255 ++; MIPS32-NEXT: sllv $5, $2, $1 ++; MIPS32-NEXT: nor $6, $zero, $5 ++; MIPS32-NEXT: sllv $4, $4, $1 ++; MIPS32-NEXT: $BB11_1: # %entry ++; MIPS32-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32-NEXT: ll $7, 0($3) ++; MIPS32-NEXT: and $8, $4, $5 ++; MIPS32-NEXT: and $9, $7, $6 ++; MIPS32-NEXT: or $9, $9, $8 ++; MIPS32-NEXT: sc $9, 0($3) ++; MIPS32-NEXT: beqz $9, $BB11_1 ++; MIPS32-NEXT: nop ++; MIPS32-NEXT: # %bb.2: # %entry ++; MIPS32-NEXT: and $2, $7, $5 ++; MIPS32-NEXT: srlv $2, $2, $1 ++; MIPS32-NEXT: sll $2, $2, 24 ++; MIPS32-NEXT: sra $2, $2, 24 ++; MIPS32-NEXT: # %bb.3: # %entry ++; MIPS32-NEXT: jr $ra ++; MIPS32-NEXT: nop ++; ++; MIPS32O0-LABEL: AtomicSwap8: ++; MIPS32O0: # %bb.0: # %entry ++; MIPS32O0-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32O0-NEXT: addiu $sp, $sp, -8 ++; MIPS32O0-NEXT: addu $1, $2, $25 ++; MIPS32O0-NEXT: lw $1, %got(y)($1) ++; MIPS32O0-NEXT: addiu $2, $zero, -4 ++; MIPS32O0-NEXT: and $5, $1, $2 ++; MIPS32O0-NEXT: andi $1, $1, 3 ++; MIPS32O0-NEXT: sll $9, $1, 3 ++; MIPS32O0-NEXT: ori $1, $zero, 255 ++; MIPS32O0-NEXT: sllv $7, $1, $9 ++; MIPS32O0-NEXT: nor $8, $zero, $7 ++; 
MIPS32O0-NEXT: sllv $6, $4, $9 ++; MIPS32O0-NEXT: $BB11_1: # %entry ++; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32O0-NEXT: ll $2, 0($5) ++; MIPS32O0-NEXT: and $3, $6, $7 ++; MIPS32O0-NEXT: and $4, $2, $8 ++; MIPS32O0-NEXT: or $4, $4, $3 ++; MIPS32O0-NEXT: sc $4, 0($5) ++; MIPS32O0-NEXT: beqz $4, $BB11_1 ++; MIPS32O0-NEXT: nop ++; MIPS32O0-NEXT: # %bb.2: # %entry ++; MIPS32O0-NEXT: and $1, $2, $7 ++; MIPS32O0-NEXT: srlv $1, $1, $9 ++; MIPS32O0-NEXT: sll $1, $1, 24 ++; MIPS32O0-NEXT: sra $1, $1, 24 ++; MIPS32O0-NEXT: # %bb.3: # %entry ++; MIPS32O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ++; MIPS32O0-NEXT: # %bb.4: # %entry ++; MIPS32O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload ++; MIPS32O0-NEXT: sll $1, $1, 24 ++; MIPS32O0-NEXT: sra $2, $1, 24 ++; MIPS32O0-NEXT: addiu $sp, $sp, 8 ++; MIPS32O0-NEXT: jr $ra ++; MIPS32O0-NEXT: nop ++; ++; MIPS32R2-LABEL: AtomicSwap8: ++; MIPS32R2: # %bb.0: # %entry ++; MIPS32R2-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32R2-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32R2-NEXT: addu $1, $2, $25 ++; MIPS32R2-NEXT: lw $1, %got(y)($1) ++; MIPS32R2-NEXT: addiu $2, $zero, -4 ++; MIPS32R2-NEXT: and $3, $1, $2 ++; MIPS32R2-NEXT: andi $1, $1, 3 ++; MIPS32R2-NEXT: sll $1, $1, 3 ++; MIPS32R2-NEXT: ori $2, $zero, 255 ++; MIPS32R2-NEXT: sllv $5, $2, $1 ++; MIPS32R2-NEXT: nor $6, $zero, $5 ++; MIPS32R2-NEXT: sllv $4, $4, $1 ++; MIPS32R2-NEXT: $BB11_1: # %entry ++; MIPS32R2-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32R2-NEXT: ll $7, 0($3) ++; MIPS32R2-NEXT: and $8, $4, $5 ++; MIPS32R2-NEXT: and $9, $7, $6 ++; MIPS32R2-NEXT: or $9, $9, $8 ++; MIPS32R2-NEXT: sc $9, 0($3) ++; MIPS32R2-NEXT: beqz $9, $BB11_1 ++; MIPS32R2-NEXT: nop ++; MIPS32R2-NEXT: # %bb.2: # %entry ++; MIPS32R2-NEXT: and $2, $7, $5 ++; MIPS32R2-NEXT: srlv $2, $2, $1 ++; MIPS32R2-NEXT: seb $2, $2 ++; MIPS32R2-NEXT: # %bb.3: # %entry ++; MIPS32R2-NEXT: jr $ra ++; MIPS32R2-NEXT: nop ++; ++; MIPS32R6-LABEL: AtomicSwap8: ++; MIPS32R6: # %bb.0: # %entry ++; MIPS32R6-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32R6-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32R6-NEXT: addu $1, $2, $25 ++; MIPS32R6-NEXT: lw $1, %got(y)($1) ++; MIPS32R6-NEXT: addiu $2, $zero, -4 ++; MIPS32R6-NEXT: and $3, $1, $2 ++; MIPS32R6-NEXT: andi $1, $1, 3 ++; MIPS32R6-NEXT: sll $1, $1, 3 ++; MIPS32R6-NEXT: ori $2, $zero, 255 ++; MIPS32R6-NEXT: sllv $5, $2, $1 ++; MIPS32R6-NEXT: nor $6, $zero, $5 ++; MIPS32R6-NEXT: sllv $4, $4, $1 ++; MIPS32R6-NEXT: $BB11_1: # %entry ++; MIPS32R6-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32R6-NEXT: ll $7, 0($3) ++; MIPS32R6-NEXT: and $8, $4, $5 ++; MIPS32R6-NEXT: and $9, $7, $6 ++; MIPS32R6-NEXT: or $9, $9, $8 ++; MIPS32R6-NEXT: sc $9, 0($3) ++; MIPS32R6-NEXT: beqzc $9, $BB11_1 ++; MIPS32R6-NEXT: # %bb.2: # %entry ++; MIPS32R6-NEXT: and $2, $7, $5 ++; MIPS32R6-NEXT: srlv $2, $2, $1 ++; MIPS32R6-NEXT: seb $2, $2 ++; MIPS32R6-NEXT: # %bb.3: # %entry ++; MIPS32R6-NEXT: jrc $ra ++; ++; MIPS32R6O0-LABEL: AtomicSwap8: ++; MIPS32R6O0: # %bb.0: # %entry ++; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32R6O0-NEXT: addiu $sp, $sp, -8 ++; MIPS32R6O0-NEXT: addu $1, $2, $25 ++; MIPS32R6O0-NEXT: # kill: def $v0 killed $a0 ++; MIPS32R6O0-NEXT: lw $1, %got(y)($1) ++; MIPS32R6O0-NEXT: addiu $2, $zero, -4 ++; MIPS32R6O0-NEXT: and $5, $1, $2 ++; MIPS32R6O0-NEXT: andi $1, $1, 3 ++; MIPS32R6O0-NEXT: sll $9, $1, 3 ++; MIPS32R6O0-NEXT: ori $1, $zero, 255 ++; MIPS32R6O0-NEXT: sllv $7, $1, $9 ++; MIPS32R6O0-NEXT: nor $8, $zero, $7 ++; MIPS32R6O0-NEXT: sllv $6, $4, 
$9 ++; MIPS32R6O0-NEXT: $BB11_1: # %entry ++; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32R6O0-NEXT: ll $2, 0($5) ++; MIPS32R6O0-NEXT: and $3, $6, $7 ++; MIPS32R6O0-NEXT: and $4, $2, $8 ++; MIPS32R6O0-NEXT: or $4, $4, $3 ++; MIPS32R6O0-NEXT: sc $4, 0($5) ++; MIPS32R6O0-NEXT: beqzc $4, $BB11_1 ++; MIPS32R6O0-NEXT: # %bb.2: # %entry ++; MIPS32R6O0-NEXT: and $1, $2, $7 ++; MIPS32R6O0-NEXT: srlv $1, $1, $9 ++; MIPS32R6O0-NEXT: seb $1, $1 ++; MIPS32R6O0-NEXT: # %bb.3: # %entry ++; MIPS32R6O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ++; MIPS32R6O0-NEXT: # %bb.4: # %entry ++; MIPS32R6O0-NEXT: lw $2, 4($sp) # 4-byte Folded Reload ++; MIPS32R6O0-NEXT: addiu $sp, $sp, 8 ++; MIPS32R6O0-NEXT: jrc $ra ++; ++; MIPS4-LABEL: AtomicSwap8: ++; MIPS4: # %bb.0: # %entry ++; MIPS4-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicSwap8))) ++; MIPS4-NEXT: daddu $1, $1, $25 ++; MIPS4-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicSwap8))) ++; MIPS4-NEXT: ld $1, %got_disp(y)($1) ++; MIPS4-NEXT: daddiu $2, $zero, -4 ++; MIPS4-NEXT: and $3, $1, $2 ++; MIPS4-NEXT: andi $1, $1, 3 ++; MIPS4-NEXT: sll $1, $1, 3 ++; MIPS4-NEXT: ori $2, $zero, 255 ++; MIPS4-NEXT: sllv $5, $2, $1 ++; MIPS4-NEXT: nor $6, $zero, $5 ++; MIPS4-NEXT: sllv $4, $4, $1 ++; MIPS4-NEXT: .LBB11_1: # %entry ++; MIPS4-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS4-NEXT: ll $7, 0($3) ++; MIPS4-NEXT: and $8, $4, $5 ++; MIPS4-NEXT: and $9, $7, $6 ++; MIPS4-NEXT: or $9, $9, $8 ++; MIPS4-NEXT: sc $9, 0($3) ++; MIPS4-NEXT: beqz $9, .LBB11_1 ++; MIPS4-NEXT: nop ++; MIPS4-NEXT: # %bb.2: # %entry ++; MIPS4-NEXT: and $2, $7, $5 ++; MIPS4-NEXT: srlv $2, $2, $1 ++; MIPS4-NEXT: sll $2, $2, 24 ++; MIPS4-NEXT: sra $2, $2, 24 ++; MIPS4-NEXT: # %bb.3: # %entry ++; MIPS4-NEXT: jr $ra ++; MIPS4-NEXT: nop ++; ++; MIPS64-LABEL: AtomicSwap8: ++; MIPS64: # %bb.0: # %entry ++; MIPS64-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicSwap8))) ++; MIPS64-NEXT: daddu $1, $1, $25 ++; MIPS64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicSwap8))) ++; MIPS64-NEXT: ld $1, %got_disp(y)($1) ++; MIPS64-NEXT: daddiu $2, $zero, -4 ++; MIPS64-NEXT: and $3, $1, $2 ++; MIPS64-NEXT: andi $1, $1, 3 ++; MIPS64-NEXT: sll $1, $1, 3 ++; MIPS64-NEXT: ori $2, $zero, 255 ++; MIPS64-NEXT: sllv $5, $2, $1 ++; MIPS64-NEXT: nor $6, $zero, $5 ++; MIPS64-NEXT: sllv $4, $4, $1 ++; MIPS64-NEXT: .LBB11_1: # %entry ++; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64-NEXT: ll $7, 0($3) ++; MIPS64-NEXT: and $8, $4, $5 ++; MIPS64-NEXT: and $9, $7, $6 ++; MIPS64-NEXT: or $9, $9, $8 ++; MIPS64-NEXT: sc $9, 0($3) ++; MIPS64-NEXT: beqz $9, .LBB11_1 ++; MIPS64-NEXT: nop ++; MIPS64-NEXT: # %bb.2: # %entry ++; MIPS64-NEXT: and $2, $7, $5 ++; MIPS64-NEXT: srlv $2, $2, $1 ++; MIPS64-NEXT: sll $2, $2, 24 ++; MIPS64-NEXT: sra $2, $2, 24 ++; MIPS64-NEXT: # %bb.3: # %entry ++; MIPS64-NEXT: jr $ra ++; MIPS64-NEXT: nop ++; ++; MIPS64R2-LABEL: AtomicSwap8: ++; MIPS64R2: # %bb.0: # %entry ++; MIPS64R2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicSwap8))) ++; MIPS64R2-NEXT: daddu $1, $1, $25 ++; MIPS64R2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicSwap8))) ++; MIPS64R2-NEXT: ld $1, %got_disp(y)($1) ++; MIPS64R2-NEXT: daddiu $2, $zero, -4 ++; MIPS64R2-NEXT: and $3, $1, $2 ++; MIPS64R2-NEXT: andi $1, $1, 3 ++; MIPS64R2-NEXT: sll $1, $1, 3 ++; MIPS64R2-NEXT: ori $2, $zero, 255 ++; MIPS64R2-NEXT: sllv $5, $2, $1 ++; MIPS64R2-NEXT: nor $6, $zero, $5 ++; MIPS64R2-NEXT: sllv $4, $4, $1 ++; MIPS64R2-NEXT: .LBB11_1: # %entry ++; MIPS64R2-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R2-NEXT: ll $7, 0($3) ++; MIPS64R2-NEXT: and $8, $4, 
$5 ++; MIPS64R2-NEXT: and $9, $7, $6 ++; MIPS64R2-NEXT: or $9, $9, $8 ++; MIPS64R2-NEXT: sc $9, 0($3) ++; MIPS64R2-NEXT: beqz $9, .LBB11_1 ++; MIPS64R2-NEXT: nop ++; MIPS64R2-NEXT: # %bb.2: # %entry ++; MIPS64R2-NEXT: and $2, $7, $5 ++; MIPS64R2-NEXT: srlv $2, $2, $1 ++; MIPS64R2-NEXT: seb $2, $2 ++; MIPS64R2-NEXT: # %bb.3: # %entry ++; MIPS64R2-NEXT: jr $ra ++; MIPS64R2-NEXT: nop ++; ++; MIPS64R6-LABEL: AtomicSwap8: ++; MIPS64R6: # %bb.0: # %entry ++; MIPS64R6-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicSwap8))) ++; MIPS64R6-NEXT: daddu $1, $1, $25 ++; MIPS64R6-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicSwap8))) ++; MIPS64R6-NEXT: ld $1, %got_disp(y)($1) ++; MIPS64R6-NEXT: daddiu $2, $zero, -4 ++; MIPS64R6-NEXT: and $3, $1, $2 ++; MIPS64R6-NEXT: andi $1, $1, 3 ++; MIPS64R6-NEXT: sll $1, $1, 3 ++; MIPS64R6-NEXT: ori $2, $zero, 255 ++; MIPS64R6-NEXT: sllv $5, $2, $1 ++; MIPS64R6-NEXT: nor $6, $zero, $5 ++; MIPS64R6-NEXT: sllv $4, $4, $1 ++; MIPS64R6-NEXT: .LBB11_1: # %entry ++; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6-NEXT: ll $7, 0($3) ++; MIPS64R6-NEXT: and $8, $4, $5 ++; MIPS64R6-NEXT: and $9, $7, $6 ++; MIPS64R6-NEXT: or $9, $9, $8 ++; MIPS64R6-NEXT: sc $9, 0($3) ++; MIPS64R6-NEXT: beqzc $9, .LBB11_1 ++; MIPS64R6-NEXT: # %bb.2: # %entry ++; MIPS64R6-NEXT: and $2, $7, $5 ++; MIPS64R6-NEXT: srlv $2, $2, $1 ++; MIPS64R6-NEXT: seb $2, $2 ++; MIPS64R6-NEXT: # %bb.3: # %entry ++; MIPS64R6-NEXT: jrc $ra ++; ++; MIPS64R6O0-LABEL: AtomicSwap8: ++; MIPS64R6O0: # %bb.0: # %entry ++; MIPS64R6O0-NEXT: daddiu $sp, $sp, -16 ++; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicSwap8))) ++; MIPS64R6O0-NEXT: daddu $1, $1, $25 ++; MIPS64R6O0-NEXT: daddiu $2, $1, %lo(%neg(%gp_rel(AtomicSwap8))) ++; MIPS64R6O0-NEXT: move $1, $4 ++; MIPS64R6O0-NEXT: ld $2, %got_disp(y)($2) ++; MIPS64R6O0-NEXT: daddiu $3, $zero, -4 ++; MIPS64R6O0-NEXT: and $5, $2, $3 ++; MIPS64R6O0-NEXT: andi $2, $2, 3 ++; MIPS64R6O0-NEXT: xori $2, $2, 3 ++; MIPS64R6O0-NEXT: sll $9, $2, 3 ++; MIPS64R6O0-NEXT: ori $2, $zero, 255 ++; MIPS64R6O0-NEXT: sllv $7, $2, $9 ++; MIPS64R6O0-NEXT: nor $8, $zero, $7 ++; MIPS64R6O0-NEXT: sllv $6, $1, $9 ++; MIPS64R6O0-NEXT: .LBB11_1: # %entry ++; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6O0-NEXT: ll $2, 0($5) ++; MIPS64R6O0-NEXT: and $3, $6, $7 ++; MIPS64R6O0-NEXT: and $4, $2, $8 ++; MIPS64R6O0-NEXT: or $4, $4, $3 ++; MIPS64R6O0-NEXT: sc $4, 0($5) ++; MIPS64R6O0-NEXT: beqzc $4, .LBB11_1 ++; MIPS64R6O0-NEXT: # %bb.2: # %entry ++; MIPS64R6O0-NEXT: and $1, $2, $7 ++; MIPS64R6O0-NEXT: srlv $1, $1, $9 ++; MIPS64R6O0-NEXT: seb $1, $1 ++; MIPS64R6O0-NEXT: # %bb.3: # %entry ++; MIPS64R6O0-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ++; MIPS64R6O0-NEXT: # %bb.4: # %entry ++; MIPS64R6O0-NEXT: lw $2, 12($sp) # 4-byte Folded Reload ++; MIPS64R6O0-NEXT: daddiu $sp, $sp, 16 ++; MIPS64R6O0-NEXT: jrc $ra ++; ++; MM32-LABEL: AtomicSwap8: ++; MM32: # %bb.0: # %entry ++; MM32-NEXT: lui $2, %hi(_gp_disp) ++; MM32-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MM32-NEXT: addu $2, $2, $25 ++; MM32-NEXT: lw $1, %got(y)($2) ++; MM32-NEXT: addiu $2, $zero, -4 ++; MM32-NEXT: and $3, $1, $2 ++; MM32-NEXT: andi $1, $1, 3 ++; MM32-NEXT: sll $1, $1, 3 ++; MM32-NEXT: ori $2, $zero, 255 ++; MM32-NEXT: sllv $5, $2, $1 ++; MM32-NEXT: nor $6, $zero, $5 ++; MM32-NEXT: sllv $4, $4, $1 ++; MM32-NEXT: $BB11_1: # %entry ++; MM32-NEXT: # =>This Inner Loop Header: Depth=1 ++; MM32-NEXT: ll $7, 0($3) ++; MM32-NEXT: and $8, $4, $5 ++; MM32-NEXT: and $9, $7, $6 ++; MM32-NEXT: or $9, $9, $8 ++; MM32-NEXT: sc $9, 0($3) ++; 
MM32-NEXT: beqzc $9, $BB11_1 ++; MM32-NEXT: # %bb.2: # %entry ++; MM32-NEXT: and $2, $7, $5 ++; MM32-NEXT: srlv $2, $2, $1 ++; MM32-NEXT: seb $2, $2 ++; MM32-NEXT: # %bb.3: # %entry ++; MM32-NEXT: jrc $ra ++; ++; O1-LABEL: AtomicSwap8: ++; O1: # %bb.0: # %entry ++; O1-NEXT: lui $2, %hi(_gp_disp) ++; O1-NEXT: addiu $2, $2, %lo(_gp_disp) ++; O1-NEXT: addu $1, $2, $25 ++; O1-NEXT: lw $1, %got(y)($1) ++; O1-NEXT: addiu $2, $zero, -4 ++; O1-NEXT: and $3, $1, $2 ++; O1-NEXT: andi $1, $1, 3 ++; O1-NEXT: sll $1, $1, 3 ++; O1-NEXT: ori $2, $zero, 255 ++; O1-NEXT: sllv $5, $2, $1 ++; O1-NEXT: nor $6, $zero, $5 ++; O1-NEXT: sllv $4, $4, $1 ++; O1-NEXT: $BB11_1: # %entry ++; O1-NEXT: # =>This Inner Loop Header: Depth=1 ++; O1-NEXT: ll $7, 0($3) ++; O1-NEXT: and $8, $4, $5 ++; O1-NEXT: and $9, $7, $6 ++; O1-NEXT: or $9, $9, $8 ++; O1-NEXT: sc $9, 0($3) ++; O1-NEXT: beqz $9, $BB11_1 ++; O1-NEXT: nop ++; O1-NEXT: # %bb.2: # %entry ++; O1-NEXT: and $2, $7, $5 ++; O1-NEXT: srlv $2, $2, $1 ++; O1-NEXT: sll $2, $2, 24 ++; O1-NEXT: sra $2, $2, 24 ++; O1-NEXT: # %bb.3: # %entry ++; O1-NEXT: jr $ra ++; O1-NEXT: nop ++; ++; O2-LABEL: AtomicSwap8: ++; O2: # %bb.0: # %entry ++; O2-NEXT: lui $2, %hi(_gp_disp) ++; O2-NEXT: addiu $2, $2, %lo(_gp_disp) ++; O2-NEXT: addu $1, $2, $25 ++; O2-NEXT: lw $1, %got(y)($1) ++; O2-NEXT: addiu $2, $zero, -4 ++; O2-NEXT: and $3, $1, $2 ++; O2-NEXT: andi $1, $1, 3 ++; O2-NEXT: sll $1, $1, 3 ++; O2-NEXT: ori $2, $zero, 255 ++; O2-NEXT: sllv $5, $2, $1 ++; O2-NEXT: nor $6, $zero, $5 ++; O2-NEXT: sllv $4, $4, $1 ++; O2-NEXT: $BB11_1: # %entry ++; O2-NEXT: # =>This Inner Loop Header: Depth=1 ++; O2-NEXT: ll $7, 0($3) ++; O2-NEXT: and $8, $4, $5 ++; O2-NEXT: and $9, $7, $6 ++; O2-NEXT: or $9, $9, $8 ++; O2-NEXT: sc $9, 0($3) ++; O2-NEXT: beqz $9, $BB11_1 ++; O2-NEXT: nop ++; O2-NEXT: # %bb.2: # %entry ++; O2-NEXT: and $2, $7, $5 ++; O2-NEXT: srlv $2, $2, $1 ++; O2-NEXT: sll $2, $2, 24 ++; O2-NEXT: sra $2, $2, 24 ++; O2-NEXT: # %bb.3: # %entry ++; O2-NEXT: jr $ra ++; O2-NEXT: nop ++; ++; O3-LABEL: AtomicSwap8: ++; O3: # %bb.0: # %entry ++; O3-NEXT: lui $2, %hi(_gp_disp) ++; O3-NEXT: addiu $2, $2, %lo(_gp_disp) ++; O3-NEXT: addu $1, $2, $25 ++; O3-NEXT: addiu $2, $zero, -4 ++; O3-NEXT: lw $1, %got(y)($1) ++; O3-NEXT: and $3, $1, $2 ++; O3-NEXT: andi $1, $1, 3 ++; O3-NEXT: ori $2, $zero, 255 ++; O3-NEXT: sll $1, $1, 3 ++; O3-NEXT: sllv $5, $2, $1 ++; O3-NEXT: sllv $4, $4, $1 ++; O3-NEXT: nor $6, $zero, $5 ++; O3-NEXT: $BB11_1: # %entry ++; O3-NEXT: # =>This Inner Loop Header: Depth=1 ++; O3-NEXT: ll $7, 0($3) ++; O3-NEXT: and $8, $4, $5 ++; O3-NEXT: and $9, $7, $6 ++; O3-NEXT: or $9, $9, $8 ++; O3-NEXT: sc $9, 0($3) ++; O3-NEXT: beqz $9, $BB11_1 ++; O3-NEXT: nop ++; O3-NEXT: # %bb.2: # %entry ++; O3-NEXT: and $2, $7, $5 ++; O3-NEXT: srlv $2, $2, $1 ++; O3-NEXT: sll $2, $2, 24 ++; O3-NEXT: sra $2, $2, 24 ++; O3-NEXT: # %bb.3: # %entry ++; O3-NEXT: jr $ra ++; O3-NEXT: nop ++; ++; MIPS32EB-LABEL: AtomicSwap8: ++; MIPS32EB: # %bb.0: # %entry ++; MIPS32EB-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32EB-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32EB-NEXT: addu $1, $2, $25 ++; MIPS32EB-NEXT: lw $1, %got(y)($1) ++; MIPS32EB-NEXT: addiu $2, $zero, -4 ++; MIPS32EB-NEXT: and $3, $1, $2 ++; MIPS32EB-NEXT: andi $1, $1, 3 ++; MIPS32EB-NEXT: xori $1, $1, 3 ++; MIPS32EB-NEXT: sll $1, $1, 3 ++; MIPS32EB-NEXT: ori $2, $zero, 255 ++; MIPS32EB-NEXT: sllv $5, $2, $1 ++; MIPS32EB-NEXT: nor $6, $zero, $5 ++; MIPS32EB-NEXT: sllv $4, $4, $1 ++; MIPS32EB-NEXT: $BB11_1: # %entry ++; MIPS32EB-NEXT: # =>This Inner Loop 
Header: Depth=1 ++; MIPS32EB-NEXT: ll $7, 0($3) ++; MIPS32EB-NEXT: and $8, $4, $5 ++; MIPS32EB-NEXT: and $9, $7, $6 ++; MIPS32EB-NEXT: or $9, $9, $8 ++; MIPS32EB-NEXT: sc $9, 0($3) ++; MIPS32EB-NEXT: beqz $9, $BB11_1 ++; MIPS32EB-NEXT: nop ++; MIPS32EB-NEXT: # %bb.2: # %entry ++; MIPS32EB-NEXT: and $2, $7, $5 ++; MIPS32EB-NEXT: srlv $2, $2, $1 ++; MIPS32EB-NEXT: sll $2, $2, 24 ++; MIPS32EB-NEXT: sra $2, $2, 24 ++; MIPS32EB-NEXT: # %bb.3: # %entry ++; MIPS32EB-NEXT: jr $ra ++; MIPS32EB-NEXT: nop ++entry: ++ %0 = atomicrmw xchg i8* @y, i8 %newval monotonic ++ ret i8 %0 ++} ++ ++define signext i8 @AtomicCmpSwap8(i8 signext %oldval, i8 signext %newval) nounwind { ++; MIPS32-LABEL: AtomicCmpSwap8: ++; MIPS32: # %bb.0: # %entry ++; MIPS32-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32-NEXT: addu $1, $2, $25 ++; MIPS32-NEXT: lw $1, %got(y)($1) ++; MIPS32-NEXT: addiu $2, $zero, -4 ++; MIPS32-NEXT: and $3, $1, $2 ++; MIPS32-NEXT: andi $1, $1, 3 ++; MIPS32-NEXT: sll $1, $1, 3 ++; MIPS32-NEXT: ori $2, $zero, 255 ++; MIPS32-NEXT: sllv $6, $2, $1 ++; MIPS32-NEXT: nor $7, $zero, $6 ++; MIPS32-NEXT: andi $2, $4, 255 ++; MIPS32-NEXT: sllv $4, $2, $1 ++; MIPS32-NEXT: andi $2, $5, 255 ++; MIPS32-NEXT: sllv $5, $2, $1 ++; MIPS32-NEXT: $BB12_1: # %entry ++; MIPS32-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32-NEXT: ll $8, 0($3) ++; MIPS32-NEXT: and $9, $8, $6 ++; MIPS32-NEXT: bne $9, $4, $BB12_3 ++; MIPS32-NEXT: nop ++; MIPS32-NEXT: # %bb.2: # %entry ++; MIPS32-NEXT: # in Loop: Header=BB12_1 Depth=1 ++; MIPS32-NEXT: and $8, $8, $7 ++; MIPS32-NEXT: or $8, $8, $5 ++; MIPS32-NEXT: sc $8, 0($3) ++; MIPS32-NEXT: beqz $8, $BB12_1 ++; MIPS32-NEXT: nop ++; MIPS32-NEXT: $BB12_3: # %entry ++; MIPS32-NEXT: sync ++; MIPS32-NEXT: srlv $2, $9, $1 ++; MIPS32-NEXT: sll $2, $2, 24 ++; MIPS32-NEXT: sra $2, $2, 24 ++; MIPS32-NEXT: # %bb.4: # %entry ++; MIPS32-NEXT: jr $ra ++; MIPS32-NEXT: nop ++; ++; MIPS32O0-LABEL: AtomicCmpSwap8: ++; MIPS32O0: # %bb.0: # %entry ++; MIPS32O0-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32O0-NEXT: addiu $sp, $sp, -8 ++; MIPS32O0-NEXT: addu $3, $2, $25 ++; MIPS32O0-NEXT: move $1, $5 ++; MIPS32O0-NEXT: move $2, $4 ++; MIPS32O0-NEXT: lw $3, %got(y)($3) ++; MIPS32O0-NEXT: addiu $4, $zero, -4 ++; MIPS32O0-NEXT: and $4, $3, $4 ++; MIPS32O0-NEXT: andi $3, $3, 3 ++; MIPS32O0-NEXT: sll $9, $3, 3 ++; MIPS32O0-NEXT: ori $3, $zero, 255 ++; MIPS32O0-NEXT: sllv $5, $3, $9 ++; MIPS32O0-NEXT: nor $7, $zero, $5 ++; MIPS32O0-NEXT: andi $2, $2, 255 ++; MIPS32O0-NEXT: sllv $6, $2, $9 ++; MIPS32O0-NEXT: andi $1, $1, 255 ++; MIPS32O0-NEXT: sllv $8, $1, $9 ++; MIPS32O0-NEXT: $BB12_1: # %entry ++; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32O0-NEXT: ll $2, 0($4) ++; MIPS32O0-NEXT: and $3, $2, $5 ++; MIPS32O0-NEXT: bne $3, $6, $BB12_3 ++; MIPS32O0-NEXT: nop ++; MIPS32O0-NEXT: # %bb.2: # %entry ++; MIPS32O0-NEXT: # in Loop: Header=BB12_1 Depth=1 ++; MIPS32O0-NEXT: and $2, $2, $7 ++; MIPS32O0-NEXT: or $2, $2, $8 ++; MIPS32O0-NEXT: sc $2, 0($4) ++; MIPS32O0-NEXT: beqz $2, $BB12_1 ++; MIPS32O0-NEXT: nop ++; MIPS32O0-NEXT: $BB12_3: # %entry ++; MIPS32O0-NEXT: sync ++; MIPS32O0-NEXT: srlv $1, $3, $9 ++; MIPS32O0-NEXT: sll $1, $1, 24 ++; MIPS32O0-NEXT: sra $1, $1, 24 ++; MIPS32O0-NEXT: # %bb.4: # %entry ++; MIPS32O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ++; MIPS32O0-NEXT: # %bb.5: # %entry ++; MIPS32O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload ++; MIPS32O0-NEXT: sll $1, $1, 24 ++; MIPS32O0-NEXT: sra $2, $1, 24 ++; 
MIPS32O0-NEXT: addiu $sp, $sp, 8 ++; MIPS32O0-NEXT: jr $ra ++; MIPS32O0-NEXT: nop ++; ++; MIPS32R2-LABEL: AtomicCmpSwap8: ++; MIPS32R2: # %bb.0: # %entry ++; MIPS32R2-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32R2-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32R2-NEXT: addu $1, $2, $25 ++; MIPS32R2-NEXT: lw $1, %got(y)($1) ++; MIPS32R2-NEXT: addiu $2, $zero, -4 ++; MIPS32R2-NEXT: and $3, $1, $2 ++; MIPS32R2-NEXT: andi $1, $1, 3 ++; MIPS32R2-NEXT: sll $1, $1, 3 ++; MIPS32R2-NEXT: ori $2, $zero, 255 ++; MIPS32R2-NEXT: sllv $6, $2, $1 ++; MIPS32R2-NEXT: nor $7, $zero, $6 ++; MIPS32R2-NEXT: andi $2, $4, 255 ++; MIPS32R2-NEXT: sllv $4, $2, $1 ++; MIPS32R2-NEXT: andi $2, $5, 255 ++; MIPS32R2-NEXT: sllv $5, $2, $1 ++; MIPS32R2-NEXT: $BB12_1: # %entry ++; MIPS32R2-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32R2-NEXT: ll $8, 0($3) ++; MIPS32R2-NEXT: and $9, $8, $6 ++; MIPS32R2-NEXT: bne $9, $4, $BB12_3 ++; MIPS32R2-NEXT: nop ++; MIPS32R2-NEXT: # %bb.2: # %entry ++; MIPS32R2-NEXT: # in Loop: Header=BB12_1 Depth=1 ++; MIPS32R2-NEXT: and $8, $8, $7 ++; MIPS32R2-NEXT: or $8, $8, $5 ++; MIPS32R2-NEXT: sc $8, 0($3) ++; MIPS32R2-NEXT: beqz $8, $BB12_1 ++; MIPS32R2-NEXT: nop ++; MIPS32R2-NEXT: $BB12_3: # %entry ++; MIPS32R2-NEXT: sync ++; MIPS32R2-NEXT: srlv $2, $9, $1 ++; MIPS32R2-NEXT: seb $2, $2 ++; MIPS32R2-NEXT: # %bb.4: # %entry ++; MIPS32R2-NEXT: jr $ra ++; MIPS32R2-NEXT: nop ++; ++; MIPS32R6-LABEL: AtomicCmpSwap8: ++; MIPS32R6: # %bb.0: # %entry ++; MIPS32R6-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32R6-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32R6-NEXT: addu $1, $2, $25 ++; MIPS32R6-NEXT: lw $1, %got(y)($1) ++; MIPS32R6-NEXT: addiu $2, $zero, -4 ++; MIPS32R6-NEXT: and $3, $1, $2 ++; MIPS32R6-NEXT: andi $1, $1, 3 ++; MIPS32R6-NEXT: sll $1, $1, 3 ++; MIPS32R6-NEXT: ori $2, $zero, 255 ++; MIPS32R6-NEXT: sllv $6, $2, $1 ++; MIPS32R6-NEXT: nor $7, $zero, $6 ++; MIPS32R6-NEXT: andi $2, $4, 255 ++; MIPS32R6-NEXT: sllv $4, $2, $1 ++; MIPS32R6-NEXT: andi $2, $5, 255 ++; MIPS32R6-NEXT: sllv $5, $2, $1 ++; MIPS32R6-NEXT: $BB12_1: # %entry ++; MIPS32R6-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32R6-NEXT: ll $8, 0($3) ++; MIPS32R6-NEXT: and $9, $8, $6 ++; MIPS32R6-NEXT: bnec $9, $4, $BB12_3 ++; MIPS32R6-NEXT: # %bb.2: # %entry ++; MIPS32R6-NEXT: # in Loop: Header=BB12_1 Depth=1 ++; MIPS32R6-NEXT: and $8, $8, $7 ++; MIPS32R6-NEXT: or $8, $8, $5 ++; MIPS32R6-NEXT: sc $8, 0($3) ++; MIPS32R6-NEXT: beqzc $8, $BB12_1 ++; MIPS32R6-NEXT: $BB12_3: # %entry ++; MIPS32R6-NEXT: sync ++; MIPS32R6-NEXT: srlv $2, $9, $1 ++; MIPS32R6-NEXT: seb $2, $2 ++; MIPS32R6-NEXT: # %bb.4: # %entry ++; MIPS32R6-NEXT: jrc $ra ++; ++; MIPS32R6O0-LABEL: AtomicCmpSwap8: ++; MIPS32R6O0: # %bb.0: # %entry ++; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32R6O0-NEXT: addiu $sp, $sp, -8 ++; MIPS32R6O0-NEXT: addu $3, $2, $25 ++; MIPS32R6O0-NEXT: move $1, $5 ++; MIPS32R6O0-NEXT: move $2, $4 ++; MIPS32R6O0-NEXT: # kill: def $a1 killed $at ++; MIPS32R6O0-NEXT: # kill: def $a0 killed $v0 ++; MIPS32R6O0-NEXT: lw $3, %got(y)($3) ++; MIPS32R6O0-NEXT: addiu $4, $zero, -4 ++; MIPS32R6O0-NEXT: and $4, $3, $4 ++; MIPS32R6O0-NEXT: andi $3, $3, 3 ++; MIPS32R6O0-NEXT: sll $9, $3, 3 ++; MIPS32R6O0-NEXT: ori $3, $zero, 255 ++; MIPS32R6O0-NEXT: sllv $5, $3, $9 ++; MIPS32R6O0-NEXT: nor $7, $zero, $5 ++; MIPS32R6O0-NEXT: andi $2, $2, 255 ++; MIPS32R6O0-NEXT: sllv $6, $2, $9 ++; MIPS32R6O0-NEXT: andi $1, $1, 255 ++; MIPS32R6O0-NEXT: sllv $8, $1, $9 ++; MIPS32R6O0-NEXT: $BB12_1: # %entry ++; MIPS32R6O0-NEXT: # 
=>This Inner Loop Header: Depth=1 ++; MIPS32R6O0-NEXT: ll $2, 0($4) ++; MIPS32R6O0-NEXT: and $3, $2, $5 ++; MIPS32R6O0-NEXT: bnec $3, $6, $BB12_3 ++; MIPS32R6O0-NEXT: # %bb.2: # %entry ++; MIPS32R6O0-NEXT: # in Loop: Header=BB12_1 Depth=1 ++; MIPS32R6O0-NEXT: and $2, $2, $7 ++; MIPS32R6O0-NEXT: or $2, $2, $8 ++; MIPS32R6O0-NEXT: sc $2, 0($4) ++; MIPS32R6O0-NEXT: beqzc $2, $BB12_1 ++; MIPS32R6O0-NEXT: $BB12_3: # %entry ++; MIPS32R6O0-NEXT: sync ++; MIPS32R6O0-NEXT: srlv $1, $3, $9 ++; MIPS32R6O0-NEXT: seb $1, $1 ++; MIPS32R6O0-NEXT: # %bb.4: # %entry ++; MIPS32R6O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ++; MIPS32R6O0-NEXT: # %bb.5: # %entry ++; MIPS32R6O0-NEXT: lw $2, 4($sp) # 4-byte Folded Reload ++; MIPS32R6O0-NEXT: addiu $sp, $sp, 8 ++; MIPS32R6O0-NEXT: jrc $ra ++; ++; MIPS4-LABEL: AtomicCmpSwap8: ++; MIPS4: # %bb.0: # %entry ++; MIPS4-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicCmpSwap8))) ++; MIPS4-NEXT: daddu $1, $1, $25 ++; MIPS4-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicCmpSwap8))) ++; MIPS4-NEXT: ld $1, %got_disp(y)($1) ++; MIPS4-NEXT: daddiu $2, $zero, -4 ++; MIPS4-NEXT: and $3, $1, $2 ++; MIPS4-NEXT: andi $1, $1, 3 ++; MIPS4-NEXT: sll $1, $1, 3 ++; MIPS4-NEXT: ori $2, $zero, 255 ++; MIPS4-NEXT: sllv $6, $2, $1 ++; MIPS4-NEXT: nor $7, $zero, $6 ++; MIPS4-NEXT: andi $2, $4, 255 ++; MIPS4-NEXT: sllv $4, $2, $1 ++; MIPS4-NEXT: andi $2, $5, 255 ++; MIPS4-NEXT: sllv $5, $2, $1 ++; MIPS4-NEXT: .LBB12_1: # %entry ++; MIPS4-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS4-NEXT: ll $8, 0($3) ++; MIPS4-NEXT: and $9, $8, $6 ++; MIPS4-NEXT: bne $9, $4, .LBB12_3 ++; MIPS4-NEXT: nop ++; MIPS4-NEXT: # %bb.2: # %entry ++; MIPS4-NEXT: # in Loop: Header=BB12_1 Depth=1 ++; MIPS4-NEXT: and $8, $8, $7 ++; MIPS4-NEXT: or $8, $8, $5 ++; MIPS4-NEXT: sc $8, 0($3) ++; MIPS4-NEXT: beqz $8, .LBB12_1 ++; MIPS4-NEXT: nop ++; MIPS4-NEXT: .LBB12_3: # %entry ++; MIPS4-NEXT: sync ++; MIPS4-NEXT: srlv $2, $9, $1 ++; MIPS4-NEXT: sll $2, $2, 24 ++; MIPS4-NEXT: sra $2, $2, 24 ++; MIPS4-NEXT: # %bb.4: # %entry ++; MIPS4-NEXT: jr $ra ++; MIPS4-NEXT: nop ++; ++; MIPS64-LABEL: AtomicCmpSwap8: ++; MIPS64: # %bb.0: # %entry ++; MIPS64-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicCmpSwap8))) ++; MIPS64-NEXT: daddu $1, $1, $25 ++; MIPS64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicCmpSwap8))) ++; MIPS64-NEXT: ld $1, %got_disp(y)($1) ++; MIPS64-NEXT: daddiu $2, $zero, -4 ++; MIPS64-NEXT: and $3, $1, $2 ++; MIPS64-NEXT: andi $1, $1, 3 ++; MIPS64-NEXT: sll $1, $1, 3 ++; MIPS64-NEXT: ori $2, $zero, 255 ++; MIPS64-NEXT: sllv $6, $2, $1 ++; MIPS64-NEXT: nor $7, $zero, $6 ++; MIPS64-NEXT: andi $2, $4, 255 ++; MIPS64-NEXT: sllv $4, $2, $1 ++; MIPS64-NEXT: andi $2, $5, 255 ++; MIPS64-NEXT: sllv $5, $2, $1 ++; MIPS64-NEXT: .LBB12_1: # %entry ++; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64-NEXT: ll $8, 0($3) ++; MIPS64-NEXT: and $9, $8, $6 ++; MIPS64-NEXT: bne $9, $4, .LBB12_3 ++; MIPS64-NEXT: nop ++; MIPS64-NEXT: # %bb.2: # %entry ++; MIPS64-NEXT: # in Loop: Header=BB12_1 Depth=1 ++; MIPS64-NEXT: and $8, $8, $7 ++; MIPS64-NEXT: or $8, $8, $5 ++; MIPS64-NEXT: sc $8, 0($3) ++; MIPS64-NEXT: beqz $8, .LBB12_1 ++; MIPS64-NEXT: nop ++; MIPS64-NEXT: .LBB12_3: # %entry ++; MIPS64-NEXT: sync ++; MIPS64-NEXT: srlv $2, $9, $1 ++; MIPS64-NEXT: sll $2, $2, 24 ++; MIPS64-NEXT: sra $2, $2, 24 ++; MIPS64-NEXT: # %bb.4: # %entry ++; MIPS64-NEXT: jr $ra ++; MIPS64-NEXT: nop ++; ++; MIPS64R2-LABEL: AtomicCmpSwap8: ++; MIPS64R2: # %bb.0: # %entry ++; MIPS64R2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicCmpSwap8))) ++; MIPS64R2-NEXT: daddu $1, $1, $25 
++; MIPS64R2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicCmpSwap8))) ++; MIPS64R2-NEXT: ld $1, %got_disp(y)($1) ++; MIPS64R2-NEXT: daddiu $2, $zero, -4 ++; MIPS64R2-NEXT: and $3, $1, $2 ++; MIPS64R2-NEXT: andi $1, $1, 3 ++; MIPS64R2-NEXT: sll $1, $1, 3 ++; MIPS64R2-NEXT: ori $2, $zero, 255 ++; MIPS64R2-NEXT: sllv $6, $2, $1 ++; MIPS64R2-NEXT: nor $7, $zero, $6 ++; MIPS64R2-NEXT: andi $2, $4, 255 ++; MIPS64R2-NEXT: sllv $4, $2, $1 ++; MIPS64R2-NEXT: andi $2, $5, 255 ++; MIPS64R2-NEXT: sllv $5, $2, $1 ++; MIPS64R2-NEXT: .LBB12_1: # %entry ++; MIPS64R2-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R2-NEXT: ll $8, 0($3) ++; MIPS64R2-NEXT: and $9, $8, $6 ++; MIPS64R2-NEXT: bne $9, $4, .LBB12_3 ++; MIPS64R2-NEXT: nop ++; MIPS64R2-NEXT: # %bb.2: # %entry ++; MIPS64R2-NEXT: # in Loop: Header=BB12_1 Depth=1 ++; MIPS64R2-NEXT: and $8, $8, $7 ++; MIPS64R2-NEXT: or $8, $8, $5 ++; MIPS64R2-NEXT: sc $8, 0($3) ++; MIPS64R2-NEXT: beqz $8, .LBB12_1 ++; MIPS64R2-NEXT: nop ++; MIPS64R2-NEXT: .LBB12_3: # %entry ++; MIPS64R2-NEXT: sync ++; MIPS64R2-NEXT: srlv $2, $9, $1 ++; MIPS64R2-NEXT: seb $2, $2 ++; MIPS64R2-NEXT: # %bb.4: # %entry ++; MIPS64R2-NEXT: jr $ra ++; MIPS64R2-NEXT: nop ++; ++; MIPS64R6-LABEL: AtomicCmpSwap8: ++; MIPS64R6: # %bb.0: # %entry ++; MIPS64R6-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicCmpSwap8))) ++; MIPS64R6-NEXT: daddu $1, $1, $25 ++; MIPS64R6-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicCmpSwap8))) ++; MIPS64R6-NEXT: ld $1, %got_disp(y)($1) ++; MIPS64R6-NEXT: daddiu $2, $zero, -4 ++; MIPS64R6-NEXT: and $3, $1, $2 ++; MIPS64R6-NEXT: andi $1, $1, 3 ++; MIPS64R6-NEXT: sll $1, $1, 3 ++; MIPS64R6-NEXT: ori $2, $zero, 255 ++; MIPS64R6-NEXT: sllv $6, $2, $1 ++; MIPS64R6-NEXT: nor $7, $zero, $6 ++; MIPS64R6-NEXT: andi $2, $4, 255 ++; MIPS64R6-NEXT: sllv $4, $2, $1 ++; MIPS64R6-NEXT: andi $2, $5, 255 ++; MIPS64R6-NEXT: sllv $5, $2, $1 ++; MIPS64R6-NEXT: .LBB12_1: # %entry ++; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6-NEXT: ll $8, 0($3) ++; MIPS64R6-NEXT: and $9, $8, $6 ++; MIPS64R6-NEXT: bnec $9, $4, .LBB12_3 ++; MIPS64R6-NEXT: # %bb.2: # %entry ++; MIPS64R6-NEXT: # in Loop: Header=BB12_1 Depth=1 ++; MIPS64R6-NEXT: and $8, $8, $7 ++; MIPS64R6-NEXT: or $8, $8, $5 ++; MIPS64R6-NEXT: sc $8, 0($3) ++; MIPS64R6-NEXT: beqzc $8, .LBB12_1 ++; MIPS64R6-NEXT: .LBB12_3: # %entry ++; MIPS64R6-NEXT: sync ++; MIPS64R6-NEXT: srlv $2, $9, $1 ++; MIPS64R6-NEXT: seb $2, $2 ++; MIPS64R6-NEXT: # %bb.4: # %entry ++; MIPS64R6-NEXT: jrc $ra ++; ++; MIPS64R6O0-LABEL: AtomicCmpSwap8: ++; MIPS64R6O0: # %bb.0: # %entry ++; MIPS64R6O0-NEXT: daddiu $sp, $sp, -16 ++; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicCmpSwap8))) ++; MIPS64R6O0-NEXT: daddu $1, $1, $25 ++; MIPS64R6O0-NEXT: daddiu $3, $1, %lo(%neg(%gp_rel(AtomicCmpSwap8))) ++; MIPS64R6O0-NEXT: move $1, $5 ++; MIPS64R6O0-NEXT: move $2, $4 ++; MIPS64R6O0-NEXT: ld $3, %got_disp(y)($3) ++; MIPS64R6O0-NEXT: daddiu $4, $zero, -4 ++; MIPS64R6O0-NEXT: and $4, $3, $4 ++; MIPS64R6O0-NEXT: andi $3, $3, 3 ++; MIPS64R6O0-NEXT: xori $3, $3, 3 ++; MIPS64R6O0-NEXT: sll $9, $3, 3 ++; MIPS64R6O0-NEXT: ori $3, $zero, 255 ++; MIPS64R6O0-NEXT: sllv $5, $3, $9 ++; MIPS64R6O0-NEXT: nor $7, $zero, $5 ++; MIPS64R6O0-NEXT: andi $2, $2, 255 ++; MIPS64R6O0-NEXT: sllv $6, $2, $9 ++; MIPS64R6O0-NEXT: andi $1, $1, 255 ++; MIPS64R6O0-NEXT: sllv $8, $1, $9 ++; MIPS64R6O0-NEXT: .LBB12_1: # %entry ++; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6O0-NEXT: ll $2, 0($4) ++; MIPS64R6O0-NEXT: and $3, $2, $5 ++; MIPS64R6O0-NEXT: bnec $3, $6, .LBB12_3 ++; 
MIPS64R6O0-NEXT: # %bb.2: # %entry ++; MIPS64R6O0-NEXT: # in Loop: Header=BB12_1 Depth=1 ++; MIPS64R6O0-NEXT: and $2, $2, $7 ++; MIPS64R6O0-NEXT: or $2, $2, $8 ++; MIPS64R6O0-NEXT: sc $2, 0($4) ++; MIPS64R6O0-NEXT: beqzc $2, .LBB12_1 ++; MIPS64R6O0-NEXT: .LBB12_3: # %entry ++; MIPS64R6O0-NEXT: sync ++; MIPS64R6O0-NEXT: srlv $1, $3, $9 ++; MIPS64R6O0-NEXT: seb $1, $1 ++; MIPS64R6O0-NEXT: # %bb.4: # %entry ++; MIPS64R6O0-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ++; MIPS64R6O0-NEXT: # %bb.5: # %entry ++; MIPS64R6O0-NEXT: lw $2, 12($sp) # 4-byte Folded Reload ++; MIPS64R6O0-NEXT: daddiu $sp, $sp, 16 ++; MIPS64R6O0-NEXT: jrc $ra ++; ++; MM32-LABEL: AtomicCmpSwap8: ++; MM32: # %bb.0: # %entry ++; MM32-NEXT: lui $2, %hi(_gp_disp) ++; MM32-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MM32-NEXT: addu $2, $2, $25 ++; MM32-NEXT: lw $1, %got(y)($2) ++; MM32-NEXT: addiu $2, $zero, -4 ++; MM32-NEXT: and $3, $1, $2 ++; MM32-NEXT: andi $1, $1, 3 ++; MM32-NEXT: sll $1, $1, 3 ++; MM32-NEXT: ori $2, $zero, 255 ++; MM32-NEXT: sllv $6, $2, $1 ++; MM32-NEXT: nor $7, $zero, $6 ++; MM32-NEXT: andi $2, $4, 255 ++; MM32-NEXT: sllv $4, $2, $1 ++; MM32-NEXT: andi $2, $5, 255 ++; MM32-NEXT: sllv $5, $2, $1 ++; MM32-NEXT: $BB12_1: # %entry ++; MM32-NEXT: # =>This Inner Loop Header: Depth=1 ++; MM32-NEXT: ll $8, 0($3) ++; MM32-NEXT: and $9, $8, $6 ++; MM32-NEXT: bne $9, $4, $BB12_3 ++; MM32-NEXT: nop ++; MM32-NEXT: # %bb.2: # %entry ++; MM32-NEXT: # in Loop: Header=BB12_1 Depth=1 ++; MM32-NEXT: and $8, $8, $7 ++; MM32-NEXT: or $8, $8, $5 ++; MM32-NEXT: sc $8, 0($3) ++; MM32-NEXT: beqzc $8, $BB12_1 ++; MM32-NEXT: $BB12_3: # %entry ++; MM32-NEXT: sync ++; MM32-NEXT: srlv $2, $9, $1 ++; MM32-NEXT: seb $2, $2 ++; MM32-NEXT: # %bb.4: # %entry ++; MM32-NEXT: jrc $ra ++; ++; O1-LABEL: AtomicCmpSwap8: ++; O1: # %bb.0: # %entry ++; O1-NEXT: lui $2, %hi(_gp_disp) ++; O1-NEXT: addiu $2, $2, %lo(_gp_disp) ++; O1-NEXT: addu $1, $2, $25 ++; O1-NEXT: lw $1, %got(y)($1) ++; O1-NEXT: addiu $2, $zero, -4 ++; O1-NEXT: and $3, $1, $2 ++; O1-NEXT: andi $1, $1, 3 ++; O1-NEXT: sll $1, $1, 3 ++; O1-NEXT: ori $2, $zero, 255 ++; O1-NEXT: sllv $6, $2, $1 ++; O1-NEXT: nor $7, $zero, $6 ++; O1-NEXT: andi $2, $4, 255 ++; O1-NEXT: sllv $4, $2, $1 ++; O1-NEXT: andi $2, $5, 255 ++; O1-NEXT: sllv $5, $2, $1 ++; O1-NEXT: $BB12_1: # %entry ++; O1-NEXT: # =>This Inner Loop Header: Depth=1 ++; O1-NEXT: ll $8, 0($3) ++; O1-NEXT: and $9, $8, $6 ++; O1-NEXT: bne $9, $4, $BB12_3 ++; O1-NEXT: nop ++; O1-NEXT: # %bb.2: # %entry ++; O1-NEXT: # in Loop: Header=BB12_1 Depth=1 ++; O1-NEXT: and $8, $8, $7 ++; O1-NEXT: or $8, $8, $5 ++; O1-NEXT: sc $8, 0($3) ++; O1-NEXT: beqz $8, $BB12_1 ++; O1-NEXT: nop ++; O1-NEXT: $BB12_3: # %entry ++; O1-NEXT: sync ++; O1-NEXT: srlv $2, $9, $1 ++; O1-NEXT: sll $2, $2, 24 ++; O1-NEXT: sra $2, $2, 24 ++; O1-NEXT: # %bb.4: # %entry ++; O1-NEXT: jr $ra ++; O1-NEXT: nop ++; ++; O2-LABEL: AtomicCmpSwap8: ++; O2: # %bb.0: # %entry ++; O2-NEXT: lui $2, %hi(_gp_disp) ++; O2-NEXT: addiu $2, $2, %lo(_gp_disp) ++; O2-NEXT: addu $1, $2, $25 ++; O2-NEXT: lw $1, %got(y)($1) ++; O2-NEXT: addiu $2, $zero, -4 ++; O2-NEXT: and $3, $1, $2 ++; O2-NEXT: andi $1, $1, 3 ++; O2-NEXT: sll $1, $1, 3 ++; O2-NEXT: ori $2, $zero, 255 ++; O2-NEXT: sllv $6, $2, $1 ++; O2-NEXT: nor $7, $zero, $6 ++; O2-NEXT: andi $2, $4, 255 ++; O2-NEXT: sllv $4, $2, $1 ++; O2-NEXT: andi $2, $5, 255 ++; O2-NEXT: sllv $5, $2, $1 ++; O2-NEXT: $BB12_1: # %entry ++; O2-NEXT: # =>This Inner Loop Header: Depth=1 ++; O2-NEXT: ll $8, 0($3) ++; O2-NEXT: and $9, $8, $6 ++; O2-NEXT: bne $9, $4, 
$BB12_3 ++; O2-NEXT: nop ++; O2-NEXT: # %bb.2: # %entry ++; O2-NEXT: # in Loop: Header=BB12_1 Depth=1 ++; O2-NEXT: and $8, $8, $7 ++; O2-NEXT: or $8, $8, $5 ++; O2-NEXT: sc $8, 0($3) ++; O2-NEXT: beqz $8, $BB12_1 ++; O2-NEXT: nop ++; O2-NEXT: $BB12_3: # %entry ++; O2-NEXT: sync ++; O2-NEXT: srlv $2, $9, $1 ++; O2-NEXT: sll $2, $2, 24 ++; O2-NEXT: sra $2, $2, 24 ++; O2-NEXT: # %bb.4: # %entry ++; O2-NEXT: jr $ra ++; O2-NEXT: nop ++; ++; O3-LABEL: AtomicCmpSwap8: ++; O3: # %bb.0: # %entry ++; O3-NEXT: lui $2, %hi(_gp_disp) ++; O3-NEXT: addiu $2, $2, %lo(_gp_disp) ++; O3-NEXT: addu $1, $2, $25 ++; O3-NEXT: addiu $2, $zero, -4 ++; O3-NEXT: lw $1, %got(y)($1) ++; O3-NEXT: and $3, $1, $2 ++; O3-NEXT: andi $1, $1, 3 ++; O3-NEXT: ori $2, $zero, 255 ++; O3-NEXT: sll $1, $1, 3 ++; O3-NEXT: sllv $6, $2, $1 ++; O3-NEXT: andi $2, $4, 255 ++; O3-NEXT: sllv $4, $2, $1 ++; O3-NEXT: andi $2, $5, 255 ++; O3-NEXT: nor $7, $zero, $6 ++; O3-NEXT: sllv $5, $2, $1 ++; O3-NEXT: $BB12_1: # %entry ++; O3-NEXT: # =>This Inner Loop Header: Depth=1 ++; O3-NEXT: ll $8, 0($3) ++; O3-NEXT: and $9, $8, $6 ++; O3-NEXT: bne $9, $4, $BB12_3 ++; O3-NEXT: nop ++; O3-NEXT: # %bb.2: # %entry ++; O3-NEXT: # in Loop: Header=BB12_1 Depth=1 ++; O3-NEXT: and $8, $8, $7 ++; O3-NEXT: or $8, $8, $5 ++; O3-NEXT: sc $8, 0($3) ++; O3-NEXT: beqz $8, $BB12_1 ++; O3-NEXT: nop ++; O3-NEXT: $BB12_3: # %entry ++; O3-NEXT: sync ++; O3-NEXT: srlv $2, $9, $1 ++; O3-NEXT: sll $2, $2, 24 ++; O3-NEXT: sra $2, $2, 24 ++; O3-NEXT: # %bb.4: # %entry ++; O3-NEXT: jr $ra ++; O3-NEXT: nop ++; ++; MIPS32EB-LABEL: AtomicCmpSwap8: ++; MIPS32EB: # %bb.0: # %entry ++; MIPS32EB-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32EB-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32EB-NEXT: addu $1, $2, $25 ++; MIPS32EB-NEXT: lw $1, %got(y)($1) ++; MIPS32EB-NEXT: addiu $2, $zero, -4 ++; MIPS32EB-NEXT: and $3, $1, $2 ++; MIPS32EB-NEXT: andi $1, $1, 3 ++; MIPS32EB-NEXT: xori $1, $1, 3 ++; MIPS32EB-NEXT: sll $1, $1, 3 ++; MIPS32EB-NEXT: ori $2, $zero, 255 ++; MIPS32EB-NEXT: sllv $6, $2, $1 ++; MIPS32EB-NEXT: nor $7, $zero, $6 ++; MIPS32EB-NEXT: andi $2, $4, 255 ++; MIPS32EB-NEXT: sllv $4, $2, $1 ++; MIPS32EB-NEXT: andi $2, $5, 255 ++; MIPS32EB-NEXT: sllv $5, $2, $1 ++; MIPS32EB-NEXT: $BB12_1: # %entry ++; MIPS32EB-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32EB-NEXT: ll $8, 0($3) ++; MIPS32EB-NEXT: and $9, $8, $6 ++; MIPS32EB-NEXT: bne $9, $4, $BB12_3 ++; MIPS32EB-NEXT: nop ++; MIPS32EB-NEXT: # %bb.2: # %entry ++; MIPS32EB-NEXT: # in Loop: Header=BB12_1 Depth=1 ++; MIPS32EB-NEXT: and $8, $8, $7 ++; MIPS32EB-NEXT: or $8, $8, $5 ++; MIPS32EB-NEXT: sc $8, 0($3) ++; MIPS32EB-NEXT: beqz $8, $BB12_1 ++; MIPS32EB-NEXT: nop ++; MIPS32EB-NEXT: $BB12_3: # %entry ++; MIPS32EB-NEXT: sync ++; MIPS32EB-NEXT: srlv $2, $9, $1 ++; MIPS32EB-NEXT: sll $2, $2, 24 ++; MIPS32EB-NEXT: sra $2, $2, 24 ++; MIPS32EB-NEXT: # %bb.4: # %entry ++; MIPS32EB-NEXT: jr $ra ++; MIPS32EB-NEXT: nop ++entry: ++ %pair0 = cmpxchg i8* @y, i8 %oldval, i8 %newval monotonic monotonic ++ %0 = extractvalue { i8, i1 } %pair0, 0 ++ ret i8 %0 ++} ++ ++define i1 @AtomicCmpSwapRes8(i8* %ptr, i8 signext %oldval, i8 signext %newval) nounwind { ++; MIPS32-LABEL: AtomicCmpSwapRes8: ++; MIPS32: # %bb.0: # %entry ++; MIPS32-NEXT: addiu $1, $zero, -4 ++; MIPS32-NEXT: and $2, $4, $1 ++; MIPS32-NEXT: andi $1, $4, 3 ++; MIPS32-NEXT: sll $3, $1, 3 ++; MIPS32-NEXT: ori $1, $zero, 255 ++; MIPS32-NEXT: sllv $4, $1, $3 ++; MIPS32-NEXT: nor $7, $zero, $4 ++; MIPS32-NEXT: andi $1, $5, 255 ++; MIPS32-NEXT: sllv $8, $1, $3 ++; MIPS32-NEXT: andi $1, 
$6, 255 ++; MIPS32-NEXT: sllv $6, $1, $3 ++; MIPS32-NEXT: $BB13_1: # %entry ++; MIPS32-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32-NEXT: ll $9, 0($2) ++; MIPS32-NEXT: and $10, $9, $4 ++; MIPS32-NEXT: bne $10, $8, $BB13_3 ++; MIPS32-NEXT: nop ++; MIPS32-NEXT: # %bb.2: # %entry ++; MIPS32-NEXT: # in Loop: Header=BB13_1 Depth=1 ++; MIPS32-NEXT: and $9, $9, $7 ++; MIPS32-NEXT: or $9, $9, $6 ++; MIPS32-NEXT: sc $9, 0($2) ++; MIPS32-NEXT: beqz $9, $BB13_1 ++; MIPS32-NEXT: nop ++; MIPS32-NEXT: $BB13_3: # %entry ++; MIPS32-NEXT: sync ++; MIPS32-NEXT: srlv $1, $10, $3 ++; MIPS32-NEXT: sll $1, $1, 24 ++; MIPS32-NEXT: sra $1, $1, 24 ++; MIPS32-NEXT: # %bb.4: # %entry ++; MIPS32-NEXT: xor $1, $1, $5 ++; MIPS32-NEXT: jr $ra ++; MIPS32-NEXT: sltiu $2, $1, 1 ++; ++; MIPS32O0-LABEL: AtomicCmpSwapRes8: ++; MIPS32O0: # %bb.0: # %entry ++; MIPS32O0-NEXT: addiu $sp, $sp, -8 ++; MIPS32O0-NEXT: move $1, $6 ++; MIPS32O0-NEXT: move $2, $5 ++; MIPS32O0-NEXT: move $3, $4 ++; MIPS32O0-NEXT: sw $2, 0($sp) # 4-byte Folded Spill ++; MIPS32O0-NEXT: addiu $4, $zero, -4 ++; MIPS32O0-NEXT: and $4, $3, $4 ++; MIPS32O0-NEXT: andi $3, $3, 3 ++; MIPS32O0-NEXT: sll $9, $3, 3 ++; MIPS32O0-NEXT: ori $3, $zero, 255 ++; MIPS32O0-NEXT: sllv $5, $3, $9 ++; MIPS32O0-NEXT: nor $7, $zero, $5 ++; MIPS32O0-NEXT: andi $2, $2, 255 ++; MIPS32O0-NEXT: sllv $6, $2, $9 ++; MIPS32O0-NEXT: andi $1, $1, 255 ++; MIPS32O0-NEXT: sllv $8, $1, $9 ++; MIPS32O0-NEXT: $BB13_1: # %entry ++; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32O0-NEXT: ll $2, 0($4) ++; MIPS32O0-NEXT: and $3, $2, $5 ++; MIPS32O0-NEXT: bne $3, $6, $BB13_3 ++; MIPS32O0-NEXT: nop ++; MIPS32O0-NEXT: # %bb.2: # %entry ++; MIPS32O0-NEXT: # in Loop: Header=BB13_1 Depth=1 ++; MIPS32O0-NEXT: and $2, $2, $7 ++; MIPS32O0-NEXT: or $2, $2, $8 ++; MIPS32O0-NEXT: sc $2, 0($4) ++; MIPS32O0-NEXT: beqz $2, $BB13_1 ++; MIPS32O0-NEXT: nop ++; MIPS32O0-NEXT: $BB13_3: # %entry ++; MIPS32O0-NEXT: sync ++; MIPS32O0-NEXT: srlv $1, $3, $9 ++; MIPS32O0-NEXT: sll $1, $1, 24 ++; MIPS32O0-NEXT: sra $1, $1, 24 ++; MIPS32O0-NEXT: # %bb.4: # %entry ++; MIPS32O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ++; MIPS32O0-NEXT: # %bb.5: # %entry ++; MIPS32O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload ++; MIPS32O0-NEXT: lw $2, 0($sp) # 4-byte Folded Reload ++; MIPS32O0-NEXT: sll $2, $2, 24 ++; MIPS32O0-NEXT: sra $2, $2, 24 ++; MIPS32O0-NEXT: xor $1, $1, $2 ++; MIPS32O0-NEXT: sltiu $2, $1, 1 ++; MIPS32O0-NEXT: addiu $sp, $sp, 8 ++; MIPS32O0-NEXT: jr $ra ++; MIPS32O0-NEXT: nop ++; ++; MIPS32R2-LABEL: AtomicCmpSwapRes8: ++; MIPS32R2: # %bb.0: # %entry ++; MIPS32R2-NEXT: addiu $1, $zero, -4 ++; MIPS32R2-NEXT: and $2, $4, $1 ++; MIPS32R2-NEXT: andi $1, $4, 3 ++; MIPS32R2-NEXT: sll $3, $1, 3 ++; MIPS32R2-NEXT: ori $1, $zero, 255 ++; MIPS32R2-NEXT: sllv $4, $1, $3 ++; MIPS32R2-NEXT: nor $7, $zero, $4 ++; MIPS32R2-NEXT: andi $1, $5, 255 ++; MIPS32R2-NEXT: sllv $8, $1, $3 ++; MIPS32R2-NEXT: andi $1, $6, 255 ++; MIPS32R2-NEXT: sllv $6, $1, $3 ++; MIPS32R2-NEXT: $BB13_1: # %entry ++; MIPS32R2-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32R2-NEXT: ll $9, 0($2) ++; MIPS32R2-NEXT: and $10, $9, $4 ++; MIPS32R2-NEXT: bne $10, $8, $BB13_3 ++; MIPS32R2-NEXT: nop ++; MIPS32R2-NEXT: # %bb.2: # %entry ++; MIPS32R2-NEXT: # in Loop: Header=BB13_1 Depth=1 ++; MIPS32R2-NEXT: and $9, $9, $7 ++; MIPS32R2-NEXT: or $9, $9, $6 ++; MIPS32R2-NEXT: sc $9, 0($2) ++; MIPS32R2-NEXT: beqz $9, $BB13_1 ++; MIPS32R2-NEXT: nop ++; MIPS32R2-NEXT: $BB13_3: # %entry ++; MIPS32R2-NEXT: sync ++; MIPS32R2-NEXT: srlv $1, $10, $3 ++; 
MIPS32R2-NEXT: seb $1, $1 ++; MIPS32R2-NEXT: # %bb.4: # %entry ++; MIPS32R2-NEXT: xor $1, $1, $5 ++; MIPS32R2-NEXT: jr $ra ++; MIPS32R2-NEXT: sltiu $2, $1, 1 ++; ++; MIPS32R6-LABEL: AtomicCmpSwapRes8: ++; MIPS32R6: # %bb.0: # %entry ++; MIPS32R6-NEXT: addiu $1, $zero, -4 ++; MIPS32R6-NEXT: and $2, $4, $1 ++; MIPS32R6-NEXT: andi $1, $4, 3 ++; MIPS32R6-NEXT: sll $3, $1, 3 ++; MIPS32R6-NEXT: ori $1, $zero, 255 ++; MIPS32R6-NEXT: sllv $4, $1, $3 ++; MIPS32R6-NEXT: nor $7, $zero, $4 ++; MIPS32R6-NEXT: andi $1, $5, 255 ++; MIPS32R6-NEXT: sllv $8, $1, $3 ++; MIPS32R6-NEXT: andi $1, $6, 255 ++; MIPS32R6-NEXT: sllv $6, $1, $3 ++; MIPS32R6-NEXT: $BB13_1: # %entry ++; MIPS32R6-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32R6-NEXT: ll $9, 0($2) ++; MIPS32R6-NEXT: and $10, $9, $4 ++; MIPS32R6-NEXT: bnec $10, $8, $BB13_3 ++; MIPS32R6-NEXT: # %bb.2: # %entry ++; MIPS32R6-NEXT: # in Loop: Header=BB13_1 Depth=1 ++; MIPS32R6-NEXT: and $9, $9, $7 ++; MIPS32R6-NEXT: or $9, $9, $6 ++; MIPS32R6-NEXT: sc $9, 0($2) ++; MIPS32R6-NEXT: beqzc $9, $BB13_1 ++; MIPS32R6-NEXT: $BB13_3: # %entry ++; MIPS32R6-NEXT: sync ++; MIPS32R6-NEXT: srlv $1, $10, $3 ++; MIPS32R6-NEXT: seb $1, $1 ++; MIPS32R6-NEXT: # %bb.4: # %entry ++; MIPS32R6-NEXT: xor $1, $1, $5 ++; MIPS32R6-NEXT: jr $ra ++; MIPS32R6-NEXT: sltiu $2, $1, 1 ++; ++; MIPS32R6O0-LABEL: AtomicCmpSwapRes8: ++; MIPS32R6O0: # %bb.0: # %entry ++; MIPS32R6O0-NEXT: addiu $sp, $sp, -8 ++; MIPS32R6O0-NEXT: move $1, $6 ++; MIPS32R6O0-NEXT: move $2, $5 ++; MIPS32R6O0-NEXT: sw $2, 0($sp) # 4-byte Folded Spill ++; MIPS32R6O0-NEXT: move $3, $4 ++; MIPS32R6O0-NEXT: # kill: def $a2 killed $at ++; MIPS32R6O0-NEXT: # kill: def $a1 killed $v0 ++; MIPS32R6O0-NEXT: addiu $4, $zero, -4 ++; MIPS32R6O0-NEXT: and $4, $3, $4 ++; MIPS32R6O0-NEXT: andi $3, $3, 3 ++; MIPS32R6O0-NEXT: sll $9, $3, 3 ++; MIPS32R6O0-NEXT: ori $3, $zero, 255 ++; MIPS32R6O0-NEXT: sllv $5, $3, $9 ++; MIPS32R6O0-NEXT: nor $7, $zero, $5 ++; MIPS32R6O0-NEXT: andi $2, $2, 255 ++; MIPS32R6O0-NEXT: sllv $6, $2, $9 ++; MIPS32R6O0-NEXT: andi $1, $1, 255 ++; MIPS32R6O0-NEXT: sllv $8, $1, $9 ++; MIPS32R6O0-NEXT: $BB13_1: # %entry ++; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32R6O0-NEXT: ll $2, 0($4) ++; MIPS32R6O0-NEXT: and $3, $2, $5 ++; MIPS32R6O0-NEXT: bnec $3, $6, $BB13_3 ++; MIPS32R6O0-NEXT: # %bb.2: # %entry ++; MIPS32R6O0-NEXT: # in Loop: Header=BB13_1 Depth=1 ++; MIPS32R6O0-NEXT: and $2, $2, $7 ++; MIPS32R6O0-NEXT: or $2, $2, $8 ++; MIPS32R6O0-NEXT: sc $2, 0($4) ++; MIPS32R6O0-NEXT: beqzc $2, $BB13_1 ++; MIPS32R6O0-NEXT: $BB13_3: # %entry ++; MIPS32R6O0-NEXT: sync ++; MIPS32R6O0-NEXT: srlv $1, $3, $9 ++; MIPS32R6O0-NEXT: seb $1, $1 ++; MIPS32R6O0-NEXT: # %bb.4: # %entry ++; MIPS32R6O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ++; MIPS32R6O0-NEXT: # %bb.5: # %entry ++; MIPS32R6O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload ++; MIPS32R6O0-NEXT: lw $2, 0($sp) # 4-byte Folded Reload ++; MIPS32R6O0-NEXT: xor $1, $1, $2 ++; MIPS32R6O0-NEXT: sltiu $2, $1, 1 ++; MIPS32R6O0-NEXT: addiu $sp, $sp, 8 ++; MIPS32R6O0-NEXT: jrc $ra ++; ++; MIPS4-LABEL: AtomicCmpSwapRes8: ++; MIPS4: # %bb.0: # %entry ++; MIPS4-NEXT: daddiu $1, $zero, -4 ++; MIPS4-NEXT: and $2, $4, $1 ++; MIPS4-NEXT: andi $1, $4, 3 ++; MIPS4-NEXT: sll $3, $1, 3 ++; MIPS4-NEXT: ori $1, $zero, 255 ++; MIPS4-NEXT: sllv $4, $1, $3 ++; MIPS4-NEXT: nor $7, $zero, $4 ++; MIPS4-NEXT: andi $1, $5, 255 ++; MIPS4-NEXT: sllv $8, $1, $3 ++; MIPS4-NEXT: andi $1, $6, 255 ++; MIPS4-NEXT: sllv $6, $1, $3 ++; MIPS4-NEXT: .LBB13_1: # %entry ++; MIPS4-NEXT: # =>This 
Inner Loop Header: Depth=1 ++; MIPS4-NEXT: ll $9, 0($2) ++; MIPS4-NEXT: and $10, $9, $4 ++; MIPS4-NEXT: bne $10, $8, .LBB13_3 ++; MIPS4-NEXT: nop ++; MIPS4-NEXT: # %bb.2: # %entry ++; MIPS4-NEXT: # in Loop: Header=BB13_1 Depth=1 ++; MIPS4-NEXT: and $9, $9, $7 ++; MIPS4-NEXT: or $9, $9, $6 ++; MIPS4-NEXT: sc $9, 0($2) ++; MIPS4-NEXT: beqz $9, .LBB13_1 ++; MIPS4-NEXT: nop ++; MIPS4-NEXT: .LBB13_3: # %entry ++; MIPS4-NEXT: sync ++; MIPS4-NEXT: srlv $1, $10, $3 ++; MIPS4-NEXT: sll $1, $1, 24 ++; MIPS4-NEXT: sra $1, $1, 24 ++; MIPS4-NEXT: # %bb.4: # %entry ++; MIPS4-NEXT: xor $1, $1, $5 ++; MIPS4-NEXT: jr $ra ++; MIPS4-NEXT: sltiu $2, $1, 1 ++; ++; MIPS64-LABEL: AtomicCmpSwapRes8: ++; MIPS64: # %bb.0: # %entry ++; MIPS64-NEXT: daddiu $1, $zero, -4 ++; MIPS64-NEXT: and $2, $4, $1 ++; MIPS64-NEXT: andi $1, $4, 3 ++; MIPS64-NEXT: sll $3, $1, 3 ++; MIPS64-NEXT: ori $1, $zero, 255 ++; MIPS64-NEXT: sllv $4, $1, $3 ++; MIPS64-NEXT: nor $7, $zero, $4 ++; MIPS64-NEXT: andi $1, $5, 255 ++; MIPS64-NEXT: sllv $8, $1, $3 ++; MIPS64-NEXT: andi $1, $6, 255 ++; MIPS64-NEXT: sllv $6, $1, $3 ++; MIPS64-NEXT: .LBB13_1: # %entry ++; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64-NEXT: ll $9, 0($2) ++; MIPS64-NEXT: and $10, $9, $4 ++; MIPS64-NEXT: bne $10, $8, .LBB13_3 ++; MIPS64-NEXT: nop ++; MIPS64-NEXT: # %bb.2: # %entry ++; MIPS64-NEXT: # in Loop: Header=BB13_1 Depth=1 ++; MIPS64-NEXT: and $9, $9, $7 ++; MIPS64-NEXT: or $9, $9, $6 ++; MIPS64-NEXT: sc $9, 0($2) ++; MIPS64-NEXT: beqz $9, .LBB13_1 ++; MIPS64-NEXT: nop ++; MIPS64-NEXT: .LBB13_3: # %entry ++; MIPS64-NEXT: sync ++; MIPS64-NEXT: srlv $1, $10, $3 ++; MIPS64-NEXT: sll $1, $1, 24 ++; MIPS64-NEXT: sra $1, $1, 24 ++; MIPS64-NEXT: # %bb.4: # %entry ++; MIPS64-NEXT: xor $1, $1, $5 ++; MIPS64-NEXT: jr $ra ++; MIPS64-NEXT: sltiu $2, $1, 1 ++; ++; MIPS64R2-LABEL: AtomicCmpSwapRes8: ++; MIPS64R2: # %bb.0: # %entry ++; MIPS64R2-NEXT: daddiu $1, $zero, -4 ++; MIPS64R2-NEXT: and $2, $4, $1 ++; MIPS64R2-NEXT: andi $1, $4, 3 ++; MIPS64R2-NEXT: sll $3, $1, 3 ++; MIPS64R2-NEXT: ori $1, $zero, 255 ++; MIPS64R2-NEXT: sllv $4, $1, $3 ++; MIPS64R2-NEXT: nor $7, $zero, $4 ++; MIPS64R2-NEXT: andi $1, $5, 255 ++; MIPS64R2-NEXT: sllv $8, $1, $3 ++; MIPS64R2-NEXT: andi $1, $6, 255 ++; MIPS64R2-NEXT: sllv $6, $1, $3 ++; MIPS64R2-NEXT: .LBB13_1: # %entry ++; MIPS64R2-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R2-NEXT: ll $9, 0($2) ++; MIPS64R2-NEXT: and $10, $9, $4 ++; MIPS64R2-NEXT: bne $10, $8, .LBB13_3 ++; MIPS64R2-NEXT: nop ++; MIPS64R2-NEXT: # %bb.2: # %entry ++; MIPS64R2-NEXT: # in Loop: Header=BB13_1 Depth=1 ++; MIPS64R2-NEXT: and $9, $9, $7 ++; MIPS64R2-NEXT: or $9, $9, $6 ++; MIPS64R2-NEXT: sc $9, 0($2) ++; MIPS64R2-NEXT: beqz $9, .LBB13_1 ++; MIPS64R2-NEXT: nop ++; MIPS64R2-NEXT: .LBB13_3: # %entry ++; MIPS64R2-NEXT: sync ++; MIPS64R2-NEXT: srlv $1, $10, $3 ++; MIPS64R2-NEXT: seb $1, $1 ++; MIPS64R2-NEXT: # %bb.4: # %entry ++; MIPS64R2-NEXT: xor $1, $1, $5 ++; MIPS64R2-NEXT: jr $ra ++; MIPS64R2-NEXT: sltiu $2, $1, 1 ++; ++; MIPS64R6-LABEL: AtomicCmpSwapRes8: ++; MIPS64R6: # %bb.0: # %entry ++; MIPS64R6-NEXT: daddiu $1, $zero, -4 ++; MIPS64R6-NEXT: and $2, $4, $1 ++; MIPS64R6-NEXT: andi $1, $4, 3 ++; MIPS64R6-NEXT: sll $3, $1, 3 ++; MIPS64R6-NEXT: ori $1, $zero, 255 ++; MIPS64R6-NEXT: sllv $4, $1, $3 ++; MIPS64R6-NEXT: nor $7, $zero, $4 ++; MIPS64R6-NEXT: andi $1, $5, 255 ++; MIPS64R6-NEXT: sllv $8, $1, $3 ++; MIPS64R6-NEXT: andi $1, $6, 255 ++; MIPS64R6-NEXT: sllv $6, $1, $3 ++; MIPS64R6-NEXT: .LBB13_1: # %entry ++; MIPS64R6-NEXT: # =>This Inner 
Loop Header: Depth=1 ++; MIPS64R6-NEXT: ll $9, 0($2) ++; MIPS64R6-NEXT: and $10, $9, $4 ++; MIPS64R6-NEXT: bnec $10, $8, .LBB13_3 ++; MIPS64R6-NEXT: # %bb.2: # %entry ++; MIPS64R6-NEXT: # in Loop: Header=BB13_1 Depth=1 ++; MIPS64R6-NEXT: and $9, $9, $7 ++; MIPS64R6-NEXT: or $9, $9, $6 ++; MIPS64R6-NEXT: sc $9, 0($2) ++; MIPS64R6-NEXT: beqzc $9, .LBB13_1 ++; MIPS64R6-NEXT: .LBB13_3: # %entry ++; MIPS64R6-NEXT: sync ++; MIPS64R6-NEXT: srlv $1, $10, $3 ++; MIPS64R6-NEXT: seb $1, $1 ++; MIPS64R6-NEXT: # %bb.4: # %entry ++; MIPS64R6-NEXT: xor $1, $1, $5 ++; MIPS64R6-NEXT: jr $ra ++; MIPS64R6-NEXT: sltiu $2, $1, 1 ++; ++; MIPS64R6O0-LABEL: AtomicCmpSwapRes8: ++; MIPS64R6O0: # %bb.0: # %entry ++; MIPS64R6O0-NEXT: daddiu $sp, $sp, -16 ++; MIPS64R6O0-NEXT: move $3, $4 ++; MIPS64R6O0-NEXT: move $1, $6 ++; MIPS64R6O0-NEXT: move $2, $5 ++; MIPS64R6O0-NEXT: sw $2, 8($sp) # 4-byte Folded Spill ++; MIPS64R6O0-NEXT: daddiu $4, $zero, -4 ++; MIPS64R6O0-NEXT: and $4, $3, $4 ++; MIPS64R6O0-NEXT: andi $3, $3, 3 ++; MIPS64R6O0-NEXT: xori $3, $3, 3 ++; MIPS64R6O0-NEXT: sll $9, $3, 3 ++; MIPS64R6O0-NEXT: ori $3, $zero, 255 ++; MIPS64R6O0-NEXT: sllv $5, $3, $9 ++; MIPS64R6O0-NEXT: nor $7, $zero, $5 ++; MIPS64R6O0-NEXT: andi $2, $2, 255 ++; MIPS64R6O0-NEXT: sllv $6, $2, $9 ++; MIPS64R6O0-NEXT: andi $1, $1, 255 ++; MIPS64R6O0-NEXT: sllv $8, $1, $9 ++; MIPS64R6O0-NEXT: .LBB13_1: # %entry ++; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6O0-NEXT: ll $2, 0($4) ++; MIPS64R6O0-NEXT: and $3, $2, $5 ++; MIPS64R6O0-NEXT: bnec $3, $6, .LBB13_3 ++; MIPS64R6O0-NEXT: # %bb.2: # %entry ++; MIPS64R6O0-NEXT: # in Loop: Header=BB13_1 Depth=1 ++; MIPS64R6O0-NEXT: and $2, $2, $7 ++; MIPS64R6O0-NEXT: or $2, $2, $8 ++; MIPS64R6O0-NEXT: sc $2, 0($4) ++; MIPS64R6O0-NEXT: beqzc $2, .LBB13_1 ++; MIPS64R6O0-NEXT: .LBB13_3: # %entry ++; MIPS64R6O0-NEXT: sync ++; MIPS64R6O0-NEXT: srlv $1, $3, $9 ++; MIPS64R6O0-NEXT: seb $1, $1 ++; MIPS64R6O0-NEXT: # %bb.4: # %entry ++; MIPS64R6O0-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ++; MIPS64R6O0-NEXT: # %bb.5: # %entry ++; MIPS64R6O0-NEXT: lw $1, 12($sp) # 4-byte Folded Reload ++; MIPS64R6O0-NEXT: lw $2, 8($sp) # 4-byte Folded Reload ++; MIPS64R6O0-NEXT: xor $1, $1, $2 ++; MIPS64R6O0-NEXT: sltiu $2, $1, 1 ++; MIPS64R6O0-NEXT: daddiu $sp, $sp, 16 ++; MIPS64R6O0-NEXT: jrc $ra ++; ++; MM32-LABEL: AtomicCmpSwapRes8: ++; MM32: # %bb.0: # %entry ++; MM32-NEXT: addiu $1, $zero, -4 ++; MM32-NEXT: and $2, $4, $1 ++; MM32-NEXT: andi $1, $4, 3 ++; MM32-NEXT: sll $3, $1, 3 ++; MM32-NEXT: ori $1, $zero, 255 ++; MM32-NEXT: sllv $4, $1, $3 ++; MM32-NEXT: nor $7, $zero, $4 ++; MM32-NEXT: andi $1, $5, 255 ++; MM32-NEXT: sllv $8, $1, $3 ++; MM32-NEXT: andi $1, $6, 255 ++; MM32-NEXT: sllv $6, $1, $3 ++; MM32-NEXT: $BB13_1: # %entry ++; MM32-NEXT: # =>This Inner Loop Header: Depth=1 ++; MM32-NEXT: ll $9, 0($2) ++; MM32-NEXT: and $10, $9, $4 ++; MM32-NEXT: bne $10, $8, $BB13_3 ++; MM32-NEXT: nop ++; MM32-NEXT: # %bb.2: # %entry ++; MM32-NEXT: # in Loop: Header=BB13_1 Depth=1 ++; MM32-NEXT: and $9, $9, $7 ++; MM32-NEXT: or $9, $9, $6 ++; MM32-NEXT: sc $9, 0($2) ++; MM32-NEXT: beqzc $9, $BB13_1 ++; MM32-NEXT: $BB13_3: # %entry ++; MM32-NEXT: sync ++; MM32-NEXT: srlv $1, $10, $3 ++; MM32-NEXT: seb $1, $1 ++; MM32-NEXT: # %bb.4: # %entry ++; MM32-NEXT: xor $1, $1, $5 ++; MM32-NEXT: jr $ra ++; MM32-NEXT: sltiu $2, $1, 1 ++; ++; O1-LABEL: AtomicCmpSwapRes8: ++; O1: # %bb.0: # %entry ++; O1-NEXT: addiu $1, $zero, -4 ++; O1-NEXT: and $2, $4, $1 ++; O1-NEXT: andi $1, $4, 3 ++; O1-NEXT: sll $3, $1, 3 ++; 
O1-NEXT: ori $1, $zero, 255 ++; O1-NEXT: sllv $4, $1, $3 ++; O1-NEXT: nor $7, $zero, $4 ++; O1-NEXT: andi $1, $5, 255 ++; O1-NEXT: sllv $8, $1, $3 ++; O1-NEXT: andi $1, $6, 255 ++; O1-NEXT: sllv $6, $1, $3 ++; O1-NEXT: $BB13_1: # %entry ++; O1-NEXT: # =>This Inner Loop Header: Depth=1 ++; O1-NEXT: ll $9, 0($2) ++; O1-NEXT: and $10, $9, $4 ++; O1-NEXT: bne $10, $8, $BB13_3 ++; O1-NEXT: nop ++; O1-NEXT: # %bb.2: # %entry ++; O1-NEXT: # in Loop: Header=BB13_1 Depth=1 ++; O1-NEXT: and $9, $9, $7 ++; O1-NEXT: or $9, $9, $6 ++; O1-NEXT: sc $9, 0($2) ++; O1-NEXT: beqz $9, $BB13_1 ++; O1-NEXT: nop ++; O1-NEXT: $BB13_3: # %entry ++; O1-NEXT: sync ++; O1-NEXT: srlv $1, $10, $3 ++; O1-NEXT: sll $1, $1, 24 ++; O1-NEXT: sra $1, $1, 24 ++; O1-NEXT: # %bb.4: # %entry ++; O1-NEXT: xor $1, $1, $5 ++; O1-NEXT: jr $ra ++; O1-NEXT: sltiu $2, $1, 1 ++; ++; O2-LABEL: AtomicCmpSwapRes8: ++; O2: # %bb.0: # %entry ++; O2-NEXT: addiu $1, $zero, -4 ++; O2-NEXT: and $2, $4, $1 ++; O2-NEXT: andi $1, $4, 3 ++; O2-NEXT: sll $3, $1, 3 ++; O2-NEXT: ori $1, $zero, 255 ++; O2-NEXT: sllv $4, $1, $3 ++; O2-NEXT: nor $7, $zero, $4 ++; O2-NEXT: andi $1, $5, 255 ++; O2-NEXT: sllv $8, $1, $3 ++; O2-NEXT: andi $1, $6, 255 ++; O2-NEXT: sllv $6, $1, $3 ++; O2-NEXT: $BB13_1: # %entry ++; O2-NEXT: # =>This Inner Loop Header: Depth=1 ++; O2-NEXT: ll $9, 0($2) ++; O2-NEXT: and $10, $9, $4 ++; O2-NEXT: bne $10, $8, $BB13_3 ++; O2-NEXT: nop ++; O2-NEXT: # %bb.2: # %entry ++; O2-NEXT: # in Loop: Header=BB13_1 Depth=1 ++; O2-NEXT: and $9, $9, $7 ++; O2-NEXT: or $9, $9, $6 ++; O2-NEXT: sc $9, 0($2) ++; O2-NEXT: beqz $9, $BB13_1 ++; O2-NEXT: nop ++; O2-NEXT: $BB13_3: # %entry ++; O2-NEXT: sync ++; O2-NEXT: srlv $1, $10, $3 ++; O2-NEXT: sll $1, $1, 24 ++; O2-NEXT: sra $1, $1, 24 ++; O2-NEXT: # %bb.4: # %entry ++; O2-NEXT: xor $1, $1, $5 ++; O2-NEXT: jr $ra ++; O2-NEXT: sltiu $2, $1, 1 ++; ++; O3-LABEL: AtomicCmpSwapRes8: ++; O3: # %bb.0: # %entry ++; O3-NEXT: addiu $1, $zero, -4 ++; O3-NEXT: and $2, $4, $1 ++; O3-NEXT: andi $1, $4, 3 ++; O3-NEXT: sll $3, $1, 3 ++; O3-NEXT: ori $1, $zero, 255 ++; O3-NEXT: sllv $4, $1, $3 ++; O3-NEXT: andi $1, $5, 255 ++; O3-NEXT: sllv $8, $1, $3 ++; O3-NEXT: andi $1, $6, 255 ++; O3-NEXT: nor $7, $zero, $4 ++; O3-NEXT: sllv $6, $1, $3 ++; O3-NEXT: $BB13_1: # %entry ++; O3-NEXT: # =>This Inner Loop Header: Depth=1 ++; O3-NEXT: ll $9, 0($2) ++; O3-NEXT: and $10, $9, $4 ++; O3-NEXT: bne $10, $8, $BB13_3 ++; O3-NEXT: nop ++; O3-NEXT: # %bb.2: # %entry ++; O3-NEXT: # in Loop: Header=BB13_1 Depth=1 ++; O3-NEXT: and $9, $9, $7 ++; O3-NEXT: or $9, $9, $6 ++; O3-NEXT: sc $9, 0($2) ++; O3-NEXT: beqz $9, $BB13_1 ++; O3-NEXT: nop ++; O3-NEXT: $BB13_3: # %entry ++; O3-NEXT: sync ++; O3-NEXT: srlv $1, $10, $3 ++; O3-NEXT: sll $1, $1, 24 ++; O3-NEXT: sra $1, $1, 24 ++; O3-NEXT: # %bb.4: # %entry ++; O3-NEXT: xor $1, $1, $5 ++; O3-NEXT: jr $ra ++; O3-NEXT: sltiu $2, $1, 1 ++; ++; MIPS32EB-LABEL: AtomicCmpSwapRes8: ++; MIPS32EB: # %bb.0: # %entry ++; MIPS32EB-NEXT: addiu $1, $zero, -4 ++; MIPS32EB-NEXT: and $2, $4, $1 ++; MIPS32EB-NEXT: andi $1, $4, 3 ++; MIPS32EB-NEXT: xori $1, $1, 3 ++; MIPS32EB-NEXT: sll $3, $1, 3 ++; MIPS32EB-NEXT: ori $1, $zero, 255 ++; MIPS32EB-NEXT: sllv $4, $1, $3 ++; MIPS32EB-NEXT: nor $7, $zero, $4 ++; MIPS32EB-NEXT: andi $1, $5, 255 ++; MIPS32EB-NEXT: sllv $8, $1, $3 ++; MIPS32EB-NEXT: andi $1, $6, 255 ++; MIPS32EB-NEXT: sllv $6, $1, $3 ++; MIPS32EB-NEXT: $BB13_1: # %entry ++; MIPS32EB-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32EB-NEXT: ll $9, 0($2) ++; MIPS32EB-NEXT: and $10, $9, $4 ++; 
MIPS32EB-NEXT: bne $10, $8, $BB13_3 ++; MIPS32EB-NEXT: nop ++; MIPS32EB-NEXT: # %bb.2: # %entry ++; MIPS32EB-NEXT: # in Loop: Header=BB13_1 Depth=1 ++; MIPS32EB-NEXT: and $9, $9, $7 ++; MIPS32EB-NEXT: or $9, $9, $6 ++; MIPS32EB-NEXT: sc $9, 0($2) ++; MIPS32EB-NEXT: beqz $9, $BB13_1 ++; MIPS32EB-NEXT: nop ++; MIPS32EB-NEXT: $BB13_3: # %entry ++; MIPS32EB-NEXT: sync ++; MIPS32EB-NEXT: srlv $1, $10, $3 ++; MIPS32EB-NEXT: sll $1, $1, 24 ++; MIPS32EB-NEXT: sra $1, $1, 24 ++; MIPS32EB-NEXT: # %bb.4: # %entry ++; MIPS32EB-NEXT: xor $1, $1, $5 ++; MIPS32EB-NEXT: jr $ra ++; MIPS32EB-NEXT: sltiu $2, $1, 1 ++entry: ++ %0 = cmpxchg i8* %ptr, i8 %oldval, i8 %newval monotonic monotonic ++ %1 = extractvalue { i8, i1 } %0, 1 ++ ret i1 %1 ++; FIXME: -march=mips produces a redundant sign extension here... ++; FIXME: ...Leading to this split check. ++ ++} ++ ++; Check one i16 so that we cover the seh sign extend ++@z = common global i16 0, align 1 ++ ++define signext i16 @AtomicLoadAdd16(i16 signext %incr) nounwind { ++; MIPS32-LABEL: AtomicLoadAdd16: ++; MIPS32: # %bb.0: # %entry ++; MIPS32-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32-NEXT: addu $1, $2, $25 ++; MIPS32-NEXT: lw $1, %got(z)($1) ++; MIPS32-NEXT: addiu $2, $zero, -4 ++; MIPS32-NEXT: and $3, $1, $2 ++; MIPS32-NEXT: andi $1, $1, 3 ++; MIPS32-NEXT: sll $1, $1, 3 ++; MIPS32-NEXT: ori $2, $zero, 65535 ++; MIPS32-NEXT: sllv $5, $2, $1 ++; MIPS32-NEXT: nor $6, $zero, $5 ++; MIPS32-NEXT: sllv $4, $4, $1 ++; MIPS32-NEXT: $BB14_1: # %entry ++; MIPS32-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32-NEXT: ll $7, 0($3) ++; MIPS32-NEXT: addu $8, $7, $4 ++; MIPS32-NEXT: and $8, $8, $5 ++; MIPS32-NEXT: and $9, $7, $6 ++; MIPS32-NEXT: or $9, $9, $8 ++; MIPS32-NEXT: sc $9, 0($3) ++; MIPS32-NEXT: beqz $9, $BB14_1 ++; MIPS32-NEXT: nop ++; MIPS32-NEXT: # %bb.2: # %entry ++; MIPS32-NEXT: and $2, $7, $5 ++; MIPS32-NEXT: srlv $2, $2, $1 ++; MIPS32-NEXT: sll $2, $2, 16 ++; MIPS32-NEXT: sra $2, $2, 16 ++; MIPS32-NEXT: # %bb.3: # %entry ++; MIPS32-NEXT: jr $ra ++; MIPS32-NEXT: nop ++; ++; MIPS32O0-LABEL: AtomicLoadAdd16: ++; MIPS32O0: # %bb.0: # %entry ++; MIPS32O0-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32O0-NEXT: addiu $sp, $sp, -8 ++; MIPS32O0-NEXT: addu $1, $2, $25 ++; MIPS32O0-NEXT: lw $1, %got(z)($1) ++; MIPS32O0-NEXT: addiu $2, $zero, -4 ++; MIPS32O0-NEXT: and $5, $1, $2 ++; MIPS32O0-NEXT: andi $1, $1, 3 ++; MIPS32O0-NEXT: sll $9, $1, 3 ++; MIPS32O0-NEXT: ori $1, $zero, 65535 ++; MIPS32O0-NEXT: sllv $7, $1, $9 ++; MIPS32O0-NEXT: nor $8, $zero, $7 ++; MIPS32O0-NEXT: sllv $6, $4, $9 ++; MIPS32O0-NEXT: $BB14_1: # %entry ++; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32O0-NEXT: ll $2, 0($5) ++; MIPS32O0-NEXT: addu $3, $2, $6 ++; MIPS32O0-NEXT: and $3, $3, $7 ++; MIPS32O0-NEXT: and $4, $2, $8 ++; MIPS32O0-NEXT: or $4, $4, $3 ++; MIPS32O0-NEXT: sc $4, 0($5) ++; MIPS32O0-NEXT: beqz $4, $BB14_1 ++; MIPS32O0-NEXT: nop ++; MIPS32O0-NEXT: # %bb.2: # %entry ++; MIPS32O0-NEXT: and $1, $2, $7 ++; MIPS32O0-NEXT: srlv $1, $1, $9 ++; MIPS32O0-NEXT: sll $1, $1, 16 ++; MIPS32O0-NEXT: sra $1, $1, 16 ++; MIPS32O0-NEXT: # %bb.3: # %entry ++; MIPS32O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ++; MIPS32O0-NEXT: # %bb.4: # %entry ++; MIPS32O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload ++; MIPS32O0-NEXT: sll $1, $1, 16 ++; MIPS32O0-NEXT: sra $2, $1, 16 ++; MIPS32O0-NEXT: addiu $sp, $sp, 8 ++; MIPS32O0-NEXT: jr $ra ++; MIPS32O0-NEXT: nop ++; ++; MIPS32R2-LABEL: AtomicLoadAdd16: ++; 
MIPS32R2: # %bb.0: # %entry ++; MIPS32R2-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32R2-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32R2-NEXT: addu $1, $2, $25 ++; MIPS32R2-NEXT: lw $1, %got(z)($1) ++; MIPS32R2-NEXT: addiu $2, $zero, -4 ++; MIPS32R2-NEXT: and $3, $1, $2 ++; MIPS32R2-NEXT: andi $1, $1, 3 ++; MIPS32R2-NEXT: sll $1, $1, 3 ++; MIPS32R2-NEXT: ori $2, $zero, 65535 ++; MIPS32R2-NEXT: sllv $5, $2, $1 ++; MIPS32R2-NEXT: nor $6, $zero, $5 ++; MIPS32R2-NEXT: sllv $4, $4, $1 ++; MIPS32R2-NEXT: $BB14_1: # %entry ++; MIPS32R2-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32R2-NEXT: ll $7, 0($3) ++; MIPS32R2-NEXT: addu $8, $7, $4 ++; MIPS32R2-NEXT: and $8, $8, $5 ++; MIPS32R2-NEXT: and $9, $7, $6 ++; MIPS32R2-NEXT: or $9, $9, $8 ++; MIPS32R2-NEXT: sc $9, 0($3) ++; MIPS32R2-NEXT: beqz $9, $BB14_1 ++; MIPS32R2-NEXT: nop ++; MIPS32R2-NEXT: # %bb.2: # %entry ++; MIPS32R2-NEXT: and $2, $7, $5 ++; MIPS32R2-NEXT: srlv $2, $2, $1 ++; MIPS32R2-NEXT: seh $2, $2 ++; MIPS32R2-NEXT: # %bb.3: # %entry ++; MIPS32R2-NEXT: jr $ra ++; MIPS32R2-NEXT: nop ++; ++; MIPS32R6-LABEL: AtomicLoadAdd16: ++; MIPS32R6: # %bb.0: # %entry ++; MIPS32R6-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32R6-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32R6-NEXT: addu $1, $2, $25 ++; MIPS32R6-NEXT: lw $1, %got(z)($1) ++; MIPS32R6-NEXT: addiu $2, $zero, -4 ++; MIPS32R6-NEXT: and $3, $1, $2 ++; MIPS32R6-NEXT: andi $1, $1, 3 ++; MIPS32R6-NEXT: sll $1, $1, 3 ++; MIPS32R6-NEXT: ori $2, $zero, 65535 ++; MIPS32R6-NEXT: sllv $5, $2, $1 ++; MIPS32R6-NEXT: nor $6, $zero, $5 ++; MIPS32R6-NEXT: sllv $4, $4, $1 ++; MIPS32R6-NEXT: $BB14_1: # %entry ++; MIPS32R6-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32R6-NEXT: ll $7, 0($3) ++; MIPS32R6-NEXT: addu $8, $7, $4 ++; MIPS32R6-NEXT: and $8, $8, $5 ++; MIPS32R6-NEXT: and $9, $7, $6 ++; MIPS32R6-NEXT: or $9, $9, $8 ++; MIPS32R6-NEXT: sc $9, 0($3) ++; MIPS32R6-NEXT: beqzc $9, $BB14_1 ++; MIPS32R6-NEXT: # %bb.2: # %entry ++; MIPS32R6-NEXT: and $2, $7, $5 ++; MIPS32R6-NEXT: srlv $2, $2, $1 ++; MIPS32R6-NEXT: seh $2, $2 ++; MIPS32R6-NEXT: # %bb.3: # %entry ++; MIPS32R6-NEXT: jrc $ra ++; ++; MIPS32R6O0-LABEL: AtomicLoadAdd16: ++; MIPS32R6O0: # %bb.0: # %entry ++; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32R6O0-NEXT: addiu $sp, $sp, -8 ++; MIPS32R6O0-NEXT: addu $1, $2, $25 ++; MIPS32R6O0-NEXT: # kill: def $v0 killed $a0 ++; MIPS32R6O0-NEXT: lw $1, %got(z)($1) ++; MIPS32R6O0-NEXT: addiu $2, $zero, -4 ++; MIPS32R6O0-NEXT: and $5, $1, $2 ++; MIPS32R6O0-NEXT: andi $1, $1, 3 ++; MIPS32R6O0-NEXT: sll $9, $1, 3 ++; MIPS32R6O0-NEXT: ori $1, $zero, 65535 ++; MIPS32R6O0-NEXT: sllv $7, $1, $9 ++; MIPS32R6O0-NEXT: nor $8, $zero, $7 ++; MIPS32R6O0-NEXT: sllv $6, $4, $9 ++; MIPS32R6O0-NEXT: $BB14_1: # %entry ++; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32R6O0-NEXT: ll $2, 0($5) ++; MIPS32R6O0-NEXT: addu $3, $2, $6 ++; MIPS32R6O0-NEXT: and $3, $3, $7 ++; MIPS32R6O0-NEXT: and $4, $2, $8 ++; MIPS32R6O0-NEXT: or $4, $4, $3 ++; MIPS32R6O0-NEXT: sc $4, 0($5) ++; MIPS32R6O0-NEXT: beqzc $4, $BB14_1 ++; MIPS32R6O0-NEXT: # %bb.2: # %entry ++; MIPS32R6O0-NEXT: and $1, $2, $7 ++; MIPS32R6O0-NEXT: srlv $1, $1, $9 ++; MIPS32R6O0-NEXT: seh $1, $1 ++; MIPS32R6O0-NEXT: # %bb.3: # %entry ++; MIPS32R6O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ++; MIPS32R6O0-NEXT: # %bb.4: # %entry ++; MIPS32R6O0-NEXT: lw $2, 4($sp) # 4-byte Folded Reload ++; MIPS32R6O0-NEXT: addiu $sp, $sp, 8 ++; MIPS32R6O0-NEXT: jrc $ra ++; ++; MIPS4-LABEL: AtomicLoadAdd16: ++; MIPS4: # %bb.0: # 
%entry ++; MIPS4-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd16))) ++; MIPS4-NEXT: daddu $1, $1, $25 ++; MIPS4-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd16))) ++; MIPS4-NEXT: ld $1, %got_disp(z)($1) ++; MIPS4-NEXT: daddiu $2, $zero, -4 ++; MIPS4-NEXT: and $3, $1, $2 ++; MIPS4-NEXT: andi $1, $1, 3 ++; MIPS4-NEXT: sll $1, $1, 3 ++; MIPS4-NEXT: ori $2, $zero, 65535 ++; MIPS4-NEXT: sllv $5, $2, $1 ++; MIPS4-NEXT: nor $6, $zero, $5 ++; MIPS4-NEXT: sllv $4, $4, $1 ++; MIPS4-NEXT: .LBB14_1: # %entry ++; MIPS4-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS4-NEXT: ll $7, 0($3) ++; MIPS4-NEXT: addu $8, $7, $4 ++; MIPS4-NEXT: and $8, $8, $5 ++; MIPS4-NEXT: and $9, $7, $6 ++; MIPS4-NEXT: or $9, $9, $8 ++; MIPS4-NEXT: sc $9, 0($3) ++; MIPS4-NEXT: beqz $9, .LBB14_1 ++; MIPS4-NEXT: nop ++; MIPS4-NEXT: # %bb.2: # %entry ++; MIPS4-NEXT: and $2, $7, $5 ++; MIPS4-NEXT: srlv $2, $2, $1 ++; MIPS4-NEXT: sll $2, $2, 16 ++; MIPS4-NEXT: sra $2, $2, 16 ++; MIPS4-NEXT: # %bb.3: # %entry ++; MIPS4-NEXT: jr $ra ++; MIPS4-NEXT: nop ++; ++; MIPS64-LABEL: AtomicLoadAdd16: ++; MIPS64: # %bb.0: # %entry ++; MIPS64-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd16))) ++; MIPS64-NEXT: daddu $1, $1, $25 ++; MIPS64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd16))) ++; MIPS64-NEXT: ld $1, %got_disp(z)($1) ++; MIPS64-NEXT: daddiu $2, $zero, -4 ++; MIPS64-NEXT: and $3, $1, $2 ++; MIPS64-NEXT: andi $1, $1, 3 ++; MIPS64-NEXT: sll $1, $1, 3 ++; MIPS64-NEXT: ori $2, $zero, 65535 ++; MIPS64-NEXT: sllv $5, $2, $1 ++; MIPS64-NEXT: nor $6, $zero, $5 ++; MIPS64-NEXT: sllv $4, $4, $1 ++; MIPS64-NEXT: .LBB14_1: # %entry ++; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64-NEXT: ll $7, 0($3) ++; MIPS64-NEXT: addu $8, $7, $4 ++; MIPS64-NEXT: and $8, $8, $5 ++; MIPS64-NEXT: and $9, $7, $6 ++; MIPS64-NEXT: or $9, $9, $8 ++; MIPS64-NEXT: sc $9, 0($3) ++; MIPS64-NEXT: beqz $9, .LBB14_1 ++; MIPS64-NEXT: nop ++; MIPS64-NEXT: # %bb.2: # %entry ++; MIPS64-NEXT: and $2, $7, $5 ++; MIPS64-NEXT: srlv $2, $2, $1 ++; MIPS64-NEXT: sll $2, $2, 16 ++; MIPS64-NEXT: sra $2, $2, 16 ++; MIPS64-NEXT: # %bb.3: # %entry ++; MIPS64-NEXT: jr $ra ++; MIPS64-NEXT: nop ++; ++; MIPS64R2-LABEL: AtomicLoadAdd16: ++; MIPS64R2: # %bb.0: # %entry ++; MIPS64R2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd16))) ++; MIPS64R2-NEXT: daddu $1, $1, $25 ++; MIPS64R2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd16))) ++; MIPS64R2-NEXT: ld $1, %got_disp(z)($1) ++; MIPS64R2-NEXT: daddiu $2, $zero, -4 ++; MIPS64R2-NEXT: and $3, $1, $2 ++; MIPS64R2-NEXT: andi $1, $1, 3 ++; MIPS64R2-NEXT: sll $1, $1, 3 ++; MIPS64R2-NEXT: ori $2, $zero, 65535 ++; MIPS64R2-NEXT: sllv $5, $2, $1 ++; MIPS64R2-NEXT: nor $6, $zero, $5 ++; MIPS64R2-NEXT: sllv $4, $4, $1 ++; MIPS64R2-NEXT: .LBB14_1: # %entry ++; MIPS64R2-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R2-NEXT: ll $7, 0($3) ++; MIPS64R2-NEXT: addu $8, $7, $4 ++; MIPS64R2-NEXT: and $8, $8, $5 ++; MIPS64R2-NEXT: and $9, $7, $6 ++; MIPS64R2-NEXT: or $9, $9, $8 ++; MIPS64R2-NEXT: sc $9, 0($3) ++; MIPS64R2-NEXT: beqz $9, .LBB14_1 ++; MIPS64R2-NEXT: nop ++; MIPS64R2-NEXT: # %bb.2: # %entry ++; MIPS64R2-NEXT: and $2, $7, $5 ++; MIPS64R2-NEXT: srlv $2, $2, $1 ++; MIPS64R2-NEXT: seh $2, $2 ++; MIPS64R2-NEXT: # %bb.3: # %entry ++; MIPS64R2-NEXT: jr $ra ++; MIPS64R2-NEXT: nop ++; ++; MIPS64R6-LABEL: AtomicLoadAdd16: ++; MIPS64R6: # %bb.0: # %entry ++; MIPS64R6-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd16))) ++; MIPS64R6-NEXT: daddu $1, $1, $25 ++; MIPS64R6-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd16))) ++; 
MIPS64R6-NEXT: ld $1, %got_disp(z)($1) ++; MIPS64R6-NEXT: daddiu $2, $zero, -4 ++; MIPS64R6-NEXT: and $3, $1, $2 ++; MIPS64R6-NEXT: andi $1, $1, 3 ++; MIPS64R6-NEXT: sll $1, $1, 3 ++; MIPS64R6-NEXT: ori $2, $zero, 65535 ++; MIPS64R6-NEXT: sllv $5, $2, $1 ++; MIPS64R6-NEXT: nor $6, $zero, $5 ++; MIPS64R6-NEXT: sllv $4, $4, $1 ++; MIPS64R6-NEXT: .LBB14_1: # %entry ++; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6-NEXT: ll $7, 0($3) ++; MIPS64R6-NEXT: addu $8, $7, $4 ++; MIPS64R6-NEXT: and $8, $8, $5 ++; MIPS64R6-NEXT: and $9, $7, $6 ++; MIPS64R6-NEXT: or $9, $9, $8 ++; MIPS64R6-NEXT: sc $9, 0($3) ++; MIPS64R6-NEXT: beqzc $9, .LBB14_1 ++; MIPS64R6-NEXT: # %bb.2: # %entry ++; MIPS64R6-NEXT: and $2, $7, $5 ++; MIPS64R6-NEXT: srlv $2, $2, $1 ++; MIPS64R6-NEXT: seh $2, $2 ++; MIPS64R6-NEXT: # %bb.3: # %entry ++; MIPS64R6-NEXT: jrc $ra ++; ++; MIPS64R6O0-LABEL: AtomicLoadAdd16: ++; MIPS64R6O0: # %bb.0: # %entry ++; MIPS64R6O0-NEXT: daddiu $sp, $sp, -16 ++; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd16))) ++; MIPS64R6O0-NEXT: daddu $1, $1, $25 ++; MIPS64R6O0-NEXT: daddiu $2, $1, %lo(%neg(%gp_rel(AtomicLoadAdd16))) ++; MIPS64R6O0-NEXT: move $1, $4 ++; MIPS64R6O0-NEXT: ld $2, %got_disp(z)($2) ++; MIPS64R6O0-NEXT: daddiu $3, $zero, -4 ++; MIPS64R6O0-NEXT: and $5, $2, $3 ++; MIPS64R6O0-NEXT: andi $2, $2, 3 ++; MIPS64R6O0-NEXT: xori $2, $2, 2 ++; MIPS64R6O0-NEXT: sll $9, $2, 3 ++; MIPS64R6O0-NEXT: ori $2, $zero, 65535 ++; MIPS64R6O0-NEXT: sllv $7, $2, $9 ++; MIPS64R6O0-NEXT: nor $8, $zero, $7 ++; MIPS64R6O0-NEXT: sllv $6, $1, $9 ++; MIPS64R6O0-NEXT: .LBB14_1: # %entry ++; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6O0-NEXT: ll $2, 0($5) ++; MIPS64R6O0-NEXT: addu $3, $2, $6 ++; MIPS64R6O0-NEXT: and $3, $3, $7 ++; MIPS64R6O0-NEXT: and $4, $2, $8 ++; MIPS64R6O0-NEXT: or $4, $4, $3 ++; MIPS64R6O0-NEXT: sc $4, 0($5) ++; MIPS64R6O0-NEXT: beqzc $4, .LBB14_1 ++; MIPS64R6O0-NEXT: # %bb.2: # %entry ++; MIPS64R6O0-NEXT: and $1, $2, $7 ++; MIPS64R6O0-NEXT: srlv $1, $1, $9 ++; MIPS64R6O0-NEXT: seh $1, $1 ++; MIPS64R6O0-NEXT: # %bb.3: # %entry ++; MIPS64R6O0-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ++; MIPS64R6O0-NEXT: # %bb.4: # %entry ++; MIPS64R6O0-NEXT: lw $2, 12($sp) # 4-byte Folded Reload ++; MIPS64R6O0-NEXT: daddiu $sp, $sp, 16 ++; MIPS64R6O0-NEXT: jrc $ra ++; ++; MM32-LABEL: AtomicLoadAdd16: ++; MM32: # %bb.0: # %entry ++; MM32-NEXT: lui $2, %hi(_gp_disp) ++; MM32-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MM32-NEXT: addu $2, $2, $25 ++; MM32-NEXT: lw $1, %got(z)($2) ++; MM32-NEXT: addiu $2, $zero, -4 ++; MM32-NEXT: and $3, $1, $2 ++; MM32-NEXT: andi $1, $1, 3 ++; MM32-NEXT: sll $1, $1, 3 ++; MM32-NEXT: ori $2, $zero, 65535 ++; MM32-NEXT: sllv $5, $2, $1 ++; MM32-NEXT: nor $6, $zero, $5 ++; MM32-NEXT: sllv $4, $4, $1 ++; MM32-NEXT: $BB14_1: # %entry ++; MM32-NEXT: # =>This Inner Loop Header: Depth=1 ++; MM32-NEXT: ll $7, 0($3) ++; MM32-NEXT: addu $8, $7, $4 ++; MM32-NEXT: and $8, $8, $5 ++; MM32-NEXT: and $9, $7, $6 ++; MM32-NEXT: or $9, $9, $8 ++; MM32-NEXT: sc $9, 0($3) ++; MM32-NEXT: beqzc $9, $BB14_1 ++; MM32-NEXT: # %bb.2: # %entry ++; MM32-NEXT: and $2, $7, $5 ++; MM32-NEXT: srlv $2, $2, $1 ++; MM32-NEXT: seh $2, $2 ++; MM32-NEXT: # %bb.3: # %entry ++; MM32-NEXT: jrc $ra ++; ++; O1-LABEL: AtomicLoadAdd16: ++; O1: # %bb.0: # %entry ++; O1-NEXT: lui $2, %hi(_gp_disp) ++; O1-NEXT: addiu $2, $2, %lo(_gp_disp) ++; O1-NEXT: addu $1, $2, $25 ++; O1-NEXT: lw $1, %got(z)($1) ++; O1-NEXT: addiu $2, $zero, -4 ++; O1-NEXT: and $3, $1, $2 ++; O1-NEXT: andi $1, $1, 3 
++; O1-NEXT: sll $1, $1, 3 ++; O1-NEXT: ori $2, $zero, 65535 ++; O1-NEXT: sllv $5, $2, $1 ++; O1-NEXT: nor $6, $zero, $5 ++; O1-NEXT: sllv $4, $4, $1 ++; O1-NEXT: $BB14_1: # %entry ++; O1-NEXT: # =>This Inner Loop Header: Depth=1 ++; O1-NEXT: ll $7, 0($3) ++; O1-NEXT: addu $8, $7, $4 ++; O1-NEXT: and $8, $8, $5 ++; O1-NEXT: and $9, $7, $6 ++; O1-NEXT: or $9, $9, $8 ++; O1-NEXT: sc $9, 0($3) ++; O1-NEXT: beqz $9, $BB14_1 ++; O1-NEXT: nop ++; O1-NEXT: # %bb.2: # %entry ++; O1-NEXT: and $2, $7, $5 ++; O1-NEXT: srlv $2, $2, $1 ++; O1-NEXT: sll $2, $2, 16 ++; O1-NEXT: sra $2, $2, 16 ++; O1-NEXT: # %bb.3: # %entry ++; O1-NEXT: jr $ra ++; O1-NEXT: nop ++; ++; O2-LABEL: AtomicLoadAdd16: ++; O2: # %bb.0: # %entry ++; O2-NEXT: lui $2, %hi(_gp_disp) ++; O2-NEXT: addiu $2, $2, %lo(_gp_disp) ++; O2-NEXT: addu $1, $2, $25 ++; O2-NEXT: lw $1, %got(z)($1) ++; O2-NEXT: addiu $2, $zero, -4 ++; O2-NEXT: and $3, $1, $2 ++; O2-NEXT: andi $1, $1, 3 ++; O2-NEXT: sll $1, $1, 3 ++; O2-NEXT: ori $2, $zero, 65535 ++; O2-NEXT: sllv $5, $2, $1 ++; O2-NEXT: nor $6, $zero, $5 ++; O2-NEXT: sllv $4, $4, $1 ++; O2-NEXT: $BB14_1: # %entry ++; O2-NEXT: # =>This Inner Loop Header: Depth=1 ++; O2-NEXT: ll $7, 0($3) ++; O2-NEXT: addu $8, $7, $4 ++; O2-NEXT: and $8, $8, $5 ++; O2-NEXT: and $9, $7, $6 ++; O2-NEXT: or $9, $9, $8 ++; O2-NEXT: sc $9, 0($3) ++; O2-NEXT: beqz $9, $BB14_1 ++; O2-NEXT: nop ++; O2-NEXT: # %bb.2: # %entry ++; O2-NEXT: and $2, $7, $5 ++; O2-NEXT: srlv $2, $2, $1 ++; O2-NEXT: sll $2, $2, 16 ++; O2-NEXT: sra $2, $2, 16 ++; O2-NEXT: # %bb.3: # %entry ++; O2-NEXT: jr $ra ++; O2-NEXT: nop ++; ++; O3-LABEL: AtomicLoadAdd16: ++; O3: # %bb.0: # %entry ++; O3-NEXT: lui $2, %hi(_gp_disp) ++; O3-NEXT: addiu $2, $2, %lo(_gp_disp) ++; O3-NEXT: addu $1, $2, $25 ++; O3-NEXT: addiu $2, $zero, -4 ++; O3-NEXT: lw $1, %got(z)($1) ++; O3-NEXT: and $3, $1, $2 ++; O3-NEXT: andi $1, $1, 3 ++; O3-NEXT: ori $2, $zero, 65535 ++; O3-NEXT: sll $1, $1, 3 ++; O3-NEXT: sllv $5, $2, $1 ++; O3-NEXT: sllv $4, $4, $1 ++; O3-NEXT: nor $6, $zero, $5 ++; O3-NEXT: $BB14_1: # %entry ++; O3-NEXT: # =>This Inner Loop Header: Depth=1 ++; O3-NEXT: ll $7, 0($3) ++; O3-NEXT: addu $8, $7, $4 ++; O3-NEXT: and $8, $8, $5 ++; O3-NEXT: and $9, $7, $6 ++; O3-NEXT: or $9, $9, $8 ++; O3-NEXT: sc $9, 0($3) ++; O3-NEXT: beqz $9, $BB14_1 ++; O3-NEXT: nop ++; O3-NEXT: # %bb.2: # %entry ++; O3-NEXT: and $2, $7, $5 ++; O3-NEXT: srlv $2, $2, $1 ++; O3-NEXT: sll $2, $2, 16 ++; O3-NEXT: sra $2, $2, 16 ++; O3-NEXT: # %bb.3: # %entry ++; O3-NEXT: jr $ra ++; O3-NEXT: nop ++; ++; MIPS32EB-LABEL: AtomicLoadAdd16: ++; MIPS32EB: # %bb.0: # %entry ++; MIPS32EB-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32EB-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32EB-NEXT: addu $1, $2, $25 ++; MIPS32EB-NEXT: lw $1, %got(z)($1) ++; MIPS32EB-NEXT: addiu $2, $zero, -4 ++; MIPS32EB-NEXT: and $3, $1, $2 ++; MIPS32EB-NEXT: andi $1, $1, 3 ++; MIPS32EB-NEXT: xori $1, $1, 2 ++; MIPS32EB-NEXT: sll $1, $1, 3 ++; MIPS32EB-NEXT: ori $2, $zero, 65535 ++; MIPS32EB-NEXT: sllv $5, $2, $1 ++; MIPS32EB-NEXT: nor $6, $zero, $5 ++; MIPS32EB-NEXT: sllv $4, $4, $1 ++; MIPS32EB-NEXT: $BB14_1: # %entry ++; MIPS32EB-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32EB-NEXT: ll $7, 0($3) ++; MIPS32EB-NEXT: addu $8, $7, $4 ++; MIPS32EB-NEXT: and $8, $8, $5 ++; MIPS32EB-NEXT: and $9, $7, $6 ++; MIPS32EB-NEXT: or $9, $9, $8 ++; MIPS32EB-NEXT: sc $9, 0($3) ++; MIPS32EB-NEXT: beqz $9, $BB14_1 ++; MIPS32EB-NEXT: nop ++; MIPS32EB-NEXT: # %bb.2: # %entry ++; MIPS32EB-NEXT: and $2, $7, $5 ++; MIPS32EB-NEXT: srlv $2, $2, $1 ++; 
MIPS32EB-NEXT: sll $2, $2, 16 ++; MIPS32EB-NEXT: sra $2, $2, 16 ++; MIPS32EB-NEXT: # %bb.3: # %entry ++; MIPS32EB-NEXT: jr $ra ++; MIPS32EB-NEXT: nop ++entry: ++ %0 = atomicrmw add i16* @z, i16 %incr monotonic ++ ret i16 %0 ++ ++} ++ ++; Test that the i16 return value from cmpxchg is recognised as signed, ++; so that setCC doesn't end up comparing an unsigned value to a signed ++; value. ++; The rest of the functions here are testing the atomic expansion, so ++; we just match the end of the function. ++define {i16, i1} @foo(i16* %addr, i16 %l, i16 %r, i16 %new) { ++; MIPS32-LABEL: foo: ++; MIPS32: # %bb.0: ++; MIPS32-NEXT: addu $1, $5, $6 ++; MIPS32-NEXT: sync ++; MIPS32-NEXT: addiu $2, $zero, -4 ++; MIPS32-NEXT: and $3, $4, $2 ++; MIPS32-NEXT: andi $2, $4, 3 ++; MIPS32-NEXT: sll $4, $2, 3 ++; MIPS32-NEXT: ori $2, $zero, 65535 ++; MIPS32-NEXT: sllv $5, $2, $4 ++; MIPS32-NEXT: nor $6, $zero, $5 ++; MIPS32-NEXT: andi $2, $1, 65535 ++; MIPS32-NEXT: sllv $8, $2, $4 ++; MIPS32-NEXT: andi $2, $7, 65535 ++; MIPS32-NEXT: sllv $7, $2, $4 ++; MIPS32-NEXT: $BB15_1: # =>This Inner Loop Header: Depth=1 ++; MIPS32-NEXT: ll $9, 0($3) ++; MIPS32-NEXT: and $10, $9, $5 ++; MIPS32-NEXT: bne $10, $8, $BB15_3 ++; MIPS32-NEXT: nop ++; MIPS32-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 ++; MIPS32-NEXT: and $9, $9, $6 ++; MIPS32-NEXT: or $9, $9, $7 ++; MIPS32-NEXT: sc $9, 0($3) ++; MIPS32-NEXT: beqz $9, $BB15_1 ++; MIPS32-NEXT: nop ++; MIPS32-NEXT: $BB15_3: ++; MIPS32-NEXT: sync ++; MIPS32-NEXT: srlv $2, $10, $4 ++; MIPS32-NEXT: sll $2, $2, 16 ++; MIPS32-NEXT: sra $2, $2, 16 ++; MIPS32-NEXT: # %bb.4: ++; MIPS32-NEXT: sll $1, $1, 16 ++; MIPS32-NEXT: sra $1, $1, 16 ++; MIPS32-NEXT: xor $1, $2, $1 ++; MIPS32-NEXT: sltiu $3, $1, 1 ++; MIPS32-NEXT: sync ++; MIPS32-NEXT: jr $ra ++; MIPS32-NEXT: nop ++; ++; MIPS32O0-LABEL: foo: ++; MIPS32O0: # %bb.0: ++; MIPS32O0-NEXT: addiu $sp, $sp, -8 ++; MIPS32O0-NEXT: .cfi_def_cfa_offset 8 ++; MIPS32O0-NEXT: move $1, $7 ++; MIPS32O0-NEXT: move $3, $4 ++; MIPS32O0-NEXT: addu $2, $5, $6 ++; MIPS32O0-NEXT: sw $2, 0($sp) # 4-byte Folded Spill ++; MIPS32O0-NEXT: sync ++; MIPS32O0-NEXT: addiu $4, $zero, -4 ++; MIPS32O0-NEXT: and $4, $3, $4 ++; MIPS32O0-NEXT: andi $3, $3, 3 ++; MIPS32O0-NEXT: sll $9, $3, 3 ++; MIPS32O0-NEXT: ori $3, $zero, 65535 ++; MIPS32O0-NEXT: sllv $5, $3, $9 ++; MIPS32O0-NEXT: nor $7, $zero, $5 ++; MIPS32O0-NEXT: andi $2, $2, 65535 ++; MIPS32O0-NEXT: sllv $6, $2, $9 ++; MIPS32O0-NEXT: andi $1, $1, 65535 ++; MIPS32O0-NEXT: sllv $8, $1, $9 ++; MIPS32O0-NEXT: $BB15_1: # =>This Inner Loop Header: Depth=1 ++; MIPS32O0-NEXT: ll $2, 0($4) ++; MIPS32O0-NEXT: and $3, $2, $5 ++; MIPS32O0-NEXT: bne $3, $6, $BB15_3 ++; MIPS32O0-NEXT: nop ++; MIPS32O0-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 ++; MIPS32O0-NEXT: and $2, $2, $7 ++; MIPS32O0-NEXT: or $2, $2, $8 ++; MIPS32O0-NEXT: sc $2, 0($4) ++; MIPS32O0-NEXT: beqz $2, $BB15_1 ++; MIPS32O0-NEXT: nop ++; MIPS32O0-NEXT: $BB15_3: ++; MIPS32O0-NEXT: sync ++; MIPS32O0-NEXT: srlv $1, $3, $9 ++; MIPS32O0-NEXT: sll $1, $1, 16 ++; MIPS32O0-NEXT: sra $1, $1, 16 ++; MIPS32O0-NEXT: # %bb.4: ++; MIPS32O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ++; MIPS32O0-NEXT: # %bb.5: ++; MIPS32O0-NEXT: lw $2, 4($sp) # 4-byte Folded Reload ++; MIPS32O0-NEXT: lw $1, 0($sp) # 4-byte Folded Reload ++; MIPS32O0-NEXT: sll $1, $1, 16 ++; MIPS32O0-NEXT: sra $1, $1, 16 ++; MIPS32O0-NEXT: xor $1, $2, $1 ++; MIPS32O0-NEXT: sltiu $3, $1, 1 ++; MIPS32O0-NEXT: sync ++; MIPS32O0-NEXT: addiu $sp, $sp, 8 ++; MIPS32O0-NEXT: jr $ra ++; MIPS32O0-NEXT: nop ++; ++; 
MIPS32R2-LABEL: foo: ++; MIPS32R2: # %bb.0: ++; MIPS32R2-NEXT: addu $1, $5, $6 ++; MIPS32R2-NEXT: sync ++; MIPS32R2-NEXT: addiu $2, $zero, -4 ++; MIPS32R2-NEXT: and $3, $4, $2 ++; MIPS32R2-NEXT: andi $2, $4, 3 ++; MIPS32R2-NEXT: sll $4, $2, 3 ++; MIPS32R2-NEXT: ori $2, $zero, 65535 ++; MIPS32R2-NEXT: sllv $5, $2, $4 ++; MIPS32R2-NEXT: nor $6, $zero, $5 ++; MIPS32R2-NEXT: andi $2, $1, 65535 ++; MIPS32R2-NEXT: sllv $8, $2, $4 ++; MIPS32R2-NEXT: andi $2, $7, 65535 ++; MIPS32R2-NEXT: sllv $7, $2, $4 ++; MIPS32R2-NEXT: $BB15_1: # =>This Inner Loop Header: Depth=1 ++; MIPS32R2-NEXT: ll $9, 0($3) ++; MIPS32R2-NEXT: and $10, $9, $5 ++; MIPS32R2-NEXT: bne $10, $8, $BB15_3 ++; MIPS32R2-NEXT: nop ++; MIPS32R2-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 ++; MIPS32R2-NEXT: and $9, $9, $6 ++; MIPS32R2-NEXT: or $9, $9, $7 ++; MIPS32R2-NEXT: sc $9, 0($3) ++; MIPS32R2-NEXT: beqz $9, $BB15_1 ++; MIPS32R2-NEXT: nop ++; MIPS32R2-NEXT: $BB15_3: ++; MIPS32R2-NEXT: sync ++; MIPS32R2-NEXT: srlv $2, $10, $4 ++; MIPS32R2-NEXT: seh $2, $2 ++; MIPS32R2-NEXT: # %bb.4: ++; MIPS32R2-NEXT: seh $1, $1 ++; MIPS32R2-NEXT: xor $1, $2, $1 ++; MIPS32R2-NEXT: sltiu $3, $1, 1 ++; MIPS32R2-NEXT: sync ++; MIPS32R2-NEXT: jr $ra ++; MIPS32R2-NEXT: nop ++; ++; MIPS32R6-LABEL: foo: ++; MIPS32R6: # %bb.0: ++; MIPS32R6-NEXT: addu $1, $5, $6 ++; MIPS32R6-NEXT: sync ++; MIPS32R6-NEXT: addiu $2, $zero, -4 ++; MIPS32R6-NEXT: and $3, $4, $2 ++; MIPS32R6-NEXT: andi $2, $4, 3 ++; MIPS32R6-NEXT: sll $4, $2, 3 ++; MIPS32R6-NEXT: ori $2, $zero, 65535 ++; MIPS32R6-NEXT: sllv $5, $2, $4 ++; MIPS32R6-NEXT: nor $6, $zero, $5 ++; MIPS32R6-NEXT: andi $2, $1, 65535 ++; MIPS32R6-NEXT: sllv $8, $2, $4 ++; MIPS32R6-NEXT: andi $2, $7, 65535 ++; MIPS32R6-NEXT: sllv $7, $2, $4 ++; MIPS32R6-NEXT: $BB15_1: # =>This Inner Loop Header: Depth=1 ++; MIPS32R6-NEXT: ll $9, 0($3) ++; MIPS32R6-NEXT: and $10, $9, $5 ++; MIPS32R6-NEXT: bnec $10, $8, $BB15_3 ++; MIPS32R6-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 ++; MIPS32R6-NEXT: and $9, $9, $6 ++; MIPS32R6-NEXT: or $9, $9, $7 ++; MIPS32R6-NEXT: sc $9, 0($3) ++; MIPS32R6-NEXT: beqzc $9, $BB15_1 ++; MIPS32R6-NEXT: $BB15_3: ++; MIPS32R6-NEXT: sync ++; MIPS32R6-NEXT: srlv $2, $10, $4 ++; MIPS32R6-NEXT: seh $2, $2 ++; MIPS32R6-NEXT: # %bb.4: ++; MIPS32R6-NEXT: seh $1, $1 ++; MIPS32R6-NEXT: xor $1, $2, $1 ++; MIPS32R6-NEXT: sltiu $3, $1, 1 ++; MIPS32R6-NEXT: sync ++; MIPS32R6-NEXT: jrc $ra ++; ++; MIPS32R6O0-LABEL: foo: ++; MIPS32R6O0: # %bb.0: ++; MIPS32R6O0-NEXT: addiu $sp, $sp, -8 ++; MIPS32R6O0-NEXT: .cfi_def_cfa_offset 8 ++; MIPS32R6O0-NEXT: move $1, $7 ++; MIPS32R6O0-NEXT: move $3, $4 ++; MIPS32R6O0-NEXT: # kill: def $a3 killed $at ++; MIPS32R6O0-NEXT: # kill: def $v0 killed $a2 ++; MIPS32R6O0-NEXT: # kill: def $v0 killed $a1 ++; MIPS32R6O0-NEXT: addu $2, $5, $6 ++; MIPS32R6O0-NEXT: sw $2, 0($sp) # 4-byte Folded Spill ++; MIPS32R6O0-NEXT: sync ++; MIPS32R6O0-NEXT: addiu $4, $zero, -4 ++; MIPS32R6O0-NEXT: and $4, $3, $4 ++; MIPS32R6O0-NEXT: andi $3, $3, 3 ++; MIPS32R6O0-NEXT: sll $9, $3, 3 ++; MIPS32R6O0-NEXT: ori $3, $zero, 65535 ++; MIPS32R6O0-NEXT: sllv $5, $3, $9 ++; MIPS32R6O0-NEXT: nor $7, $zero, $5 ++; MIPS32R6O0-NEXT: andi $2, $2, 65535 ++; MIPS32R6O0-NEXT: sllv $6, $2, $9 ++; MIPS32R6O0-NEXT: andi $1, $1, 65535 ++; MIPS32R6O0-NEXT: sllv $8, $1, $9 ++; MIPS32R6O0-NEXT: $BB15_1: # =>This Inner Loop Header: Depth=1 ++; MIPS32R6O0-NEXT: ll $2, 0($4) ++; MIPS32R6O0-NEXT: and $3, $2, $5 ++; MIPS32R6O0-NEXT: bnec $3, $6, $BB15_3 ++; MIPS32R6O0-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 ++; 
MIPS32R6O0-NEXT: and $2, $2, $7 ++; MIPS32R6O0-NEXT: or $2, $2, $8 ++; MIPS32R6O0-NEXT: sc $2, 0($4) ++; MIPS32R6O0-NEXT: beqzc $2, $BB15_1 ++; MIPS32R6O0-NEXT: $BB15_3: ++; MIPS32R6O0-NEXT: sync ++; MIPS32R6O0-NEXT: srlv $1, $3, $9 ++; MIPS32R6O0-NEXT: seh $1, $1 ++; MIPS32R6O0-NEXT: # %bb.4: ++; MIPS32R6O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ++; MIPS32R6O0-NEXT: # %bb.5: ++; MIPS32R6O0-NEXT: lw $2, 4($sp) # 4-byte Folded Reload ++; MIPS32R6O0-NEXT: lw $1, 0($sp) # 4-byte Folded Reload ++; MIPS32R6O0-NEXT: seh $1, $1 ++; MIPS32R6O0-NEXT: xor $1, $2, $1 ++; MIPS32R6O0-NEXT: sltiu $3, $1, 1 ++; MIPS32R6O0-NEXT: sync ++; MIPS32R6O0-NEXT: addiu $sp, $sp, 8 ++; MIPS32R6O0-NEXT: jrc $ra ++; ++; MIPS4-LABEL: foo: ++; MIPS4: # %bb.0: ++; MIPS4-NEXT: sll $1, $6, 0 ++; MIPS4-NEXT: sll $2, $5, 0 ++; MIPS4-NEXT: addu $1, $2, $1 ++; MIPS4-NEXT: sync ++; MIPS4-NEXT: sll $2, $7, 0 ++; MIPS4-NEXT: daddiu $3, $zero, -4 ++; MIPS4-NEXT: and $3, $4, $3 ++; MIPS4-NEXT: andi $4, $4, 3 ++; MIPS4-NEXT: sll $4, $4, 3 ++; MIPS4-NEXT: ori $5, $zero, 65535 ++; MIPS4-NEXT: sllv $5, $5, $4 ++; MIPS4-NEXT: nor $6, $zero, $5 ++; MIPS4-NEXT: andi $7, $1, 65535 ++; MIPS4-NEXT: sllv $7, $7, $4 ++; MIPS4-NEXT: andi $2, $2, 65535 ++; MIPS4-NEXT: sllv $8, $2, $4 ++; MIPS4-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 ++; MIPS4-NEXT: ll $9, 0($3) ++; MIPS4-NEXT: and $10, $9, $5 ++; MIPS4-NEXT: bne $10, $7, .LBB15_3 ++; MIPS4-NEXT: nop ++; MIPS4-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 ++; MIPS4-NEXT: and $9, $9, $6 ++; MIPS4-NEXT: or $9, $9, $8 ++; MIPS4-NEXT: sc $9, 0($3) ++; MIPS4-NEXT: beqz $9, .LBB15_1 ++; MIPS4-NEXT: nop ++; MIPS4-NEXT: .LBB15_3: ++; MIPS4-NEXT: sync ++; MIPS4-NEXT: srlv $2, $10, $4 ++; MIPS4-NEXT: sll $2, $2, 16 ++; MIPS4-NEXT: sra $2, $2, 16 ++; MIPS4-NEXT: # %bb.4: ++; MIPS4-NEXT: sll $1, $1, 16 ++; MIPS4-NEXT: sra $1, $1, 16 ++; MIPS4-NEXT: xor $1, $2, $1 ++; MIPS4-NEXT: sltiu $3, $1, 1 ++; MIPS4-NEXT: sync ++; MIPS4-NEXT: jr $ra ++; MIPS4-NEXT: nop ++; ++; MIPS64-LABEL: foo: ++; MIPS64: # %bb.0: ++; MIPS64-NEXT: sll $1, $6, 0 ++; MIPS64-NEXT: sll $2, $5, 0 ++; MIPS64-NEXT: addu $1, $2, $1 ++; MIPS64-NEXT: sync ++; MIPS64-NEXT: sll $2, $7, 0 ++; MIPS64-NEXT: daddiu $3, $zero, -4 ++; MIPS64-NEXT: and $3, $4, $3 ++; MIPS64-NEXT: andi $4, $4, 3 ++; MIPS64-NEXT: sll $4, $4, 3 ++; MIPS64-NEXT: ori $5, $zero, 65535 ++; MIPS64-NEXT: sllv $5, $5, $4 ++; MIPS64-NEXT: nor $6, $zero, $5 ++; MIPS64-NEXT: andi $7, $1, 65535 ++; MIPS64-NEXT: sllv $7, $7, $4 ++; MIPS64-NEXT: andi $2, $2, 65535 ++; MIPS64-NEXT: sllv $8, $2, $4 ++; MIPS64-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 ++; MIPS64-NEXT: ll $9, 0($3) ++; MIPS64-NEXT: and $10, $9, $5 ++; MIPS64-NEXT: bne $10, $7, .LBB15_3 ++; MIPS64-NEXT: nop ++; MIPS64-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 ++; MIPS64-NEXT: and $9, $9, $6 ++; MIPS64-NEXT: or $9, $9, $8 ++; MIPS64-NEXT: sc $9, 0($3) ++; MIPS64-NEXT: beqz $9, .LBB15_1 ++; MIPS64-NEXT: nop ++; MIPS64-NEXT: .LBB15_3: ++; MIPS64-NEXT: sync ++; MIPS64-NEXT: srlv $2, $10, $4 ++; MIPS64-NEXT: sll $2, $2, 16 ++; MIPS64-NEXT: sra $2, $2, 16 ++; MIPS64-NEXT: # %bb.4: ++; MIPS64-NEXT: sll $1, $1, 16 ++; MIPS64-NEXT: sra $1, $1, 16 ++; MIPS64-NEXT: xor $1, $2, $1 ++; MIPS64-NEXT: sltiu $3, $1, 1 ++; MIPS64-NEXT: sync ++; MIPS64-NEXT: jr $ra ++; MIPS64-NEXT: nop ++; ++; MIPS64R2-LABEL: foo: ++; MIPS64R2: # %bb.0: ++; MIPS64R2-NEXT: sll $1, $6, 0 ++; MIPS64R2-NEXT: sll $2, $5, 0 ++; MIPS64R2-NEXT: addu $1, $2, $1 ++; MIPS64R2-NEXT: sync ++; MIPS64R2-NEXT: sll $2, $7, 0 ++; 
MIPS64R2-NEXT: daddiu $3, $zero, -4 ++; MIPS64R2-NEXT: and $3, $4, $3 ++; MIPS64R2-NEXT: andi $4, $4, 3 ++; MIPS64R2-NEXT: sll $4, $4, 3 ++; MIPS64R2-NEXT: ori $5, $zero, 65535 ++; MIPS64R2-NEXT: sllv $5, $5, $4 ++; MIPS64R2-NEXT: nor $6, $zero, $5 ++; MIPS64R2-NEXT: andi $7, $1, 65535 ++; MIPS64R2-NEXT: sllv $7, $7, $4 ++; MIPS64R2-NEXT: andi $2, $2, 65535 ++; MIPS64R2-NEXT: sllv $8, $2, $4 ++; MIPS64R2-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 ++; MIPS64R2-NEXT: ll $9, 0($3) ++; MIPS64R2-NEXT: and $10, $9, $5 ++; MIPS64R2-NEXT: bne $10, $7, .LBB15_3 ++; MIPS64R2-NEXT: nop ++; MIPS64R2-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 ++; MIPS64R2-NEXT: and $9, $9, $6 ++; MIPS64R2-NEXT: or $9, $9, $8 ++; MIPS64R2-NEXT: sc $9, 0($3) ++; MIPS64R2-NEXT: beqz $9, .LBB15_1 ++; MIPS64R2-NEXT: nop ++; MIPS64R2-NEXT: .LBB15_3: ++; MIPS64R2-NEXT: sync ++; MIPS64R2-NEXT: srlv $2, $10, $4 ++; MIPS64R2-NEXT: seh $2, $2 ++; MIPS64R2-NEXT: # %bb.4: ++; MIPS64R2-NEXT: seh $1, $1 ++; MIPS64R2-NEXT: xor $1, $2, $1 ++; MIPS64R2-NEXT: sltiu $3, $1, 1 ++; MIPS64R2-NEXT: sync ++; MIPS64R2-NEXT: jr $ra ++; MIPS64R2-NEXT: nop ++; ++; MIPS64R6-LABEL: foo: ++; MIPS64R6: # %bb.0: ++; MIPS64R6-NEXT: sll $1, $6, 0 ++; MIPS64R6-NEXT: sll $2, $5, 0 ++; MIPS64R6-NEXT: addu $1, $2, $1 ++; MIPS64R6-NEXT: sync ++; MIPS64R6-NEXT: sll $2, $7, 0 ++; MIPS64R6-NEXT: daddiu $3, $zero, -4 ++; MIPS64R6-NEXT: and $3, $4, $3 ++; MIPS64R6-NEXT: andi $4, $4, 3 ++; MIPS64R6-NEXT: sll $4, $4, 3 ++; MIPS64R6-NEXT: ori $5, $zero, 65535 ++; MIPS64R6-NEXT: sllv $5, $5, $4 ++; MIPS64R6-NEXT: nor $6, $zero, $5 ++; MIPS64R6-NEXT: andi $7, $1, 65535 ++; MIPS64R6-NEXT: sllv $7, $7, $4 ++; MIPS64R6-NEXT: andi $2, $2, 65535 ++; MIPS64R6-NEXT: sllv $8, $2, $4 ++; MIPS64R6-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6-NEXT: ll $9, 0($3) ++; MIPS64R6-NEXT: and $10, $9, $5 ++; MIPS64R6-NEXT: bnec $10, $7, .LBB15_3 ++; MIPS64R6-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 ++; MIPS64R6-NEXT: and $9, $9, $6 ++; MIPS64R6-NEXT: or $9, $9, $8 ++; MIPS64R6-NEXT: sc $9, 0($3) ++; MIPS64R6-NEXT: beqzc $9, .LBB15_1 ++; MIPS64R6-NEXT: .LBB15_3: ++; MIPS64R6-NEXT: sync ++; MIPS64R6-NEXT: srlv $2, $10, $4 ++; MIPS64R6-NEXT: seh $2, $2 ++; MIPS64R6-NEXT: # %bb.4: ++; MIPS64R6-NEXT: seh $1, $1 ++; MIPS64R6-NEXT: xor $1, $2, $1 ++; MIPS64R6-NEXT: sltiu $3, $1, 1 ++; MIPS64R6-NEXT: sync ++; MIPS64R6-NEXT: jrc $ra ++; ++; MIPS64R6O0-LABEL: foo: ++; MIPS64R6O0: # %bb.0: ++; MIPS64R6O0-NEXT: daddiu $sp, $sp, -16 ++; MIPS64R6O0-NEXT: .cfi_def_cfa_offset 16 ++; MIPS64R6O0-NEXT: move $3, $4 ++; MIPS64R6O0-NEXT: move $1, $7 ++; MIPS64R6O0-NEXT: sll $1, $1, 0 ++; MIPS64R6O0-NEXT: move $2, $6 ++; MIPS64R6O0-NEXT: sll $4, $2, 0 ++; MIPS64R6O0-NEXT: move $2, $5 ++; MIPS64R6O0-NEXT: sll $2, $2, 0 ++; MIPS64R6O0-NEXT: addu $2, $2, $4 ++; MIPS64R6O0-NEXT: sw $2, 8($sp) # 4-byte Folded Spill ++; MIPS64R6O0-NEXT: sync ++; MIPS64R6O0-NEXT: daddiu $4, $zero, -4 ++; MIPS64R6O0-NEXT: and $4, $3, $4 ++; MIPS64R6O0-NEXT: andi $3, $3, 3 ++; MIPS64R6O0-NEXT: xori $3, $3, 2 ++; MIPS64R6O0-NEXT: sll $9, $3, 3 ++; MIPS64R6O0-NEXT: ori $3, $zero, 65535 ++; MIPS64R6O0-NEXT: sllv $5, $3, $9 ++; MIPS64R6O0-NEXT: nor $7, $zero, $5 ++; MIPS64R6O0-NEXT: andi $2, $2, 65535 ++; MIPS64R6O0-NEXT: sllv $6, $2, $9 ++; MIPS64R6O0-NEXT: andi $1, $1, 65535 ++; MIPS64R6O0-NEXT: sllv $8, $1, $9 ++; MIPS64R6O0-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6O0-NEXT: ll $2, 0($4) ++; MIPS64R6O0-NEXT: and $3, $2, $5 ++; MIPS64R6O0-NEXT: bnec $3, $6, .LBB15_3 ++; 
MIPS64R6O0-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 ++; MIPS64R6O0-NEXT: and $2, $2, $7 ++; MIPS64R6O0-NEXT: or $2, $2, $8 ++; MIPS64R6O0-NEXT: sc $2, 0($4) ++; MIPS64R6O0-NEXT: beqzc $2, .LBB15_1 ++; MIPS64R6O0-NEXT: .LBB15_3: ++; MIPS64R6O0-NEXT: sync ++; MIPS64R6O0-NEXT: srlv $1, $3, $9 ++; MIPS64R6O0-NEXT: seh $1, $1 ++; MIPS64R6O0-NEXT: # %bb.4: ++; MIPS64R6O0-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ++; MIPS64R6O0-NEXT: # %bb.5: ++; MIPS64R6O0-NEXT: lw $2, 12($sp) # 4-byte Folded Reload ++; MIPS64R6O0-NEXT: lw $1, 8($sp) # 4-byte Folded Reload ++; MIPS64R6O0-NEXT: seh $1, $1 ++; MIPS64R6O0-NEXT: xor $1, $2, $1 ++; MIPS64R6O0-NEXT: sltiu $3, $1, 1 ++; MIPS64R6O0-NEXT: sync ++; MIPS64R6O0-NEXT: daddiu $sp, $sp, 16 ++; MIPS64R6O0-NEXT: jrc $ra ++; ++; MM32-LABEL: foo: ++; MM32: # %bb.0: ++; MM32-NEXT: addu16 $3, $5, $6 ++; MM32-NEXT: sync ++; MM32-NEXT: addiu $1, $zero, -4 ++; MM32-NEXT: and $1, $4, $1 ++; MM32-NEXT: andi $2, $4, 3 ++; MM32-NEXT: sll $4, $2, 3 ++; MM32-NEXT: ori $2, $zero, 65535 ++; MM32-NEXT: sllv $5, $2, $4 ++; MM32-NEXT: nor $6, $zero, $5 ++; MM32-NEXT: andi $2, $3, 65535 ++; MM32-NEXT: sllv $8, $2, $4 ++; MM32-NEXT: andi $2, $7, 65535 ++; MM32-NEXT: sllv $7, $2, $4 ++; MM32-NEXT: $BB15_1: # =>This Inner Loop Header: Depth=1 ++; MM32-NEXT: ll $9, 0($1) ++; MM32-NEXT: and $10, $9, $5 ++; MM32-NEXT: bne $10, $8, $BB15_3 ++; MM32-NEXT: nop ++; MM32-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 ++; MM32-NEXT: and $9, $9, $6 ++; MM32-NEXT: or $9, $9, $7 ++; MM32-NEXT: sc $9, 0($1) ++; MM32-NEXT: beqzc $9, $BB15_1 ++; MM32-NEXT: $BB15_3: ++; MM32-NEXT: sync ++; MM32-NEXT: srlv $2, $10, $4 ++; MM32-NEXT: seh $2, $2 ++; MM32-NEXT: # %bb.4: ++; MM32-NEXT: seh $1, $3 ++; MM32-NEXT: xor $1, $2, $1 ++; MM32-NEXT: sltiu $3, $1, 1 ++; MM32-NEXT: sync ++; MM32-NEXT: jrc $ra ++; ++; O1-LABEL: foo: ++; O1: # %bb.0: ++; O1-NEXT: addu $1, $5, $6 ++; O1-NEXT: sync ++; O1-NEXT: addiu $2, $zero, -4 ++; O1-NEXT: and $3, $4, $2 ++; O1-NEXT: andi $2, $4, 3 ++; O1-NEXT: sll $4, $2, 3 ++; O1-NEXT: ori $2, $zero, 65535 ++; O1-NEXT: sllv $5, $2, $4 ++; O1-NEXT: nor $6, $zero, $5 ++; O1-NEXT: andi $2, $1, 65535 ++; O1-NEXT: sllv $8, $2, $4 ++; O1-NEXT: andi $2, $7, 65535 ++; O1-NEXT: sllv $7, $2, $4 ++; O1-NEXT: $BB15_1: # =>This Inner Loop Header: Depth=1 ++; O1-NEXT: ll $9, 0($3) ++; O1-NEXT: and $10, $9, $5 ++; O1-NEXT: bne $10, $8, $BB15_3 ++; O1-NEXT: nop ++; O1-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 ++; O1-NEXT: and $9, $9, $6 ++; O1-NEXT: or $9, $9, $7 ++; O1-NEXT: sc $9, 0($3) ++; O1-NEXT: beqz $9, $BB15_1 ++; O1-NEXT: nop ++; O1-NEXT: $BB15_3: ++; O1-NEXT: sync ++; O1-NEXT: srlv $2, $10, $4 ++; O1-NEXT: sll $2, $2, 16 ++; O1-NEXT: sra $2, $2, 16 ++; O1-NEXT: # %bb.4: ++; O1-NEXT: sll $1, $1, 16 ++; O1-NEXT: sra $1, $1, 16 ++; O1-NEXT: xor $1, $2, $1 ++; O1-NEXT: sltiu $3, $1, 1 ++; O1-NEXT: sync ++; O1-NEXT: jr $ra ++; O1-NEXT: nop ++; ++; O2-LABEL: foo: ++; O2: # %bb.0: ++; O2-NEXT: addu $1, $5, $6 ++; O2-NEXT: sync ++; O2-NEXT: addiu $2, $zero, -4 ++; O2-NEXT: and $3, $4, $2 ++; O2-NEXT: andi $2, $4, 3 ++; O2-NEXT: sll $4, $2, 3 ++; O2-NEXT: ori $2, $zero, 65535 ++; O2-NEXT: sllv $5, $2, $4 ++; O2-NEXT: nor $6, $zero, $5 ++; O2-NEXT: andi $2, $1, 65535 ++; O2-NEXT: sllv $8, $2, $4 ++; O2-NEXT: andi $2, $7, 65535 ++; O2-NEXT: sllv $7, $2, $4 ++; O2-NEXT: $BB15_1: # =>This Inner Loop Header: Depth=1 ++; O2-NEXT: ll $9, 0($3) ++; O2-NEXT: and $10, $9, $5 ++; O2-NEXT: bne $10, $8, $BB15_3 ++; O2-NEXT: nop ++; O2-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 ++; 
O2-NEXT: and $9, $9, $6 ++; O2-NEXT: or $9, $9, $7 ++; O2-NEXT: sc $9, 0($3) ++; O2-NEXT: beqz $9, $BB15_1 ++; O2-NEXT: nop ++; O2-NEXT: $BB15_3: ++; O2-NEXT: sync ++; O2-NEXT: srlv $2, $10, $4 ++; O2-NEXT: sll $2, $2, 16 ++; O2-NEXT: sra $2, $2, 16 ++; O2-NEXT: # %bb.4: ++; O2-NEXT: sll $1, $1, 16 ++; O2-NEXT: sra $1, $1, 16 ++; O2-NEXT: xor $1, $2, $1 ++; O2-NEXT: sltiu $3, $1, 1 ++; O2-NEXT: sync ++; O2-NEXT: jr $ra ++; O2-NEXT: nop ++; ++; O3-LABEL: foo: ++; O3: # %bb.0: ++; O3-NEXT: addiu $2, $zero, -4 ++; O3-NEXT: addu $1, $5, $6 ++; O3-NEXT: sync ++; O3-NEXT: and $3, $4, $2 ++; O3-NEXT: andi $2, $4, 3 ++; O3-NEXT: sll $4, $2, 3 ++; O3-NEXT: ori $2, $zero, 65535 ++; O3-NEXT: sllv $5, $2, $4 ++; O3-NEXT: andi $2, $1, 65535 ++; O3-NEXT: sll $1, $1, 16 ++; O3-NEXT: sllv $8, $2, $4 ++; O3-NEXT: andi $2, $7, 65535 ++; O3-NEXT: nor $6, $zero, $5 ++; O3-NEXT: sra $1, $1, 16 ++; O3-NEXT: sllv $7, $2, $4 ++; O3-NEXT: $BB15_1: # =>This Inner Loop Header: Depth=1 ++; O3-NEXT: ll $9, 0($3) ++; O3-NEXT: and $10, $9, $5 ++; O3-NEXT: bne $10, $8, $BB15_3 ++; O3-NEXT: nop ++; O3-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 ++; O3-NEXT: and $9, $9, $6 ++; O3-NEXT: or $9, $9, $7 ++; O3-NEXT: sc $9, 0($3) ++; O3-NEXT: beqz $9, $BB15_1 ++; O3-NEXT: nop ++; O3-NEXT: $BB15_3: ++; O3-NEXT: sync ++; O3-NEXT: srlv $2, $10, $4 ++; O3-NEXT: sll $2, $2, 16 ++; O3-NEXT: sra $2, $2, 16 ++; O3-NEXT: # %bb.4: ++; O3-NEXT: sync ++; O3-NEXT: xor $1, $2, $1 ++; O3-NEXT: jr $ra ++; O3-NEXT: sltiu $3, $1, 1 ++; ++; MIPS32EB-LABEL: foo: ++; MIPS32EB: # %bb.0: ++; MIPS32EB-NEXT: addu $1, $5, $6 ++; MIPS32EB-NEXT: sync ++; MIPS32EB-NEXT: addiu $2, $zero, -4 ++; MIPS32EB-NEXT: and $3, $4, $2 ++; MIPS32EB-NEXT: andi $2, $4, 3 ++; MIPS32EB-NEXT: xori $2, $2, 2 ++; MIPS32EB-NEXT: sll $4, $2, 3 ++; MIPS32EB-NEXT: ori $2, $zero, 65535 ++; MIPS32EB-NEXT: sllv $5, $2, $4 ++; MIPS32EB-NEXT: nor $6, $zero, $5 ++; MIPS32EB-NEXT: andi $2, $1, 65535 ++; MIPS32EB-NEXT: sllv $8, $2, $4 ++; MIPS32EB-NEXT: andi $2, $7, 65535 ++; MIPS32EB-NEXT: sllv $7, $2, $4 ++; MIPS32EB-NEXT: $BB15_1: # =>This Inner Loop Header: Depth=1 ++; MIPS32EB-NEXT: ll $9, 0($3) ++; MIPS32EB-NEXT: and $10, $9, $5 ++; MIPS32EB-NEXT: bne $10, $8, $BB15_3 ++; MIPS32EB-NEXT: nop ++; MIPS32EB-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 ++; MIPS32EB-NEXT: and $9, $9, $6 ++; MIPS32EB-NEXT: or $9, $9, $7 ++; MIPS32EB-NEXT: sc $9, 0($3) ++; MIPS32EB-NEXT: beqz $9, $BB15_1 ++; MIPS32EB-NEXT: nop ++; MIPS32EB-NEXT: $BB15_3: ++; MIPS32EB-NEXT: sync ++; MIPS32EB-NEXT: srlv $2, $10, $4 ++; MIPS32EB-NEXT: sll $2, $2, 16 ++; MIPS32EB-NEXT: sra $2, $2, 16 ++; MIPS32EB-NEXT: # %bb.4: ++; MIPS32EB-NEXT: sll $1, $1, 16 ++; MIPS32EB-NEXT: sra $1, $1, 16 ++; MIPS32EB-NEXT: xor $1, $2, $1 ++; MIPS32EB-NEXT: sltiu $3, $1, 1 ++; MIPS32EB-NEXT: sync ++; MIPS32EB-NEXT: jr $ra ++; MIPS32EB-NEXT: nop ++ %desired = add i16 %l, %r ++ %res = cmpxchg i16* %addr, i16 %desired, i16 %new seq_cst seq_cst ++ ret {i16, i1} %res ++} ++ ++@countsint = common global i32 0, align 4 ++ ++define i32 @CheckSync(i32 signext %v) nounwind noinline { ++; MIPS32-LABEL: CheckSync: ++; MIPS32: # %bb.0: # %entry ++; MIPS32-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32-NEXT: addu $1, $2, $25 ++; MIPS32-NEXT: sync ++; MIPS32-NEXT: lw $1, %got(countsint)($1) ++; MIPS32-NEXT: $BB16_1: # %entry ++; MIPS32-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32-NEXT: ll $2, 0($1) ++; MIPS32-NEXT: addu $3, $2, $4 ++; MIPS32-NEXT: sc $3, 0($1) ++; MIPS32-NEXT: beqz $3, $BB16_1 ++; 
MIPS32-NEXT: nop ++; MIPS32-NEXT: # %bb.2: # %entry ++; MIPS32-NEXT: sync ++; MIPS32-NEXT: jr $ra ++; MIPS32-NEXT: nop ++; ++; MIPS32O0-LABEL: CheckSync: ++; MIPS32O0: # %bb.0: # %entry ++; MIPS32O0-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32O0-NEXT: addu $1, $2, $25 ++; MIPS32O0-NEXT: sync ++; MIPS32O0-NEXT: lw $3, %got(countsint)($1) ++; MIPS32O0-NEXT: $BB16_1: # %entry ++; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32O0-NEXT: ll $2, 0($3) ++; MIPS32O0-NEXT: addu $1, $2, $4 ++; MIPS32O0-NEXT: sc $1, 0($3) ++; MIPS32O0-NEXT: beqz $1, $BB16_1 ++; MIPS32O0-NEXT: nop ++; MIPS32O0-NEXT: # %bb.2: # %entry ++; MIPS32O0-NEXT: sync ++; MIPS32O0-NEXT: jr $ra ++; MIPS32O0-NEXT: nop ++; ++; MIPS32R2-LABEL: CheckSync: ++; MIPS32R2: # %bb.0: # %entry ++; MIPS32R2-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32R2-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32R2-NEXT: addu $1, $2, $25 ++; MIPS32R2-NEXT: sync ++; MIPS32R2-NEXT: lw $1, %got(countsint)($1) ++; MIPS32R2-NEXT: $BB16_1: # %entry ++; MIPS32R2-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32R2-NEXT: ll $2, 0($1) ++; MIPS32R2-NEXT: addu $3, $2, $4 ++; MIPS32R2-NEXT: sc $3, 0($1) ++; MIPS32R2-NEXT: beqz $3, $BB16_1 ++; MIPS32R2-NEXT: nop ++; MIPS32R2-NEXT: # %bb.2: # %entry ++; MIPS32R2-NEXT: sync ++; MIPS32R2-NEXT: jr $ra ++; MIPS32R2-NEXT: nop ++; ++; MIPS32R6-LABEL: CheckSync: ++; MIPS32R6: # %bb.0: # %entry ++; MIPS32R6-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32R6-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32R6-NEXT: addu $1, $2, $25 ++; MIPS32R6-NEXT: sync ++; MIPS32R6-NEXT: lw $1, %got(countsint)($1) ++; MIPS32R6-NEXT: $BB16_1: # %entry ++; MIPS32R6-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32R6-NEXT: ll $2, 0($1) ++; MIPS32R6-NEXT: addu $3, $2, $4 ++; MIPS32R6-NEXT: sc $3, 0($1) ++; MIPS32R6-NEXT: beqzc $3, $BB16_1 ++; MIPS32R6-NEXT: # %bb.2: # %entry ++; MIPS32R6-NEXT: sync ++; MIPS32R6-NEXT: jrc $ra ++; ++; MIPS32R6O0-LABEL: CheckSync: ++; MIPS32R6O0: # %bb.0: # %entry ++; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32R6O0-NEXT: addu $1, $2, $25 ++; MIPS32R6O0-NEXT: sync ++; MIPS32R6O0-NEXT: lw $3, %got(countsint)($1) ++; MIPS32R6O0-NEXT: $BB16_1: # %entry ++; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32R6O0-NEXT: ll $2, 0($3) ++; MIPS32R6O0-NEXT: addu $1, $2, $4 ++; MIPS32R6O0-NEXT: sc $1, 0($3) ++; MIPS32R6O0-NEXT: beqzc $1, $BB16_1 ++; MIPS32R6O0-NEXT: # %bb.2: # %entry ++; MIPS32R6O0-NEXT: sync ++; MIPS32R6O0-NEXT: jrc $ra ++; ++; MIPS4-LABEL: CheckSync: ++; MIPS4: # %bb.0: # %entry ++; MIPS4-NEXT: lui $1, %hi(%neg(%gp_rel(CheckSync))) ++; MIPS4-NEXT: daddu $1, $1, $25 ++; MIPS4-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(CheckSync))) ++; MIPS4-NEXT: sync ++; MIPS4-NEXT: ld $1, %got_disp(countsint)($1) ++; MIPS4-NEXT: .LBB16_1: # %entry ++; MIPS4-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS4-NEXT: ll $2, 0($1) ++; MIPS4-NEXT: addu $3, $2, $4 ++; MIPS4-NEXT: sc $3, 0($1) ++; MIPS4-NEXT: beqz $3, .LBB16_1 ++; MIPS4-NEXT: nop ++; MIPS4-NEXT: # %bb.2: # %entry ++; MIPS4-NEXT: sync ++; MIPS4-NEXT: jr $ra ++; MIPS4-NEXT: nop ++; ++; MIPS64-LABEL: CheckSync: ++; MIPS64: # %bb.0: # %entry ++; MIPS64-NEXT: lui $1, %hi(%neg(%gp_rel(CheckSync))) ++; MIPS64-NEXT: daddu $1, $1, $25 ++; MIPS64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(CheckSync))) ++; MIPS64-NEXT: sync ++; MIPS64-NEXT: ld $1, %got_disp(countsint)($1) ++; MIPS64-NEXT: .LBB16_1: # %entry ++; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64-NEXT: ll $2, 
0($1) ++; MIPS64-NEXT: addu $3, $2, $4 ++; MIPS64-NEXT: sc $3, 0($1) ++; MIPS64-NEXT: beqz $3, .LBB16_1 ++; MIPS64-NEXT: nop ++; MIPS64-NEXT: # %bb.2: # %entry ++; MIPS64-NEXT: sync ++; MIPS64-NEXT: jr $ra ++; MIPS64-NEXT: nop ++; ++; MIPS64R2-LABEL: CheckSync: ++; MIPS64R2: # %bb.0: # %entry ++; MIPS64R2-NEXT: lui $1, %hi(%neg(%gp_rel(CheckSync))) ++; MIPS64R2-NEXT: daddu $1, $1, $25 ++; MIPS64R2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(CheckSync))) ++; MIPS64R2-NEXT: sync ++; MIPS64R2-NEXT: ld $1, %got_disp(countsint)($1) ++; MIPS64R2-NEXT: .LBB16_1: # %entry ++; MIPS64R2-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R2-NEXT: ll $2, 0($1) ++; MIPS64R2-NEXT: addu $3, $2, $4 ++; MIPS64R2-NEXT: sc $3, 0($1) ++; MIPS64R2-NEXT: beqz $3, .LBB16_1 ++; MIPS64R2-NEXT: nop ++; MIPS64R2-NEXT: # %bb.2: # %entry ++; MIPS64R2-NEXT: sync ++; MIPS64R2-NEXT: jr $ra ++; MIPS64R2-NEXT: nop ++; ++; MIPS64R6-LABEL: CheckSync: ++; MIPS64R6: # %bb.0: # %entry ++; MIPS64R6-NEXT: lui $1, %hi(%neg(%gp_rel(CheckSync))) ++; MIPS64R6-NEXT: daddu $1, $1, $25 ++; MIPS64R6-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(CheckSync))) ++; MIPS64R6-NEXT: sync ++; MIPS64R6-NEXT: ld $1, %got_disp(countsint)($1) ++; MIPS64R6-NEXT: .LBB16_1: # %entry ++; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6-NEXT: ll $2, 0($1) ++; MIPS64R6-NEXT: addu $3, $2, $4 ++; MIPS64R6-NEXT: sc $3, 0($1) ++; MIPS64R6-NEXT: beqzc $3, .LBB16_1 ++; MIPS64R6-NEXT: # %bb.2: # %entry ++; MIPS64R6-NEXT: sync ++; MIPS64R6-NEXT: jrc $ra ++; ++; MIPS64R6O0-LABEL: CheckSync: ++; MIPS64R6O0: # %bb.0: # %entry ++; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(CheckSync))) ++; MIPS64R6O0-NEXT: daddu $1, $1, $25 ++; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(CheckSync))) ++; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64 ++; MIPS64R6O0-NEXT: sync ++; MIPS64R6O0-NEXT: ld $3, %got_disp(countsint)($1) ++; MIPS64R6O0-NEXT: .LBB16_1: # %entry ++; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6O0-NEXT: ll $2, 0($3) ++; MIPS64R6O0-NEXT: addu $1, $2, $4 ++; MIPS64R6O0-NEXT: sc $1, 0($3) ++; MIPS64R6O0-NEXT: beqzc $1, .LBB16_1 ++; MIPS64R6O0-NEXT: # %bb.2: # %entry ++; MIPS64R6O0-NEXT: sync ++; MIPS64R6O0-NEXT: jrc $ra ++; ++; MM32-LABEL: CheckSync: ++; MM32: # %bb.0: # %entry ++; MM32-NEXT: lui $2, %hi(_gp_disp) ++; MM32-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MM32-NEXT: addu $2, $2, $25 ++; MM32-NEXT: sync ++; MM32-NEXT: lw $1, %got(countsint)($2) ++; MM32-NEXT: $BB16_1: # %entry ++; MM32-NEXT: # =>This Inner Loop Header: Depth=1 ++; MM32-NEXT: ll $2, 0($1) ++; MM32-NEXT: addu16 $3, $2, $4 ++; MM32-NEXT: sc $3, 0($1) ++; MM32-NEXT: beqzc $3, $BB16_1 ++; MM32-NEXT: # %bb.2: # %entry ++; MM32-NEXT: sync ++; MM32-NEXT: jrc $ra ++; ++; O1-LABEL: CheckSync: ++; O1: # %bb.0: # %entry ++; O1-NEXT: lui $2, %hi(_gp_disp) ++; O1-NEXT: addiu $2, $2, %lo(_gp_disp) ++; O1-NEXT: addu $1, $2, $25 ++; O1-NEXT: sync ++; O1-NEXT: lw $1, %got(countsint)($1) ++; O1-NEXT: $BB16_1: # %entry ++; O1-NEXT: # =>This Inner Loop Header: Depth=1 ++; O1-NEXT: ll $2, 0($1) ++; O1-NEXT: addu $3, $2, $4 ++; O1-NEXT: sc $3, 0($1) ++; O1-NEXT: beqz $3, $BB16_1 ++; O1-NEXT: nop ++; O1-NEXT: # %bb.2: # %entry ++; O1-NEXT: sync ++; O1-NEXT: jr $ra ++; O1-NEXT: nop ++; ++; O2-LABEL: CheckSync: ++; O2: # %bb.0: # %entry ++; O2-NEXT: lui $2, %hi(_gp_disp) ++; O2-NEXT: addiu $2, $2, %lo(_gp_disp) ++; O2-NEXT: addu $1, $2, $25 ++; O2-NEXT: sync ++; O2-NEXT: lw $1, %got(countsint)($1) ++; O2-NEXT: $BB16_1: # %entry ++; O2-NEXT: # =>This Inner Loop Header: Depth=1 ++; 
O2-NEXT: ll $2, 0($1) ++; O2-NEXT: addu $3, $2, $4 ++; O2-NEXT: sc $3, 0($1) ++; O2-NEXT: beqz $3, $BB16_1 ++; O2-NEXT: nop ++; O2-NEXT: # %bb.2: # %entry ++; O2-NEXT: sync ++; O2-NEXT: jr $ra ++; O2-NEXT: nop ++; ++; O3-LABEL: CheckSync: ++; O3: # %bb.0: # %entry ++; O3-NEXT: lui $2, %hi(_gp_disp) ++; O3-NEXT: addiu $2, $2, %lo(_gp_disp) ++; O3-NEXT: addu $1, $2, $25 ++; O3-NEXT: sync ++; O3-NEXT: lw $1, %got(countsint)($1) ++; O3-NEXT: $BB16_1: # %entry ++; O3-NEXT: # =>This Inner Loop Header: Depth=1 ++; O3-NEXT: ll $2, 0($1) ++; O3-NEXT: addu $3, $2, $4 ++; O3-NEXT: sc $3, 0($1) ++; O3-NEXT: beqz $3, $BB16_1 ++; O3-NEXT: nop ++; O3-NEXT: # %bb.2: # %entry ++; O3-NEXT: sync ++; O3-NEXT: jr $ra ++; O3-NEXT: nop ++; ++; MIPS32EB-LABEL: CheckSync: ++; MIPS32EB: # %bb.0: # %entry ++; MIPS32EB-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32EB-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32EB-NEXT: addu $1, $2, $25 ++; MIPS32EB-NEXT: sync ++; MIPS32EB-NEXT: lw $1, %got(countsint)($1) ++; MIPS32EB-NEXT: $BB16_1: # %entry ++; MIPS32EB-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32EB-NEXT: ll $2, 0($1) ++; MIPS32EB-NEXT: addu $3, $2, $4 ++; MIPS32EB-NEXT: sc $3, 0($1) ++; MIPS32EB-NEXT: beqz $3, $BB16_1 ++; MIPS32EB-NEXT: nop ++; MIPS32EB-NEXT: # %bb.2: # %entry ++; MIPS32EB-NEXT: sync ++; MIPS32EB-NEXT: jr $ra ++; MIPS32EB-NEXT: nop ++entry: ++ %0 = atomicrmw add i32* @countsint, i32 %v seq_cst ++ ret i32 %0 ++} ++ ++; make sure that this assertion in ++; TwoAddressInstructionPass::TryInstructionTransform does not fail: ++; ++; line 1203: assert(TargetRegisterInfo::isVirtualRegister(regB) && ++; ++; it failed when MipsDAGToDAGISel::ReplaceUsesWithZeroReg replaced an ++; operand of an atomic instruction with register $zero. ++@a = external global i32 ++ ++define i32 @zeroreg() nounwind { ++; MIPS32-LABEL: zeroreg: ++; MIPS32: # %bb.0: # %entry ++; MIPS32-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32-NEXT: addu $1, $2, $25 ++; MIPS32-NEXT: sync ++; MIPS32-NEXT: addiu $2, $zero, 0 ++; MIPS32-NEXT: addiu $3, $zero, 1 ++; MIPS32-NEXT: lw $1, %got(a)($1) ++; MIPS32-NEXT: $BB17_1: # %entry ++; MIPS32-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32-NEXT: ll $4, 0($1) ++; MIPS32-NEXT: bne $4, $3, $BB17_3 ++; MIPS32-NEXT: nop ++; MIPS32-NEXT: # %bb.2: # %entry ++; MIPS32-NEXT: # in Loop: Header=BB17_1 Depth=1 ++; MIPS32-NEXT: move $5, $2 ++; MIPS32-NEXT: sc $5, 0($1) ++; MIPS32-NEXT: beqz $5, $BB17_1 ++; MIPS32-NEXT: nop ++; MIPS32-NEXT: $BB17_3: # %entry ++; MIPS32-NEXT: xor $1, $4, $3 ++; MIPS32-NEXT: sltiu $2, $1, 1 ++; MIPS32-NEXT: sync ++; MIPS32-NEXT: jr $ra ++; MIPS32-NEXT: nop ++; ++; MIPS32O0-LABEL: zeroreg: ++; MIPS32O0: # %bb.0: # %entry ++; MIPS32O0-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32O0-NEXT: addu $1, $2, $25 ++; MIPS32O0-NEXT: sync ++; MIPS32O0-NEXT: lw $4, %got(a)($1) ++; MIPS32O0-NEXT: addiu $6, $zero, 0 ++; MIPS32O0-NEXT: addiu $2, $zero, 1 ++; MIPS32O0-NEXT: move $5, $2 ++; MIPS32O0-NEXT: $BB17_1: # %entry ++; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32O0-NEXT: ll $1, 0($4) ++; MIPS32O0-NEXT: bne $1, $5, $BB17_3 ++; MIPS32O0-NEXT: nop ++; MIPS32O0-NEXT: # %bb.2: # %entry ++; MIPS32O0-NEXT: # in Loop: Header=BB17_1 Depth=1 ++; MIPS32O0-NEXT: move $3, $6 ++; MIPS32O0-NEXT: sc $3, 0($4) ++; MIPS32O0-NEXT: beqz $3, $BB17_1 ++; MIPS32O0-NEXT: nop ++; MIPS32O0-NEXT: $BB17_3: # %entry ++; MIPS32O0-NEXT: xor $2, $1, $2 ++; MIPS32O0-NEXT: sltiu $2, $2, 1 ++; MIPS32O0-NEXT: sync ++; 
MIPS32O0-NEXT: addiu $2, $zero, 1 ++; MIPS32O0-NEXT: xor $1, $1, $2 ++; MIPS32O0-NEXT: sltiu $1, $1, 1 ++; MIPS32O0-NEXT: andi $2, $1, 1 ++; MIPS32O0-NEXT: jr $ra ++; MIPS32O0-NEXT: nop ++; ++; MIPS32R2-LABEL: zeroreg: ++; MIPS32R2: # %bb.0: # %entry ++; MIPS32R2-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32R2-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32R2-NEXT: addu $1, $2, $25 ++; MIPS32R2-NEXT: sync ++; MIPS32R2-NEXT: addiu $2, $zero, 0 ++; MIPS32R2-NEXT: addiu $3, $zero, 1 ++; MIPS32R2-NEXT: lw $1, %got(a)($1) ++; MIPS32R2-NEXT: $BB17_1: # %entry ++; MIPS32R2-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32R2-NEXT: ll $4, 0($1) ++; MIPS32R2-NEXT: bne $4, $3, $BB17_3 ++; MIPS32R2-NEXT: nop ++; MIPS32R2-NEXT: # %bb.2: # %entry ++; MIPS32R2-NEXT: # in Loop: Header=BB17_1 Depth=1 ++; MIPS32R2-NEXT: move $5, $2 ++; MIPS32R2-NEXT: sc $5, 0($1) ++; MIPS32R2-NEXT: beqz $5, $BB17_1 ++; MIPS32R2-NEXT: nop ++; MIPS32R2-NEXT: $BB17_3: # %entry ++; MIPS32R2-NEXT: xor $1, $4, $3 ++; MIPS32R2-NEXT: sltiu $2, $1, 1 ++; MIPS32R2-NEXT: sync ++; MIPS32R2-NEXT: jr $ra ++; MIPS32R2-NEXT: nop ++; ++; MIPS32R6-LABEL: zeroreg: ++; MIPS32R6: # %bb.0: # %entry ++; MIPS32R6-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32R6-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32R6-NEXT: addu $1, $2, $25 ++; MIPS32R6-NEXT: sync ++; MIPS32R6-NEXT: addiu $2, $zero, 0 ++; MIPS32R6-NEXT: addiu $3, $zero, 1 ++; MIPS32R6-NEXT: lw $1, %got(a)($1) ++; MIPS32R6-NEXT: $BB17_1: # %entry ++; MIPS32R6-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32R6-NEXT: ll $4, 0($1) ++; MIPS32R6-NEXT: bnec $4, $3, $BB17_3 ++; MIPS32R6-NEXT: # %bb.2: # %entry ++; MIPS32R6-NEXT: # in Loop: Header=BB17_1 Depth=1 ++; MIPS32R6-NEXT: move $5, $2 ++; MIPS32R6-NEXT: sc $5, 0($1) ++; MIPS32R6-NEXT: beqzc $5, $BB17_1 ++; MIPS32R6-NEXT: $BB17_3: # %entry ++; MIPS32R6-NEXT: xor $1, $4, $3 ++; MIPS32R6-NEXT: sltiu $2, $1, 1 ++; MIPS32R6-NEXT: sync ++; MIPS32R6-NEXT: jrc $ra ++; ++; MIPS32R6O0-LABEL: zeroreg: ++; MIPS32R6O0: # %bb.0: # %entry ++; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32R6O0-NEXT: addu $1, $2, $25 ++; MIPS32R6O0-NEXT: sync ++; MIPS32R6O0-NEXT: lw $4, %got(a)($1) ++; MIPS32R6O0-NEXT: addiu $6, $zero, 0 ++; MIPS32R6O0-NEXT: addiu $2, $zero, 1 ++; MIPS32R6O0-NEXT: move $5, $2 ++; MIPS32R6O0-NEXT: $BB17_1: # %entry ++; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32R6O0-NEXT: ll $1, 0($4) ++; MIPS32R6O0-NEXT: bnec $1, $5, $BB17_3 ++; MIPS32R6O0-NEXT: # %bb.2: # %entry ++; MIPS32R6O0-NEXT: # in Loop: Header=BB17_1 Depth=1 ++; MIPS32R6O0-NEXT: move $3, $6 ++; MIPS32R6O0-NEXT: sc $3, 0($4) ++; MIPS32R6O0-NEXT: beqzc $3, $BB17_1 ++; MIPS32R6O0-NEXT: $BB17_3: # %entry ++; MIPS32R6O0-NEXT: xor $1, $1, $2 ++; MIPS32R6O0-NEXT: sltiu $2, $1, 1 ++; MIPS32R6O0-NEXT: sync ++; MIPS32R6O0-NEXT: jrc $ra ++; ++; MIPS4-LABEL: zeroreg: ++; MIPS4: # %bb.0: # %entry ++; MIPS4-NEXT: lui $1, %hi(%neg(%gp_rel(zeroreg))) ++; MIPS4-NEXT: daddu $1, $1, $25 ++; MIPS4-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(zeroreg))) ++; MIPS4-NEXT: sync ++; MIPS4-NEXT: addiu $2, $zero, 0 ++; MIPS4-NEXT: addiu $3, $zero, 1 ++; MIPS4-NEXT: ld $1, %got_disp(a)($1) ++; MIPS4-NEXT: .LBB17_1: # %entry ++; MIPS4-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS4-NEXT: ll $4, 0($1) ++; MIPS4-NEXT: bne $4, $3, .LBB17_3 ++; MIPS4-NEXT: nop ++; MIPS4-NEXT: # %bb.2: # %entry ++; MIPS4-NEXT: # in Loop: Header=BB17_1 Depth=1 ++; MIPS4-NEXT: move $5, $2 ++; MIPS4-NEXT: sc $5, 0($1) ++; MIPS4-NEXT: beqz $5, .LBB17_1 ++; MIPS4-NEXT: nop ++; 
MIPS4-NEXT: .LBB17_3: # %entry ++; MIPS4-NEXT: xor $1, $4, $3 ++; MIPS4-NEXT: sltiu $2, $1, 1 ++; MIPS4-NEXT: sync ++; MIPS4-NEXT: jr $ra ++; MIPS4-NEXT: nop ++; ++; MIPS64-LABEL: zeroreg: ++; MIPS64: # %bb.0: # %entry ++; MIPS64-NEXT: lui $1, %hi(%neg(%gp_rel(zeroreg))) ++; MIPS64-NEXT: daddu $1, $1, $25 ++; MIPS64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(zeroreg))) ++; MIPS64-NEXT: sync ++; MIPS64-NEXT: addiu $2, $zero, 0 ++; MIPS64-NEXT: addiu $3, $zero, 1 ++; MIPS64-NEXT: ld $1, %got_disp(a)($1) ++; MIPS64-NEXT: .LBB17_1: # %entry ++; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64-NEXT: ll $4, 0($1) ++; MIPS64-NEXT: bne $4, $3, .LBB17_3 ++; MIPS64-NEXT: nop ++; MIPS64-NEXT: # %bb.2: # %entry ++; MIPS64-NEXT: # in Loop: Header=BB17_1 Depth=1 ++; MIPS64-NEXT: move $5, $2 ++; MIPS64-NEXT: sc $5, 0($1) ++; MIPS64-NEXT: beqz $5, .LBB17_1 ++; MIPS64-NEXT: nop ++; MIPS64-NEXT: .LBB17_3: # %entry ++; MIPS64-NEXT: xor $1, $4, $3 ++; MIPS64-NEXT: sltiu $2, $1, 1 ++; MIPS64-NEXT: sync ++; MIPS64-NEXT: jr $ra ++; MIPS64-NEXT: nop ++; ++; MIPS64R2-LABEL: zeroreg: ++; MIPS64R2: # %bb.0: # %entry ++; MIPS64R2-NEXT: lui $1, %hi(%neg(%gp_rel(zeroreg))) ++; MIPS64R2-NEXT: daddu $1, $1, $25 ++; MIPS64R2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(zeroreg))) ++; MIPS64R2-NEXT: sync ++; MIPS64R2-NEXT: addiu $2, $zero, 0 ++; MIPS64R2-NEXT: addiu $3, $zero, 1 ++; MIPS64R2-NEXT: ld $1, %got_disp(a)($1) ++; MIPS64R2-NEXT: .LBB17_1: # %entry ++; MIPS64R2-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R2-NEXT: ll $4, 0($1) ++; MIPS64R2-NEXT: bne $4, $3, .LBB17_3 ++; MIPS64R2-NEXT: nop ++; MIPS64R2-NEXT: # %bb.2: # %entry ++; MIPS64R2-NEXT: # in Loop: Header=BB17_1 Depth=1 ++; MIPS64R2-NEXT: move $5, $2 ++; MIPS64R2-NEXT: sc $5, 0($1) ++; MIPS64R2-NEXT: beqz $5, .LBB17_1 ++; MIPS64R2-NEXT: nop ++; MIPS64R2-NEXT: .LBB17_3: # %entry ++; MIPS64R2-NEXT: xor $1, $4, $3 ++; MIPS64R2-NEXT: sltiu $2, $1, 1 ++; MIPS64R2-NEXT: sync ++; MIPS64R2-NEXT: jr $ra ++; MIPS64R2-NEXT: nop ++; ++; MIPS64R6-LABEL: zeroreg: ++; MIPS64R6: # %bb.0: # %entry ++; MIPS64R6-NEXT: lui $1, %hi(%neg(%gp_rel(zeroreg))) ++; MIPS64R6-NEXT: daddu $1, $1, $25 ++; MIPS64R6-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(zeroreg))) ++; MIPS64R6-NEXT: sync ++; MIPS64R6-NEXT: addiu $2, $zero, 0 ++; MIPS64R6-NEXT: addiu $3, $zero, 1 ++; MIPS64R6-NEXT: ld $1, %got_disp(a)($1) ++; MIPS64R6-NEXT: .LBB17_1: # %entry ++; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6-NEXT: ll $4, 0($1) ++; MIPS64R6-NEXT: bnec $4, $3, .LBB17_3 ++; MIPS64R6-NEXT: # %bb.2: # %entry ++; MIPS64R6-NEXT: # in Loop: Header=BB17_1 Depth=1 ++; MIPS64R6-NEXT: move $5, $2 ++; MIPS64R6-NEXT: sc $5, 0($1) ++; MIPS64R6-NEXT: beqzc $5, .LBB17_1 ++; MIPS64R6-NEXT: .LBB17_3: # %entry ++; MIPS64R6-NEXT: xor $1, $4, $3 ++; MIPS64R6-NEXT: sltiu $2, $1, 1 ++; MIPS64R6-NEXT: sync ++; MIPS64R6-NEXT: jrc $ra ++; ++; MIPS64R6O0-LABEL: zeroreg: ++; MIPS64R6O0: # %bb.0: # %entry ++; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(zeroreg))) ++; MIPS64R6O0-NEXT: daddu $1, $1, $25 ++; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(zeroreg))) ++; MIPS64R6O0-NEXT: sync ++; MIPS64R6O0-NEXT: ld $4, %got_disp(a)($1) ++; MIPS64R6O0-NEXT: addiu $6, $zero, 0 ++; MIPS64R6O0-NEXT: addiu $2, $zero, 1 ++; MIPS64R6O0-NEXT: move $5, $2 ++; MIPS64R6O0-NEXT: .LBB17_1: # %entry ++; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6O0-NEXT: ll $1, 0($4) ++; MIPS64R6O0-NEXT: bnec $1, $5, .LBB17_3 ++; MIPS64R6O0-NEXT: # %bb.2: # %entry ++; MIPS64R6O0-NEXT: # in Loop: Header=BB17_1 Depth=1 ++; 
MIPS64R6O0-NEXT: move $3, $6 ++; MIPS64R6O0-NEXT: sc $3, 0($4) ++; MIPS64R6O0-NEXT: beqzc $3, .LBB17_1 ++; MIPS64R6O0-NEXT: .LBB17_3: # %entry ++; MIPS64R6O0-NEXT: xor $1, $1, $2 ++; MIPS64R6O0-NEXT: sltiu $2, $1, 1 ++; MIPS64R6O0-NEXT: sync ++; MIPS64R6O0-NEXT: jrc $ra ++; ++; MM32-LABEL: zeroreg: ++; MM32: # %bb.0: # %entry ++; MM32-NEXT: lui $2, %hi(_gp_disp) ++; MM32-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MM32-NEXT: addu $2, $2, $25 ++; MM32-NEXT: sync ++; MM32-NEXT: li16 $3, 0 ++; MM32-NEXT: li16 $4, 1 ++; MM32-NEXT: lw $1, %got(a)($2) ++; MM32-NEXT: $BB17_1: # %entry ++; MM32-NEXT: # =>This Inner Loop Header: Depth=1 ++; MM32-NEXT: ll $2, 0($1) ++; MM32-NEXT: bne $2, $4, $BB17_3 ++; MM32-NEXT: nop ++; MM32-NEXT: # %bb.2: # %entry ++; MM32-NEXT: # in Loop: Header=BB17_1 Depth=1 ++; MM32-NEXT: move $5, $3 ++; MM32-NEXT: sc $5, 0($1) ++; MM32-NEXT: beqzc $5, $BB17_1 ++; MM32-NEXT: $BB17_3: # %entry ++; MM32-NEXT: sync ++; MM32-NEXT: xor $1, $2, $4 ++; MM32-NEXT: sltiu $2, $1, 1 ++; MM32-NEXT: sync ++; MM32-NEXT: jrc $ra ++; ++; O1-LABEL: zeroreg: ++; O1: # %bb.0: # %entry ++; O1-NEXT: lui $2, %hi(_gp_disp) ++; O1-NEXT: addiu $2, $2, %lo(_gp_disp) ++; O1-NEXT: addu $1, $2, $25 ++; O1-NEXT: sync ++; O1-NEXT: addiu $2, $zero, 0 ++; O1-NEXT: addiu $3, $zero, 1 ++; O1-NEXT: lw $1, %got(a)($1) ++; O1-NEXT: $BB17_1: # %entry ++; O1-NEXT: # =>This Inner Loop Header: Depth=1 ++; O1-NEXT: ll $4, 0($1) ++; O1-NEXT: bne $4, $3, $BB17_3 ++; O1-NEXT: nop ++; O1-NEXT: # %bb.2: # %entry ++; O1-NEXT: # in Loop: Header=BB17_1 Depth=1 ++; O1-NEXT: move $5, $2 ++; O1-NEXT: sc $5, 0($1) ++; O1-NEXT: beqz $5, $BB17_1 ++; O1-NEXT: nop ++; O1-NEXT: $BB17_3: # %entry ++; O1-NEXT: xor $1, $4, $3 ++; O1-NEXT: sltiu $2, $1, 1 ++; O1-NEXT: sync ++; O1-NEXT: jr $ra ++; O1-NEXT: nop ++; ++; O2-LABEL: zeroreg: ++; O2: # %bb.0: # %entry ++; O2-NEXT: lui $2, %hi(_gp_disp) ++; O2-NEXT: addiu $2, $2, %lo(_gp_disp) ++; O2-NEXT: addu $1, $2, $25 ++; O2-NEXT: sync ++; O2-NEXT: addiu $2, $zero, 0 ++; O2-NEXT: addiu $3, $zero, 1 ++; O2-NEXT: lw $1, %got(a)($1) ++; O2-NEXT: $BB17_1: # %entry ++; O2-NEXT: # =>This Inner Loop Header: Depth=1 ++; O2-NEXT: ll $4, 0($1) ++; O2-NEXT: bne $4, $3, $BB17_3 ++; O2-NEXT: nop ++; O2-NEXT: # %bb.2: # %entry ++; O2-NEXT: # in Loop: Header=BB17_1 Depth=1 ++; O2-NEXT: move $5, $2 ++; O2-NEXT: sc $5, 0($1) ++; O2-NEXT: beqz $5, $BB17_1 ++; O2-NEXT: nop ++; O2-NEXT: $BB17_3: # %entry ++; O2-NEXT: xor $1, $4, $3 ++; O2-NEXT: sltiu $2, $1, 1 ++; O2-NEXT: sync ++; O2-NEXT: jr $ra ++; O2-NEXT: nop ++; ++; O3-LABEL: zeroreg: ++; O3: # %bb.0: # %entry ++; O3-NEXT: lui $2, %hi(_gp_disp) ++; O3-NEXT: addiu $2, $2, %lo(_gp_disp) ++; O3-NEXT: addu $1, $2, $25 ++; O3-NEXT: addiu $2, $zero, 0 ++; O3-NEXT: addiu $3, $zero, 1 ++; O3-NEXT: sync ++; O3-NEXT: lw $1, %got(a)($1) ++; O3-NEXT: $BB17_1: # %entry ++; O3-NEXT: # =>This Inner Loop Header: Depth=1 ++; O3-NEXT: ll $4, 0($1) ++; O3-NEXT: bne $4, $3, $BB17_3 ++; O3-NEXT: nop ++; O3-NEXT: # %bb.2: # %entry ++; O3-NEXT: # in Loop: Header=BB17_1 Depth=1 ++; O3-NEXT: move $5, $2 ++; O3-NEXT: sc $5, 0($1) ++; O3-NEXT: beqz $5, $BB17_1 ++; O3-NEXT: nop ++; O3-NEXT: $BB17_3: # %entry ++; O3-NEXT: sync ++; O3-NEXT: xor $1, $4, $3 ++; O3-NEXT: jr $ra ++; O3-NEXT: sltiu $2, $1, 1 ++; ++; MIPS32EB-LABEL: zeroreg: ++; MIPS32EB: # %bb.0: # %entry ++; MIPS32EB-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32EB-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32EB-NEXT: addu $1, $2, $25 ++; MIPS32EB-NEXT: sync ++; MIPS32EB-NEXT: addiu $2, $zero, 0 ++; MIPS32EB-NEXT: addiu $3, $zero, 1 ++; 
MIPS32EB-NEXT: lw $1, %got(a)($1) ++; MIPS32EB-NEXT: $BB17_1: # %entry ++; MIPS32EB-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32EB-NEXT: ll $4, 0($1) ++; MIPS32EB-NEXT: bne $4, $3, $BB17_3 ++; MIPS32EB-NEXT: nop ++; MIPS32EB-NEXT: # %bb.2: # %entry ++; MIPS32EB-NEXT: # in Loop: Header=BB17_1 Depth=1 ++; MIPS32EB-NEXT: move $5, $2 ++; MIPS32EB-NEXT: sc $5, 0($1) ++; MIPS32EB-NEXT: beqz $5, $BB17_1 ++; MIPS32EB-NEXT: nop ++; MIPS32EB-NEXT: $BB17_3: # %entry ++; MIPS32EB-NEXT: xor $1, $4, $3 ++; MIPS32EB-NEXT: sltiu $2, $1, 1 ++; MIPS32EB-NEXT: sync ++; MIPS32EB-NEXT: jr $ra ++; MIPS32EB-NEXT: nop ++entry: ++ %pair0 = cmpxchg i32* @a, i32 1, i32 0 seq_cst seq_cst ++ %0 = extractvalue { i32, i1 } %pair0, 0 ++ %1 = icmp eq i32 %0, 1 ++ %conv = zext i1 %1 to i32 ++ ret i32 %conv ++} ++ ++; Check that MIPS32R6 has the correct offset range. ++; FIXME: At the moment, we don't seem to do addr+offset for any atomic load/store. ++define i32 @AtomicLoadAdd32_OffGt9Bit(i32 signext %incr) nounwind { ++; MIPS32-LABEL: AtomicLoadAdd32_OffGt9Bit: ++; MIPS32: # %bb.0: # %entry ++; MIPS32-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32-NEXT: addu $1, $2, $25 ++; MIPS32-NEXT: lw $1, %got(x)($1) ++; MIPS32-NEXT: addiu $1, $1, 1024 ++; MIPS32-NEXT: $BB18_1: # %entry ++; MIPS32-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32-NEXT: ll $2, 0($1) ++; MIPS32-NEXT: addu $3, $2, $4 ++; MIPS32-NEXT: sc $3, 0($1) ++; MIPS32-NEXT: beqz $3, $BB18_1 ++; MIPS32-NEXT: nop ++; MIPS32-NEXT: # %bb.2: # %entry ++; MIPS32-NEXT: jr $ra ++; MIPS32-NEXT: nop ++; ++; MIPS32O0-LABEL: AtomicLoadAdd32_OffGt9Bit: ++; MIPS32O0: # %bb.0: # %entry ++; MIPS32O0-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32O0-NEXT: addu $1, $2, $25 ++; MIPS32O0-NEXT: lw $1, %got(x)($1) ++; MIPS32O0-NEXT: addiu $3, $1, 1024 ++; MIPS32O0-NEXT: $BB18_1: # %entry ++; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32O0-NEXT: ll $2, 0($3) ++; MIPS32O0-NEXT: addu $1, $2, $4 ++; MIPS32O0-NEXT: sc $1, 0($3) ++; MIPS32O0-NEXT: beqz $1, $BB18_1 ++; MIPS32O0-NEXT: nop ++; MIPS32O0-NEXT: # %bb.2: # %entry ++; MIPS32O0-NEXT: jr $ra ++; MIPS32O0-NEXT: nop ++; ++; MIPS32R2-LABEL: AtomicLoadAdd32_OffGt9Bit: ++; MIPS32R2: # %bb.0: # %entry ++; MIPS32R2-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32R2-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32R2-NEXT: addu $1, $2, $25 ++; MIPS32R2-NEXT: lw $1, %got(x)($1) ++; MIPS32R2-NEXT: addiu $1, $1, 1024 ++; MIPS32R2-NEXT: $BB18_1: # %entry ++; MIPS32R2-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32R2-NEXT: ll $2, 0($1) ++; MIPS32R2-NEXT: addu $3, $2, $4 ++; MIPS32R2-NEXT: sc $3, 0($1) ++; MIPS32R2-NEXT: beqz $3, $BB18_1 ++; MIPS32R2-NEXT: nop ++; MIPS32R2-NEXT: # %bb.2: # %entry ++; MIPS32R2-NEXT: jr $ra ++; MIPS32R2-NEXT: nop ++; ++; MIPS32R6-LABEL: AtomicLoadAdd32_OffGt9Bit: ++; MIPS32R6: # %bb.0: # %entry ++; MIPS32R6-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32R6-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32R6-NEXT: addu $1, $2, $25 ++; MIPS32R6-NEXT: lw $1, %got(x)($1) ++; MIPS32R6-NEXT: addiu $1, $1, 1024 ++; MIPS32R6-NEXT: $BB18_1: # %entry ++; MIPS32R6-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32R6-NEXT: ll $2, 0($1) ++; MIPS32R6-NEXT: addu $3, $2, $4 ++; MIPS32R6-NEXT: sc $3, 0($1) ++; MIPS32R6-NEXT: beqzc $3, $BB18_1 ++; MIPS32R6-NEXT: nop ++; MIPS32R6-NEXT: # %bb.2: # %entry ++; MIPS32R6-NEXT: jrc $ra ++; ++; MIPS32R6O0-LABEL: AtomicLoadAdd32_OffGt9Bit: ++; MIPS32R6O0: # %bb.0: # %entry ++; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp) 
++; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32R6O0-NEXT: addu $1, $2, $25 ++; MIPS32R6O0-NEXT: lw $1, %got(x)($1) ++; MIPS32R6O0-NEXT: addiu $3, $1, 1024 ++; MIPS32R6O0-NEXT: $BB18_1: # %entry ++; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32R6O0-NEXT: ll $2, 0($3) ++; MIPS32R6O0-NEXT: addu $1, $2, $4 ++; MIPS32R6O0-NEXT: sc $1, 0($3) ++; MIPS32R6O0-NEXT: beqzc $1, $BB18_1 ++; MIPS32R6O0-NEXT: nop ++; MIPS32R6O0-NEXT: # %bb.2: # %entry ++; MIPS32R6O0-NEXT: jrc $ra ++; ++; MIPS4-LABEL: AtomicLoadAdd32_OffGt9Bit: ++; MIPS4: # %bb.0: # %entry ++; MIPS4-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd32_OffGt9Bit))) ++; MIPS4-NEXT: daddu $1, $1, $25 ++; MIPS4-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd32_OffGt9Bit))) ++; MIPS4-NEXT: ld $1, %got_disp(x)($1) ++; MIPS4-NEXT: daddiu $1, $1, 1024 ++; MIPS4-NEXT: .LBB18_1: # %entry ++; MIPS4-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS4-NEXT: ll $2, 0($1) ++; MIPS4-NEXT: addu $3, $2, $4 ++; MIPS4-NEXT: sc $3, 0($1) ++; MIPS4-NEXT: beqz $3, .LBB18_1 ++; MIPS4-NEXT: nop ++; MIPS4-NEXT: # %bb.2: # %entry ++; MIPS4-NEXT: jr $ra ++; MIPS4-NEXT: nop ++; ++; MIPS64-LABEL: AtomicLoadAdd32_OffGt9Bit: ++; MIPS64: # %bb.0: # %entry ++; MIPS64-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd32_OffGt9Bit))) ++; MIPS64-NEXT: daddu $1, $1, $25 ++; MIPS64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd32_OffGt9Bit))) ++; MIPS64-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64-NEXT: daddiu $1, $1, 1024 ++; MIPS64-NEXT: .LBB18_1: # %entry ++; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64-NEXT: ll $2, 0($1) ++; MIPS64-NEXT: addu $3, $2, $4 ++; MIPS64-NEXT: sc $3, 0($1) ++; MIPS64-NEXT: beqz $3, .LBB18_1 ++; MIPS64-NEXT: nop ++; MIPS64-NEXT: # %bb.2: # %entry ++; MIPS64-NEXT: jr $ra ++; MIPS64-NEXT: nop ++; ++; MIPS64R2-LABEL: AtomicLoadAdd32_OffGt9Bit: ++; MIPS64R2: # %bb.0: # %entry ++; MIPS64R2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd32_OffGt9Bit))) ++; MIPS64R2-NEXT: daddu $1, $1, $25 ++; MIPS64R2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd32_OffGt9Bit))) ++; MIPS64R2-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64R2-NEXT: daddiu $1, $1, 1024 ++; MIPS64R2-NEXT: .LBB18_1: # %entry ++; MIPS64R2-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R2-NEXT: ll $2, 0($1) ++; MIPS64R2-NEXT: addu $3, $2, $4 ++; MIPS64R2-NEXT: sc $3, 0($1) ++; MIPS64R2-NEXT: beqz $3, .LBB18_1 ++; MIPS64R2-NEXT: nop ++; MIPS64R2-NEXT: # %bb.2: # %entry ++; MIPS64R2-NEXT: jr $ra ++; MIPS64R2-NEXT: nop ++; ++; MIPS64R6-LABEL: AtomicLoadAdd32_OffGt9Bit: ++; MIPS64R6: # %bb.0: # %entry ++; MIPS64R6-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd32_OffGt9Bit))) ++; MIPS64R6-NEXT: daddu $1, $1, $25 ++; MIPS64R6-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd32_OffGt9Bit))) ++; MIPS64R6-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64R6-NEXT: daddiu $1, $1, 1024 ++; MIPS64R6-NEXT: .LBB18_1: # %entry ++; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6-NEXT: ll $2, 0($1) ++; MIPS64R6-NEXT: addu $3, $2, $4 ++; MIPS64R6-NEXT: sc $3, 0($1) ++; MIPS64R6-NEXT: beqzc $3, .LBB18_1 ++; MIPS64R6-NEXT: nop ++; MIPS64R6-NEXT: # %bb.2: # %entry ++; MIPS64R6-NEXT: jrc $ra ++; ++; MIPS64R6O0-LABEL: AtomicLoadAdd32_OffGt9Bit: ++; MIPS64R6O0: # %bb.0: # %entry ++; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd32_OffGt9Bit))) ++; MIPS64R6O0-NEXT: daddu $1, $1, $25 ++; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd32_OffGt9Bit))) ++; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64 ++; MIPS64R6O0-NEXT: ld $1, 
%got_disp(x)($1) ++; MIPS64R6O0-NEXT: daddiu $3, $1, 1024 ++; MIPS64R6O0-NEXT: .LBB18_1: # %entry ++; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6O0-NEXT: ll $2, 0($3) ++; MIPS64R6O0-NEXT: addu $1, $2, $4 ++; MIPS64R6O0-NEXT: sc $1, 0($3) ++; MIPS64R6O0-NEXT: beqzc $1, .LBB18_1 ++; MIPS64R6O0-NEXT: nop ++; MIPS64R6O0-NEXT: # %bb.2: # %entry ++; MIPS64R6O0-NEXT: jrc $ra ++; ++; MM32-LABEL: AtomicLoadAdd32_OffGt9Bit: ++; MM32: # %bb.0: # %entry ++; MM32-NEXT: lui $2, %hi(_gp_disp) ++; MM32-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MM32-NEXT: addu $2, $2, $25 ++; MM32-NEXT: lw $1, %got(x)($2) ++; MM32-NEXT: addiu $1, $1, 1024 ++; MM32-NEXT: $BB18_1: # %entry ++; MM32-NEXT: # =>This Inner Loop Header: Depth=1 ++; MM32-NEXT: ll $2, 0($1) ++; MM32-NEXT: addu16 $3, $2, $4 ++; MM32-NEXT: sc $3, 0($1) ++; MM32-NEXT: beqzc $3, $BB18_1 ++; MM32-NEXT: # %bb.2: # %entry ++; MM32-NEXT: jrc $ra ++; ++; O1-LABEL: AtomicLoadAdd32_OffGt9Bit: ++; O1: # %bb.0: # %entry ++; O1-NEXT: lui $2, %hi(_gp_disp) ++; O1-NEXT: addiu $2, $2, %lo(_gp_disp) ++; O1-NEXT: addu $1, $2, $25 ++; O1-NEXT: lw $1, %got(x)($1) ++; O1-NEXT: addiu $1, $1, 1024 ++; O1-NEXT: $BB18_1: # %entry ++; O1-NEXT: # =>This Inner Loop Header: Depth=1 ++; O1-NEXT: ll $2, 0($1) ++; O1-NEXT: addu $3, $2, $4 ++; O1-NEXT: sc $3, 0($1) ++; O1-NEXT: beqz $3, $BB18_1 ++; O1-NEXT: nop ++; O1-NEXT: # %bb.2: # %entry ++; O1-NEXT: jr $ra ++; O1-NEXT: nop ++; ++; O2-LABEL: AtomicLoadAdd32_OffGt9Bit: ++; O2: # %bb.0: # %entry ++; O2-NEXT: lui $2, %hi(_gp_disp) ++; O2-NEXT: addiu $2, $2, %lo(_gp_disp) ++; O2-NEXT: addu $1, $2, $25 ++; O2-NEXT: lw $1, %got(x)($1) ++; O2-NEXT: addiu $1, $1, 1024 ++; O2-NEXT: $BB18_1: # %entry ++; O2-NEXT: # =>This Inner Loop Header: Depth=1 ++; O2-NEXT: ll $2, 0($1) ++; O2-NEXT: addu $3, $2, $4 ++; O2-NEXT: sc $3, 0($1) ++; O2-NEXT: beqz $3, $BB18_1 ++; O2-NEXT: nop ++; O2-NEXT: # %bb.2: # %entry ++; O2-NEXT: jr $ra ++; O2-NEXT: nop ++; ++; O3-LABEL: AtomicLoadAdd32_OffGt9Bit: ++; O3: # %bb.0: # %entry ++; O3-NEXT: lui $2, %hi(_gp_disp) ++; O3-NEXT: addiu $2, $2, %lo(_gp_disp) ++; O3-NEXT: addu $1, $2, $25 ++; O3-NEXT: lw $1, %got(x)($1) ++; O3-NEXT: addiu $1, $1, 1024 ++; O3-NEXT: $BB18_1: # %entry ++; O3-NEXT: # =>This Inner Loop Header: Depth=1 ++; O3-NEXT: ll $2, 0($1) ++; O3-NEXT: addu $3, $2, $4 ++; O3-NEXT: sc $3, 0($1) ++; O3-NEXT: beqz $3, $BB18_1 ++; O3-NEXT: nop ++; O3-NEXT: # %bb.2: # %entry ++; O3-NEXT: jr $ra ++; O3-NEXT: nop ++; ++; MIPS32EB-LABEL: AtomicLoadAdd32_OffGt9Bit: ++; MIPS32EB: # %bb.0: # %entry ++; MIPS32EB-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32EB-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32EB-NEXT: addu $1, $2, $25 ++; MIPS32EB-NEXT: lw $1, %got(x)($1) ++; MIPS32EB-NEXT: addiu $1, $1, 1024 ++; MIPS32EB-NEXT: $BB18_1: # %entry ++; MIPS32EB-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32EB-NEXT: ll $2, 0($1) ++; MIPS32EB-NEXT: addu $3, $2, $4 ++; MIPS32EB-NEXT: sc $3, 0($1) ++; MIPS32EB-NEXT: beqz $3, $BB18_1 ++; MIPS32EB-NEXT: nop ++; MIPS32EB-NEXT: # %bb.2: # %entry ++; MIPS32EB-NEXT: jr $ra ++; MIPS32EB-NEXT: nop ++entry: ++ %0 = atomicrmw add i32* getelementptr(i32, i32* @x, i32 256), i32 %incr monotonic ++ ret i32 %0 ++ ++} +diff --git a/llvm/test/CodeGen/Mips/atomic.ll b/llvm/test/CodeGen/Mips/atomic.ll +index c8b67eda1..4005ea17e 100644 +--- a/llvm/test/CodeGen/Mips/atomic.ll ++++ b/llvm/test/CodeGen/Mips/atomic.ll +@@ -1,35 +1,35 @@ + ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +-; RUN: llc -mtriple=mipsel-unknown-linux-gnu 
--disable-machine-licm -mcpu=mips32 -relocation-model=pic -verify-machineinstrs < %s | \ ++; RUN: llc -mips-fix-loongson3-llsc=0 -mtriple=mipsel-unknown-linux-gnu --disable-machine-licm -mcpu=mips32 -relocation-model=pic -verify-machineinstrs < %s | \ + ; RUN: FileCheck %s -check-prefix=MIPS32 +-; RUN: llc -mtriple=mipsel-unknown-linux-gnu -O0 --disable-machine-licm -mcpu=mips32 -relocation-model=pic -verify-machineinstrs < %s | \ ++; RUN: llc -mips-fix-loongson3-llsc=0 -mtriple=mipsel-unknown-linux-gnu -O0 --disable-machine-licm -mcpu=mips32 -relocation-model=pic -verify-machineinstrs < %s | \ + ; RUN: FileCheck %s -check-prefix=MIPS32O0 +-; RUN: llc -mtriple=mipsel-unknown-linux-gnu --disable-machine-licm -mcpu=mips32r2 -relocation-model=pic -verify-machineinstrs < %s | \ ++; RUN: llc -mips-fix-loongson3-llsc=0 -mtriple=mipsel-unknown-linux-gnu --disable-machine-licm -mcpu=mips32r2 -relocation-model=pic -verify-machineinstrs < %s | \ + ; RUN: FileCheck %s -check-prefix=MIPS32R2 +-; RUN: llc -mtriple=mipsel-unknown-linux-gnu --disable-machine-licm -mcpu=mips32r6 -relocation-model=pic -verify-machineinstrs < %s | \ ++; RUN: llc -mips-fix-loongson3-llsc=0 -mtriple=mipsel-unknown-linux-gnu --disable-machine-licm -mcpu=mips32r6 -relocation-model=pic -verify-machineinstrs < %s | \ + ; RUN: FileCheck %s -check-prefix=MIPS32R6 +-; RUN: llc -mtriple=mipsel-unknown-linux-gnu -O0 --disable-machine-licm -mcpu=mips32r6 -relocation-model=pic -verify-machineinstrs < %s | \ ++; RUN: llc -mips-fix-loongson3-llsc=0 -mtriple=mipsel-unknown-linux-gnu -O0 --disable-machine-licm -mcpu=mips32r6 -relocation-model=pic -verify-machineinstrs < %s | \ + ; RUN: FileCheck %s -check-prefix=MIPS32R6O0 +-; RUN: llc -mtriple=mips64el-unknown-linux-gnu --disable-machine-licm -mcpu=mips4 -relocation-model=pic -verify-machineinstrs < %s | \ ++; RUN: llc -mips-fix-loongson3-llsc=0 -mtriple=mips64el-unknown-linux-gnu --disable-machine-licm -mcpu=mips4 -relocation-model=pic -verify-machineinstrs < %s | \ + ; RUN: FileCheck %s -check-prefix=MIPS4 +-; RUN: llc -mtriple=mips64el-unknown-linux-gnu --disable-machine-licm -mcpu=mips64 -relocation-model=pic -verify-machineinstrs < %s | \ ++; RUN: llc -mips-fix-loongson3-llsc=0 -mtriple=mips64el-unknown-linux-gnu --disable-machine-licm -mcpu=mips64 -relocation-model=pic -verify-machineinstrs < %s | \ + ; RUN: FileCheck %s -check-prefix=MIPS64 +-; RUN: llc -mtriple=mips64el-unknown-linux-gnu --disable-machine-licm -mcpu=mips64r2 -relocation-model=pic -verify-machineinstrs < %s | \ ++; RUN: llc -mips-fix-loongson3-llsc=0 -mtriple=mips64el-unknown-linux-gnu --disable-machine-licm -mcpu=mips64r2 -relocation-model=pic -verify-machineinstrs < %s | \ + ; RUN: FileCheck %s -check-prefix=MIPS64R2 +-; RUN: llc -mtriple=mips64el-unknown-linux-gnu --disable-machine-licm -mcpu=mips64r6 -relocation-model=pic -verify-machineinstrs < %s | \ ++; RUN: llc -mips-fix-loongson3-llsc=0 -mtriple=mips64el-unknown-linux-gnu --disable-machine-licm -mcpu=mips64r6 -relocation-model=pic -verify-machineinstrs < %s | \ + ; RUN: FileCheck %s -check-prefix=MIPS64R6 +-; RUN: llc -mtriple=mips64-unknown-linux-gnu -O0 -mcpu=mips64r6 -relocation-model=pic -verify-machineinstrs -verify-machineinstrs < %s | \ ++; RUN: llc -mips-fix-loongson3-llsc=0 -mtriple=mips64-unknown-linux-gnu -O0 -mcpu=mips64r6 -relocation-model=pic -verify-machineinstrs -verify-machineinstrs < %s | \ + ; RUN: FileCheck %s -check-prefix=MIPS64R6O0 +-; RUN: llc -mtriple=mipsel-unknown-linux-gnu --disable-machine-licm -mcpu=mips32r2 -mattr=micromips 
-relocation-model=pic -verify-machineinstrs < %s | \ ++; RUN: llc -mips-fix-loongson3-llsc=0 -mtriple=mipsel-unknown-linux-gnu --disable-machine-licm -mcpu=mips32r2 -mattr=micromips -relocation-model=pic -verify-machineinstrs < %s | \ + ; RUN: FileCheck %s -check-prefix=MM32 + + ; We want to verify the produced code is well formed all optimization levels, the rest of the tests which ensure correctness. +-; RUN: llc -mtriple=mipsel-unknown-linux-gnu -O1 --disable-machine-licm -mcpu=mips32 -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s --check-prefix=O1 +-; RUN: llc -mtriple=mipsel-unknown-linux-gnu -O2 --disable-machine-licm -mcpu=mips32 -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s --check-prefix=O2 +-; RUN: llc -mtriple=mipsel-unknown-linux-gnu -O3 --disable-machine-licm -mcpu=mips32 -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s --check-prefix=O3 ++; RUN: llc -mips-fix-loongson3-llsc=0 -mtriple=mipsel-unknown-linux-gnu -O1 --disable-machine-licm -mcpu=mips32 -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s --check-prefix=O1 ++; RUN: llc -mips-fix-loongson3-llsc=0 -mtriple=mipsel-unknown-linux-gnu -O2 --disable-machine-licm -mcpu=mips32 -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s --check-prefix=O2 ++; RUN: llc -mips-fix-loongson3-llsc=0 -mtriple=mipsel-unknown-linux-gnu -O3 --disable-machine-licm -mcpu=mips32 -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s --check-prefix=O3 + + ; Keep one big-endian check so that we don't reduce testing, but don't add more + ; since endianness doesn't affect the body of the atomic operations. +-; RUN: llc -mtriple=mips-unknown-linux-gnu --disable-machine-licm -mcpu=mips32 -relocation-model=pic -verify-machineinstrs < %s | \ ++; RUN: llc -mips-fix-loongson3-llsc=0 -mtriple=mips-unknown-linux-gnu --disable-machine-licm -mcpu=mips32 -relocation-model=pic -verify-machineinstrs < %s | \ + ; RUN: FileCheck %s -check-prefix=MIPS32EB + + @x = common global i32 0, align 4 +diff --git a/llvm/test/CodeGen/Mips/atomic64-fix-loongson3-llsc.ll b/llvm/test/CodeGen/Mips/atomic64-fix-loongson3-llsc.ll +new file mode 100644 +index 000000000..48c202940 +--- /dev/null ++++ b/llvm/test/CodeGen/Mips/atomic64-fix-loongson3-llsc.ll +@@ -0,0 +1,1377 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc -mips-fix-loongson3-llsc=1 -mtriple=mips64el-unknown-linux-gnu --disable-machine-licm -mcpu=mips4 -relocation-model=pic -verify-machineinstrs < %s | \ ++; RUN: FileCheck %s -check-prefix=MIPS4 ++; RUN: llc -mips-fix-loongson3-llsc=1 -mtriple=mips64el-unknown-linux-gnu --disable-machine-licm -mcpu=mips64 -relocation-model=pic -verify-machineinstrs < %s | \ ++; RUN: FileCheck %s -check-prefix=MIPS64 ++; RUN: llc -mips-fix-loongson3-llsc=1 -mtriple=mips64el-unknown-linux-gnu --disable-machine-licm -mcpu=mips64r2 -relocation-model=pic -verify-machineinstrs < %s | \ ++; RUN: FileCheck %s -check-prefix=MIPS64R2 ++; RUN: llc -mips-fix-loongson3-llsc=1 -mtriple=mips64el-unknown-linux-gnu --disable-machine-licm -mcpu=mips64r6 -relocation-model=pic -verify-machineinstrs < %s | \ ++; RUN: FileCheck %s -check-prefix=MIPS64R6 ++; RUN: llc -mips-fix-loongson3-llsc=1 -mtriple=mips64-unknown-linux-gnu -O0 -mcpu=mips64r6 -relocation-model=pic -verify-machineinstrs -verify-machineinstrs < %s | \ ++; RUN: FileCheck %s -check-prefix=MIPS64R6O0 ++ ++; We want to verify the produced code is well formed all optimization levels, the rest of the test which ensure 
correctness. ++; RUN: llc -mips-fix-loongson3-llsc=1 -mtriple=mips64el-unknown-linux-gnu -O1 --disable-machine-licm -mcpu=mips64 -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s --check-prefix=O1 ++; RUN: llc -mips-fix-loongson3-llsc=1 -mtriple=mips64el-unknown-linux-gnu -O2 --disable-machine-licm -mcpu=mips64 -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s --check-prefix=O2 ++; RUN: llc -mips-fix-loongson3-llsc=1 -mtriple=mips64el-unknown-linux-gnu -O3 --disable-machine-licm -mcpu=mips64 -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s --check-prefix=O3 ++ ++; Keep one big-endian check so that we don't reduce testing, but don't add more ++; since endianness doesn't affect the body of the atomic operations. ++; RUN: llc -mips-fix-loongson3-llsc=1 -mtriple=mips64-unknown-linux-gnu --disable-machine-licm -mcpu=mips64 -relocation-model=pic -verify-machineinstrs < %s | \ ++; RUN: FileCheck %s -check-prefix=MIPS64EB ++ ++@x = common global i64 0, align 4 ++ ++define i64 @AtomicLoadAdd(i64 signext %incr) nounwind { ++; MIPS4-LABEL: AtomicLoadAdd: ++; MIPS4: # %bb.0: # %entry ++; MIPS4-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd))) ++; MIPS4-NEXT: daddu $1, $1, $25 ++; MIPS4-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd))) ++; MIPS4-NEXT: ld $1, %got_disp(x)($1) ++; MIPS4-NEXT: .LBB0_1: # %entry ++; MIPS4-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS4-NEXT: lld $2, 0($1) ++; MIPS4-NEXT: daddu $3, $2, $4 ++; MIPS4-NEXT: scd $3, 0($1) ++; MIPS4-NEXT: beqz $3, .LBB0_1 ++; MIPS4-NEXT: nop ++; MIPS4-NEXT: # %bb.2: # %entry ++; MIPS4-NEXT: jr $ra ++; MIPS4-NEXT: nop ++; ++; MIPS64-LABEL: AtomicLoadAdd: ++; MIPS64: # %bb.0: # %entry ++; MIPS64-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd))) ++; MIPS64-NEXT: daddu $1, $1, $25 ++; MIPS64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd))) ++; MIPS64-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64-NEXT: .LBB0_1: # %entry ++; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64-NEXT: lld $2, 0($1) ++; MIPS64-NEXT: daddu $3, $2, $4 ++; MIPS64-NEXT: scd $3, 0($1) ++; MIPS64-NEXT: beqz $3, .LBB0_1 ++; MIPS64-NEXT: nop ++; MIPS64-NEXT: # %bb.2: # %entry ++; MIPS64-NEXT: jr $ra ++; MIPS64-NEXT: nop ++; ++; MIPS64R2-LABEL: AtomicLoadAdd: ++; MIPS64R2: # %bb.0: # %entry ++; MIPS64R2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd))) ++; MIPS64R2-NEXT: daddu $1, $1, $25 ++; MIPS64R2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd))) ++; MIPS64R2-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64R2-NEXT: .LBB0_1: # %entry ++; MIPS64R2-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R2-NEXT: lld $2, 0($1) ++; MIPS64R2-NEXT: daddu $3, $2, $4 ++; MIPS64R2-NEXT: scd $3, 0($1) ++; MIPS64R2-NEXT: beqz $3, .LBB0_1 ++; MIPS64R2-NEXT: nop ++; MIPS64R2-NEXT: # %bb.2: # %entry ++; MIPS64R2-NEXT: jr $ra ++; MIPS64R2-NEXT: nop ++; ++; MIPS64R6-LABEL: AtomicLoadAdd: ++; MIPS64R6: # %bb.0: # %entry ++; MIPS64R6-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd))) ++; MIPS64R6-NEXT: daddu $1, $1, $25 ++; MIPS64R6-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd))) ++; MIPS64R6-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64R6-NEXT: .LBB0_1: # %entry ++; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6-NEXT: lld $2, 0($1) ++; MIPS64R6-NEXT: daddu $3, $2, $4 ++; MIPS64R6-NEXT: scd $3, 0($1) ++; MIPS64R6-NEXT: beqzc $3, .LBB0_1 ++; MIPS64R6-NEXT: nop ++; MIPS64R6-NEXT: # %bb.2: # %entry ++; MIPS64R6-NEXT: jrc $ra ++; ++; MIPS64R6O0-LABEL: AtomicLoadAdd: ++; MIPS64R6O0: # %bb.0: # %entry ++; MIPS64R6O0-NEXT: lui $1, 
%hi(%neg(%gp_rel(AtomicLoadAdd))) ++; MIPS64R6O0-NEXT: daddu $1, $1, $25 ++; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd))) ++; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1) ++; MIPS64R6O0-NEXT: .LBB0_1: # %entry ++; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6O0-NEXT: lld $2, 0($3) ++; MIPS64R6O0-NEXT: daddu $1, $2, $4 ++; MIPS64R6O0-NEXT: scd $1, 0($3) ++; MIPS64R6O0-NEXT: beqzc $1, .LBB0_1 ++; MIPS64R6O0-NEXT: nop ++; MIPS64R6O0-NEXT: # %bb.2: # %entry ++; MIPS64R6O0-NEXT: jrc $ra ++; ++; O1-LABEL: AtomicLoadAdd: ++; O1: # %bb.0: # %entry ++; O1-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd))) ++; O1-NEXT: daddu $1, $1, $25 ++; O1-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd))) ++; O1-NEXT: ld $1, %got_disp(x)($1) ++; O1-NEXT: .LBB0_1: # %entry ++; O1-NEXT: # =>This Inner Loop Header: Depth=1 ++; O1-NEXT: lld $2, 0($1) ++; O1-NEXT: daddu $3, $2, $4 ++; O1-NEXT: scd $3, 0($1) ++; O1-NEXT: beqz $3, .LBB0_1 ++; O1-NEXT: nop ++; O1-NEXT: # %bb.2: # %entry ++; O1-NEXT: jr $ra ++; O1-NEXT: nop ++; ++; O2-LABEL: AtomicLoadAdd: ++; O2: # %bb.0: # %entry ++; O2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd))) ++; O2-NEXT: daddu $1, $1, $25 ++; O2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd))) ++; O2-NEXT: ld $1, %got_disp(x)($1) ++; O2-NEXT: .LBB0_1: # %entry ++; O2-NEXT: # =>This Inner Loop Header: Depth=1 ++; O2-NEXT: lld $2, 0($1) ++; O2-NEXT: daddu $3, $2, $4 ++; O2-NEXT: scd $3, 0($1) ++; O2-NEXT: beqz $3, .LBB0_1 ++; O2-NEXT: nop ++; O2-NEXT: # %bb.2: # %entry ++; O2-NEXT: jr $ra ++; O2-NEXT: nop ++; ++; O3-LABEL: AtomicLoadAdd: ++; O3: # %bb.0: # %entry ++; O3-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd))) ++; O3-NEXT: daddu $1, $1, $25 ++; O3-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd))) ++; O3-NEXT: ld $1, %got_disp(x)($1) ++; O3-NEXT: .LBB0_1: # %entry ++; O3-NEXT: # =>This Inner Loop Header: Depth=1 ++; O3-NEXT: lld $2, 0($1) ++; O3-NEXT: daddu $3, $2, $4 ++; O3-NEXT: scd $3, 0($1) ++; O3-NEXT: beqz $3, .LBB0_1 ++; O3-NEXT: nop ++; O3-NEXT: # %bb.2: # %entry ++; O3-NEXT: jr $ra ++; O3-NEXT: nop ++; ++; MIPS64EB-LABEL: AtomicLoadAdd: ++; MIPS64EB: # %bb.0: # %entry ++; MIPS64EB-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd))) ++; MIPS64EB-NEXT: daddu $1, $1, $25 ++; MIPS64EB-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd))) ++; MIPS64EB-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64EB-NEXT: .LBB0_1: # %entry ++; MIPS64EB-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64EB-NEXT: lld $2, 0($1) ++; MIPS64EB-NEXT: daddu $3, $2, $4 ++; MIPS64EB-NEXT: scd $3, 0($1) ++; MIPS64EB-NEXT: beqz $3, .LBB0_1 ++; MIPS64EB-NEXT: nop ++; MIPS64EB-NEXT: # %bb.2: # %entry ++; MIPS64EB-NEXT: jr $ra ++; MIPS64EB-NEXT: nop ++entry: ++ %0 = atomicrmw add i64* @x, i64 %incr monotonic ++ ret i64 %0 ++ ++} ++ ++define i64 @AtomicLoadSub(i64 signext %incr) nounwind { ++; MIPS4-LABEL: AtomicLoadSub: ++; MIPS4: # %bb.0: # %entry ++; MIPS4-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadSub))) ++; MIPS4-NEXT: daddu $1, $1, $25 ++; MIPS4-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadSub))) ++; MIPS4-NEXT: ld $1, %got_disp(x)($1) ++; MIPS4-NEXT: .LBB1_1: # %entry ++; MIPS4-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS4-NEXT: lld $2, 0($1) ++; MIPS4-NEXT: dsubu $3, $2, $4 ++; MIPS4-NEXT: scd $3, 0($1) ++; MIPS4-NEXT: beqz $3, .LBB1_1 ++; MIPS4-NEXT: nop ++; MIPS4-NEXT: # %bb.2: # %entry ++; MIPS4-NEXT: jr $ra ++; MIPS4-NEXT: nop ++; ++; MIPS64-LABEL: AtomicLoadSub: ++; MIPS64: # %bb.0: # %entry ++; MIPS64-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadSub))) ++; 
MIPS64-NEXT: daddu $1, $1, $25 ++; MIPS64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadSub))) ++; MIPS64-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64-NEXT: .LBB1_1: # %entry ++; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64-NEXT: lld $2, 0($1) ++; MIPS64-NEXT: dsubu $3, $2, $4 ++; MIPS64-NEXT: scd $3, 0($1) ++; MIPS64-NEXT: beqz $3, .LBB1_1 ++; MIPS64-NEXT: nop ++; MIPS64-NEXT: # %bb.2: # %entry ++; MIPS64-NEXT: jr $ra ++; MIPS64-NEXT: nop ++; ++; MIPS64R2-LABEL: AtomicLoadSub: ++; MIPS64R2: # %bb.0: # %entry ++; MIPS64R2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadSub))) ++; MIPS64R2-NEXT: daddu $1, $1, $25 ++; MIPS64R2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadSub))) ++; MIPS64R2-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64R2-NEXT: .LBB1_1: # %entry ++; MIPS64R2-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R2-NEXT: lld $2, 0($1) ++; MIPS64R2-NEXT: dsubu $3, $2, $4 ++; MIPS64R2-NEXT: scd $3, 0($1) ++; MIPS64R2-NEXT: beqz $3, .LBB1_1 ++; MIPS64R2-NEXT: nop ++; MIPS64R2-NEXT: # %bb.2: # %entry ++; MIPS64R2-NEXT: jr $ra ++; MIPS64R2-NEXT: nop ++; ++; MIPS64R6-LABEL: AtomicLoadSub: ++; MIPS64R6: # %bb.0: # %entry ++; MIPS64R6-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadSub))) ++; MIPS64R6-NEXT: daddu $1, $1, $25 ++; MIPS64R6-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadSub))) ++; MIPS64R6-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64R6-NEXT: .LBB1_1: # %entry ++; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6-NEXT: lld $2, 0($1) ++; MIPS64R6-NEXT: dsubu $3, $2, $4 ++; MIPS64R6-NEXT: scd $3, 0($1) ++; MIPS64R6-NEXT: beqzc $3, .LBB1_1 ++; MIPS64R6-NEXT: nop ++; MIPS64R6-NEXT: # %bb.2: # %entry ++; MIPS64R6-NEXT: jrc $ra ++; ++; MIPS64R6O0-LABEL: AtomicLoadSub: ++; MIPS64R6O0: # %bb.0: # %entry ++; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadSub))) ++; MIPS64R6O0-NEXT: daddu $1, $1, $25 ++; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadSub))) ++; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1) ++; MIPS64R6O0-NEXT: .LBB1_1: # %entry ++; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6O0-NEXT: lld $2, 0($3) ++; MIPS64R6O0-NEXT: dsubu $1, $2, $4 ++; MIPS64R6O0-NEXT: scd $1, 0($3) ++; MIPS64R6O0-NEXT: beqzc $1, .LBB1_1 ++; MIPS64R6O0-NEXT: nop ++; MIPS64R6O0-NEXT: # %bb.2: # %entry ++; MIPS64R6O0-NEXT: jrc $ra ++; ++; O1-LABEL: AtomicLoadSub: ++; O1: # %bb.0: # %entry ++; O1-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadSub))) ++; O1-NEXT: daddu $1, $1, $25 ++; O1-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadSub))) ++; O1-NEXT: ld $1, %got_disp(x)($1) ++; O1-NEXT: .LBB1_1: # %entry ++; O1-NEXT: # =>This Inner Loop Header: Depth=1 ++; O1-NEXT: lld $2, 0($1) ++; O1-NEXT: dsubu $3, $2, $4 ++; O1-NEXT: scd $3, 0($1) ++; O1-NEXT: beqz $3, .LBB1_1 ++; O1-NEXT: nop ++; O1-NEXT: # %bb.2: # %entry ++; O1-NEXT: jr $ra ++; O1-NEXT: nop ++; ++; O2-LABEL: AtomicLoadSub: ++; O2: # %bb.0: # %entry ++; O2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadSub))) ++; O2-NEXT: daddu $1, $1, $25 ++; O2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadSub))) ++; O2-NEXT: ld $1, %got_disp(x)($1) ++; O2-NEXT: .LBB1_1: # %entry ++; O2-NEXT: # =>This Inner Loop Header: Depth=1 ++; O2-NEXT: lld $2, 0($1) ++; O2-NEXT: dsubu $3, $2, $4 ++; O2-NEXT: scd $3, 0($1) ++; O2-NEXT: beqz $3, .LBB1_1 ++; O2-NEXT: nop ++; O2-NEXT: # %bb.2: # %entry ++; O2-NEXT: jr $ra ++; O2-NEXT: nop ++; ++; O3-LABEL: AtomicLoadSub: ++; O3: # %bb.0: # %entry ++; O3-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadSub))) ++; O3-NEXT: daddu $1, $1, $25 ++; O3-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadSub))) 
++; O3-NEXT: ld $1, %got_disp(x)($1) ++; O3-NEXT: .LBB1_1: # %entry ++; O3-NEXT: # =>This Inner Loop Header: Depth=1 ++; O3-NEXT: lld $2, 0($1) ++; O3-NEXT: dsubu $3, $2, $4 ++; O3-NEXT: scd $3, 0($1) ++; O3-NEXT: beqz $3, .LBB1_1 ++; O3-NEXT: nop ++; O3-NEXT: # %bb.2: # %entry ++; O3-NEXT: jr $ra ++; O3-NEXT: nop ++; ++; MIPS64EB-LABEL: AtomicLoadSub: ++; MIPS64EB: # %bb.0: # %entry ++; MIPS64EB-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadSub))) ++; MIPS64EB-NEXT: daddu $1, $1, $25 ++; MIPS64EB-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadSub))) ++; MIPS64EB-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64EB-NEXT: .LBB1_1: # %entry ++; MIPS64EB-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64EB-NEXT: lld $2, 0($1) ++; MIPS64EB-NEXT: dsubu $3, $2, $4 ++; MIPS64EB-NEXT: scd $3, 0($1) ++; MIPS64EB-NEXT: beqz $3, .LBB1_1 ++; MIPS64EB-NEXT: nop ++; MIPS64EB-NEXT: # %bb.2: # %entry ++; MIPS64EB-NEXT: jr $ra ++; MIPS64EB-NEXT: nop ++entry: ++ %0 = atomicrmw sub i64* @x, i64 %incr monotonic ++ ret i64 %0 ++ ++} ++ ++define i64 @AtomicLoadAnd(i64 signext %incr) nounwind { ++; MIPS4-LABEL: AtomicLoadAnd: ++; MIPS4: # %bb.0: # %entry ++; MIPS4-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAnd))) ++; MIPS4-NEXT: daddu $1, $1, $25 ++; MIPS4-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAnd))) ++; MIPS4-NEXT: ld $1, %got_disp(x)($1) ++; MIPS4-NEXT: .LBB2_1: # %entry ++; MIPS4-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS4-NEXT: lld $2, 0($1) ++; MIPS4-NEXT: and $3, $2, $4 ++; MIPS4-NEXT: scd $3, 0($1) ++; MIPS4-NEXT: beqz $3, .LBB2_1 ++; MIPS4-NEXT: nop ++; MIPS4-NEXT: # %bb.2: # %entry ++; MIPS4-NEXT: jr $ra ++; MIPS4-NEXT: nop ++; ++; MIPS64-LABEL: AtomicLoadAnd: ++; MIPS64: # %bb.0: # %entry ++; MIPS64-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAnd))) ++; MIPS64-NEXT: daddu $1, $1, $25 ++; MIPS64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAnd))) ++; MIPS64-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64-NEXT: .LBB2_1: # %entry ++; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64-NEXT: lld $2, 0($1) ++; MIPS64-NEXT: and $3, $2, $4 ++; MIPS64-NEXT: scd $3, 0($1) ++; MIPS64-NEXT: beqz $3, .LBB2_1 ++; MIPS64-NEXT: nop ++; MIPS64-NEXT: # %bb.2: # %entry ++; MIPS64-NEXT: jr $ra ++; MIPS64-NEXT: nop ++; ++; MIPS64R2-LABEL: AtomicLoadAnd: ++; MIPS64R2: # %bb.0: # %entry ++; MIPS64R2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAnd))) ++; MIPS64R2-NEXT: daddu $1, $1, $25 ++; MIPS64R2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAnd))) ++; MIPS64R2-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64R2-NEXT: .LBB2_1: # %entry ++; MIPS64R2-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R2-NEXT: lld $2, 0($1) ++; MIPS64R2-NEXT: and $3, $2, $4 ++; MIPS64R2-NEXT: scd $3, 0($1) ++; MIPS64R2-NEXT: beqz $3, .LBB2_1 ++; MIPS64R2-NEXT: nop ++; MIPS64R2-NEXT: # %bb.2: # %entry ++; MIPS64R2-NEXT: jr $ra ++; MIPS64R2-NEXT: nop ++; ++; MIPS64R6-LABEL: AtomicLoadAnd: ++; MIPS64R6: # %bb.0: # %entry ++; MIPS64R6-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAnd))) ++; MIPS64R6-NEXT: daddu $1, $1, $25 ++; MIPS64R6-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAnd))) ++; MIPS64R6-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64R6-NEXT: .LBB2_1: # %entry ++; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6-NEXT: lld $2, 0($1) ++; MIPS64R6-NEXT: and $3, $2, $4 ++; MIPS64R6-NEXT: scd $3, 0($1) ++; MIPS64R6-NEXT: beqzc $3, .LBB2_1 ++; MIPS64R6-NEXT: nop ++; MIPS64R6-NEXT: # %bb.2: # %entry ++; MIPS64R6-NEXT: jrc $ra ++; ++; MIPS64R6O0-LABEL: AtomicLoadAnd: ++; MIPS64R6O0: # %bb.0: # %entry ++; MIPS64R6O0-NEXT: lui $1, 
%hi(%neg(%gp_rel(AtomicLoadAnd))) ++; MIPS64R6O0-NEXT: daddu $1, $1, $25 ++; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAnd))) ++; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1) ++; MIPS64R6O0-NEXT: .LBB2_1: # %entry ++; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6O0-NEXT: lld $2, 0($3) ++; MIPS64R6O0-NEXT: and $1, $2, $4 ++; MIPS64R6O0-NEXT: scd $1, 0($3) ++; MIPS64R6O0-NEXT: beqzc $1, .LBB2_1 ++; MIPS64R6O0-NEXT: nop ++; MIPS64R6O0-NEXT: # %bb.2: # %entry ++; MIPS64R6O0-NEXT: jrc $ra ++; ++; O1-LABEL: AtomicLoadAnd: ++; O1: # %bb.0: # %entry ++; O1-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAnd))) ++; O1-NEXT: daddu $1, $1, $25 ++; O1-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAnd))) ++; O1-NEXT: ld $1, %got_disp(x)($1) ++; O1-NEXT: .LBB2_1: # %entry ++; O1-NEXT: # =>This Inner Loop Header: Depth=1 ++; O1-NEXT: lld $2, 0($1) ++; O1-NEXT: and $3, $2, $4 ++; O1-NEXT: scd $3, 0($1) ++; O1-NEXT: beqz $3, .LBB2_1 ++; O1-NEXT: nop ++; O1-NEXT: # %bb.2: # %entry ++; O1-NEXT: jr $ra ++; O1-NEXT: nop ++; ++; O2-LABEL: AtomicLoadAnd: ++; O2: # %bb.0: # %entry ++; O2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAnd))) ++; O2-NEXT: daddu $1, $1, $25 ++; O2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAnd))) ++; O2-NEXT: ld $1, %got_disp(x)($1) ++; O2-NEXT: .LBB2_1: # %entry ++; O2-NEXT: # =>This Inner Loop Header: Depth=1 ++; O2-NEXT: lld $2, 0($1) ++; O2-NEXT: and $3, $2, $4 ++; O2-NEXT: scd $3, 0($1) ++; O2-NEXT: beqz $3, .LBB2_1 ++; O2-NEXT: nop ++; O2-NEXT: # %bb.2: # %entry ++; O2-NEXT: jr $ra ++; O2-NEXT: nop ++; ++; O3-LABEL: AtomicLoadAnd: ++; O3: # %bb.0: # %entry ++; O3-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAnd))) ++; O3-NEXT: daddu $1, $1, $25 ++; O3-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAnd))) ++; O3-NEXT: ld $1, %got_disp(x)($1) ++; O3-NEXT: .LBB2_1: # %entry ++; O3-NEXT: # =>This Inner Loop Header: Depth=1 ++; O3-NEXT: lld $2, 0($1) ++; O3-NEXT: and $3, $2, $4 ++; O3-NEXT: scd $3, 0($1) ++; O3-NEXT: beqz $3, .LBB2_1 ++; O3-NEXT: nop ++; O3-NEXT: # %bb.2: # %entry ++; O3-NEXT: jr $ra ++; O3-NEXT: nop ++; ++; MIPS64EB-LABEL: AtomicLoadAnd: ++; MIPS64EB: # %bb.0: # %entry ++; MIPS64EB-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAnd))) ++; MIPS64EB-NEXT: daddu $1, $1, $25 ++; MIPS64EB-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAnd))) ++; MIPS64EB-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64EB-NEXT: .LBB2_1: # %entry ++; MIPS64EB-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64EB-NEXT: lld $2, 0($1) ++; MIPS64EB-NEXT: and $3, $2, $4 ++; MIPS64EB-NEXT: scd $3, 0($1) ++; MIPS64EB-NEXT: beqz $3, .LBB2_1 ++; MIPS64EB-NEXT: nop ++; MIPS64EB-NEXT: # %bb.2: # %entry ++; MIPS64EB-NEXT: jr $ra ++; MIPS64EB-NEXT: nop ++entry: ++ %0 = atomicrmw and i64* @x, i64 %incr monotonic ++ ret i64 %0 ++ ++} ++ ++define i64 @AtomicLoadOr(i64 signext %incr) nounwind { ++; MIPS4-LABEL: AtomicLoadOr: ++; MIPS4: # %bb.0: # %entry ++; MIPS4-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadOr))) ++; MIPS4-NEXT: daddu $1, $1, $25 ++; MIPS4-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadOr))) ++; MIPS4-NEXT: ld $1, %got_disp(x)($1) ++; MIPS4-NEXT: .LBB3_1: # %entry ++; MIPS4-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS4-NEXT: lld $2, 0($1) ++; MIPS4-NEXT: or $3, $2, $4 ++; MIPS4-NEXT: scd $3, 0($1) ++; MIPS4-NEXT: beqz $3, .LBB3_1 ++; MIPS4-NEXT: nop ++; MIPS4-NEXT: # %bb.2: # %entry ++; MIPS4-NEXT: jr $ra ++; MIPS4-NEXT: nop ++; ++; MIPS64-LABEL: AtomicLoadOr: ++; MIPS64: # %bb.0: # %entry ++; MIPS64-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadOr))) ++; MIPS64-NEXT: daddu $1, 
$1, $25 ++; MIPS64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadOr))) ++; MIPS64-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64-NEXT: .LBB3_1: # %entry ++; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64-NEXT: lld $2, 0($1) ++; MIPS64-NEXT: or $3, $2, $4 ++; MIPS64-NEXT: scd $3, 0($1) ++; MIPS64-NEXT: beqz $3, .LBB3_1 ++; MIPS64-NEXT: nop ++; MIPS64-NEXT: # %bb.2: # %entry ++; MIPS64-NEXT: jr $ra ++; MIPS64-NEXT: nop ++; ++; MIPS64R2-LABEL: AtomicLoadOr: ++; MIPS64R2: # %bb.0: # %entry ++; MIPS64R2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadOr))) ++; MIPS64R2-NEXT: daddu $1, $1, $25 ++; MIPS64R2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadOr))) ++; MIPS64R2-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64R2-NEXT: .LBB3_1: # %entry ++; MIPS64R2-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R2-NEXT: lld $2, 0($1) ++; MIPS64R2-NEXT: or $3, $2, $4 ++; MIPS64R2-NEXT: scd $3, 0($1) ++; MIPS64R2-NEXT: beqz $3, .LBB3_1 ++; MIPS64R2-NEXT: nop ++; MIPS64R2-NEXT: # %bb.2: # %entry ++; MIPS64R2-NEXT: jr $ra ++; MIPS64R2-NEXT: nop ++; ++; MIPS64R6-LABEL: AtomicLoadOr: ++; MIPS64R6: # %bb.0: # %entry ++; MIPS64R6-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadOr))) ++; MIPS64R6-NEXT: daddu $1, $1, $25 ++; MIPS64R6-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadOr))) ++; MIPS64R6-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64R6-NEXT: .LBB3_1: # %entry ++; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6-NEXT: lld $2, 0($1) ++; MIPS64R6-NEXT: or $3, $2, $4 ++; MIPS64R6-NEXT: scd $3, 0($1) ++; MIPS64R6-NEXT: beqzc $3, .LBB3_1 ++; MIPS64R6-NEXT: nop ++; MIPS64R6-NEXT: # %bb.2: # %entry ++; MIPS64R6-NEXT: jrc $ra ++; ++; MIPS64R6O0-LABEL: AtomicLoadOr: ++; MIPS64R6O0: # %bb.0: # %entry ++; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadOr))) ++; MIPS64R6O0-NEXT: daddu $1, $1, $25 ++; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadOr))) ++; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1) ++; MIPS64R6O0-NEXT: .LBB3_1: # %entry ++; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6O0-NEXT: lld $2, 0($3) ++; MIPS64R6O0-NEXT: or $1, $2, $4 ++; MIPS64R6O0-NEXT: scd $1, 0($3) ++; MIPS64R6O0-NEXT: beqzc $1, .LBB3_1 ++; MIPS64R6O0-NEXT: nop ++; MIPS64R6O0-NEXT: # %bb.2: # %entry ++; MIPS64R6O0-NEXT: jrc $ra ++; ++; O1-LABEL: AtomicLoadOr: ++; O1: # %bb.0: # %entry ++; O1-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadOr))) ++; O1-NEXT: daddu $1, $1, $25 ++; O1-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadOr))) ++; O1-NEXT: ld $1, %got_disp(x)($1) ++; O1-NEXT: .LBB3_1: # %entry ++; O1-NEXT: # =>This Inner Loop Header: Depth=1 ++; O1-NEXT: lld $2, 0($1) ++; O1-NEXT: or $3, $2, $4 ++; O1-NEXT: scd $3, 0($1) ++; O1-NEXT: beqz $3, .LBB3_1 ++; O1-NEXT: nop ++; O1-NEXT: # %bb.2: # %entry ++; O1-NEXT: jr $ra ++; O1-NEXT: nop ++; ++; O2-LABEL: AtomicLoadOr: ++; O2: # %bb.0: # %entry ++; O2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadOr))) ++; O2-NEXT: daddu $1, $1, $25 ++; O2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadOr))) ++; O2-NEXT: ld $1, %got_disp(x)($1) ++; O2-NEXT: .LBB3_1: # %entry ++; O2-NEXT: # =>This Inner Loop Header: Depth=1 ++; O2-NEXT: lld $2, 0($1) ++; O2-NEXT: or $3, $2, $4 ++; O2-NEXT: scd $3, 0($1) ++; O2-NEXT: beqz $3, .LBB3_1 ++; O2-NEXT: nop ++; O2-NEXT: # %bb.2: # %entry ++; O2-NEXT: jr $ra ++; O2-NEXT: nop ++; ++; O3-LABEL: AtomicLoadOr: ++; O3: # %bb.0: # %entry ++; O3-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadOr))) ++; O3-NEXT: daddu $1, $1, $25 ++; O3-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadOr))) ++; O3-NEXT: ld $1, %got_disp(x)($1) ++; O3-NEXT: .LBB3_1: 
# %entry ++; O3-NEXT: # =>This Inner Loop Header: Depth=1 ++; O3-NEXT: lld $2, 0($1) ++; O3-NEXT: or $3, $2, $4 ++; O3-NEXT: scd $3, 0($1) ++; O3-NEXT: beqz $3, .LBB3_1 ++; O3-NEXT: nop ++; O3-NEXT: # %bb.2: # %entry ++; O3-NEXT: jr $ra ++; O3-NEXT: nop ++; ++; MIPS64EB-LABEL: AtomicLoadOr: ++; MIPS64EB: # %bb.0: # %entry ++; MIPS64EB-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadOr))) ++; MIPS64EB-NEXT: daddu $1, $1, $25 ++; MIPS64EB-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadOr))) ++; MIPS64EB-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64EB-NEXT: .LBB3_1: # %entry ++; MIPS64EB-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64EB-NEXT: lld $2, 0($1) ++; MIPS64EB-NEXT: or $3, $2, $4 ++; MIPS64EB-NEXT: scd $3, 0($1) ++; MIPS64EB-NEXT: beqz $3, .LBB3_1 ++; MIPS64EB-NEXT: nop ++; MIPS64EB-NEXT: # %bb.2: # %entry ++; MIPS64EB-NEXT: jr $ra ++; MIPS64EB-NEXT: nop ++entry: ++ %0 = atomicrmw or i64* @x, i64 %incr monotonic ++ ret i64 %0 ++ ++} ++ ++define i64 @AtomicLoadXor(i64 signext %incr) nounwind { ++; MIPS4-LABEL: AtomicLoadXor: ++; MIPS4: # %bb.0: # %entry ++; MIPS4-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadXor))) ++; MIPS4-NEXT: daddu $1, $1, $25 ++; MIPS4-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadXor))) ++; MIPS4-NEXT: ld $1, %got_disp(x)($1) ++; MIPS4-NEXT: .LBB4_1: # %entry ++; MIPS4-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS4-NEXT: lld $2, 0($1) ++; MIPS4-NEXT: xor $3, $2, $4 ++; MIPS4-NEXT: scd $3, 0($1) ++; MIPS4-NEXT: beqz $3, .LBB4_1 ++; MIPS4-NEXT: nop ++; MIPS4-NEXT: # %bb.2: # %entry ++; MIPS4-NEXT: jr $ra ++; MIPS4-NEXT: nop ++; ++; MIPS64-LABEL: AtomicLoadXor: ++; MIPS64: # %bb.0: # %entry ++; MIPS64-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadXor))) ++; MIPS64-NEXT: daddu $1, $1, $25 ++; MIPS64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadXor))) ++; MIPS64-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64-NEXT: .LBB4_1: # %entry ++; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64-NEXT: lld $2, 0($1) ++; MIPS64-NEXT: xor $3, $2, $4 ++; MIPS64-NEXT: scd $3, 0($1) ++; MIPS64-NEXT: beqz $3, .LBB4_1 ++; MIPS64-NEXT: nop ++; MIPS64-NEXT: # %bb.2: # %entry ++; MIPS64-NEXT: jr $ra ++; MIPS64-NEXT: nop ++; ++; MIPS64R2-LABEL: AtomicLoadXor: ++; MIPS64R2: # %bb.0: # %entry ++; MIPS64R2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadXor))) ++; MIPS64R2-NEXT: daddu $1, $1, $25 ++; MIPS64R2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadXor))) ++; MIPS64R2-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64R2-NEXT: .LBB4_1: # %entry ++; MIPS64R2-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R2-NEXT: lld $2, 0($1) ++; MIPS64R2-NEXT: xor $3, $2, $4 ++; MIPS64R2-NEXT: scd $3, 0($1) ++; MIPS64R2-NEXT: beqz $3, .LBB4_1 ++; MIPS64R2-NEXT: nop ++; MIPS64R2-NEXT: # %bb.2: # %entry ++; MIPS64R2-NEXT: jr $ra ++; MIPS64R2-NEXT: nop ++; ++; MIPS64R6-LABEL: AtomicLoadXor: ++; MIPS64R6: # %bb.0: # %entry ++; MIPS64R6-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadXor))) ++; MIPS64R6-NEXT: daddu $1, $1, $25 ++; MIPS64R6-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadXor))) ++; MIPS64R6-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64R6-NEXT: .LBB4_1: # %entry ++; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6-NEXT: lld $2, 0($1) ++; MIPS64R6-NEXT: xor $3, $2, $4 ++; MIPS64R6-NEXT: scd $3, 0($1) ++; MIPS64R6-NEXT: beqzc $3, .LBB4_1 ++; MIPS64R6-NEXT: nop ++; MIPS64R6-NEXT: # %bb.2: # %entry ++; MIPS64R6-NEXT: jrc $ra ++; ++; MIPS64R6O0-LABEL: AtomicLoadXor: ++; MIPS64R6O0: # %bb.0: # %entry ++; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadXor))) ++; MIPS64R6O0-NEXT: daddu $1, $1, $25 ++; 
MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadXor))) ++; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1) ++; MIPS64R6O0-NEXT: .LBB4_1: # %entry ++; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6O0-NEXT: lld $2, 0($3) ++; MIPS64R6O0-NEXT: xor $1, $2, $4 ++; MIPS64R6O0-NEXT: scd $1, 0($3) ++; MIPS64R6O0-NEXT: beqzc $1, .LBB4_1 ++; MIPS64R6O0-NEXT: nop ++; MIPS64R6O0-NEXT: # %bb.2: # %entry ++; MIPS64R6O0-NEXT: jrc $ra ++; ++; O1-LABEL: AtomicLoadXor: ++; O1: # %bb.0: # %entry ++; O1-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadXor))) ++; O1-NEXT: daddu $1, $1, $25 ++; O1-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadXor))) ++; O1-NEXT: ld $1, %got_disp(x)($1) ++; O1-NEXT: .LBB4_1: # %entry ++; O1-NEXT: # =>This Inner Loop Header: Depth=1 ++; O1-NEXT: lld $2, 0($1) ++; O1-NEXT: xor $3, $2, $4 ++; O1-NEXT: scd $3, 0($1) ++; O1-NEXT: beqz $3, .LBB4_1 ++; O1-NEXT: nop ++; O1-NEXT: # %bb.2: # %entry ++; O1-NEXT: jr $ra ++; O1-NEXT: nop ++; ++; O2-LABEL: AtomicLoadXor: ++; O2: # %bb.0: # %entry ++; O2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadXor))) ++; O2-NEXT: daddu $1, $1, $25 ++; O2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadXor))) ++; O2-NEXT: ld $1, %got_disp(x)($1) ++; O2-NEXT: .LBB4_1: # %entry ++; O2-NEXT: # =>This Inner Loop Header: Depth=1 ++; O2-NEXT: lld $2, 0($1) ++; O2-NEXT: xor $3, $2, $4 ++; O2-NEXT: scd $3, 0($1) ++; O2-NEXT: beqz $3, .LBB4_1 ++; O2-NEXT: nop ++; O2-NEXT: # %bb.2: # %entry ++; O2-NEXT: jr $ra ++; O2-NEXT: nop ++; ++; O3-LABEL: AtomicLoadXor: ++; O3: # %bb.0: # %entry ++; O3-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadXor))) ++; O3-NEXT: daddu $1, $1, $25 ++; O3-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadXor))) ++; O3-NEXT: ld $1, %got_disp(x)($1) ++; O3-NEXT: .LBB4_1: # %entry ++; O3-NEXT: # =>This Inner Loop Header: Depth=1 ++; O3-NEXT: lld $2, 0($1) ++; O3-NEXT: xor $3, $2, $4 ++; O3-NEXT: scd $3, 0($1) ++; O3-NEXT: beqz $3, .LBB4_1 ++; O3-NEXT: nop ++; O3-NEXT: # %bb.2: # %entry ++; O3-NEXT: jr $ra ++; O3-NEXT: nop ++; ++; MIPS64EB-LABEL: AtomicLoadXor: ++; MIPS64EB: # %bb.0: # %entry ++; MIPS64EB-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadXor))) ++; MIPS64EB-NEXT: daddu $1, $1, $25 ++; MIPS64EB-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadXor))) ++; MIPS64EB-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64EB-NEXT: .LBB4_1: # %entry ++; MIPS64EB-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64EB-NEXT: lld $2, 0($1) ++; MIPS64EB-NEXT: xor $3, $2, $4 ++; MIPS64EB-NEXT: scd $3, 0($1) ++; MIPS64EB-NEXT: beqz $3, .LBB4_1 ++; MIPS64EB-NEXT: nop ++; MIPS64EB-NEXT: # %bb.2: # %entry ++; MIPS64EB-NEXT: jr $ra ++; MIPS64EB-NEXT: nop ++entry: ++ %0 = atomicrmw xor i64* @x, i64 %incr monotonic ++ ret i64 %0 ++ ++} ++ ++define i64 @AtomicLoadNand(i64 signext %incr) nounwind { ++; MIPS4-LABEL: AtomicLoadNand: ++; MIPS4: # %bb.0: # %entry ++; MIPS4-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadNand))) ++; MIPS4-NEXT: daddu $1, $1, $25 ++; MIPS4-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadNand))) ++; MIPS4-NEXT: ld $1, %got_disp(x)($1) ++; MIPS4-NEXT: .LBB5_1: # %entry ++; MIPS4-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS4-NEXT: lld $2, 0($1) ++; MIPS4-NEXT: and $3, $2, $4 ++; MIPS4-NEXT: nor $3, $zero, $3 ++; MIPS4-NEXT: scd $3, 0($1) ++; MIPS4-NEXT: beqz $3, .LBB5_1 ++; MIPS4-NEXT: nop ++; MIPS4-NEXT: # %bb.2: # %entry ++; MIPS4-NEXT: jr $ra ++; MIPS4-NEXT: nop ++; ++; MIPS64-LABEL: AtomicLoadNand: ++; MIPS64: # %bb.0: # %entry ++; MIPS64-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadNand))) ++; MIPS64-NEXT: daddu $1, $1, $25 ++; MIPS64-NEXT: 
daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadNand))) ++; MIPS64-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64-NEXT: .LBB5_1: # %entry ++; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64-NEXT: lld $2, 0($1) ++; MIPS64-NEXT: and $3, $2, $4 ++; MIPS64-NEXT: nor $3, $zero, $3 ++; MIPS64-NEXT: scd $3, 0($1) ++; MIPS64-NEXT: beqz $3, .LBB5_1 ++; MIPS64-NEXT: nop ++; MIPS64-NEXT: # %bb.2: # %entry ++; MIPS64-NEXT: jr $ra ++; MIPS64-NEXT: nop ++; ++; MIPS64R2-LABEL: AtomicLoadNand: ++; MIPS64R2: # %bb.0: # %entry ++; MIPS64R2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadNand))) ++; MIPS64R2-NEXT: daddu $1, $1, $25 ++; MIPS64R2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadNand))) ++; MIPS64R2-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64R2-NEXT: .LBB5_1: # %entry ++; MIPS64R2-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R2-NEXT: lld $2, 0($1) ++; MIPS64R2-NEXT: and $3, $2, $4 ++; MIPS64R2-NEXT: nor $3, $zero, $3 ++; MIPS64R2-NEXT: scd $3, 0($1) ++; MIPS64R2-NEXT: beqz $3, .LBB5_1 ++; MIPS64R2-NEXT: nop ++; MIPS64R2-NEXT: # %bb.2: # %entry ++; MIPS64R2-NEXT: jr $ra ++; MIPS64R2-NEXT: nop ++; ++; MIPS64R6-LABEL: AtomicLoadNand: ++; MIPS64R6: # %bb.0: # %entry ++; MIPS64R6-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadNand))) ++; MIPS64R6-NEXT: daddu $1, $1, $25 ++; MIPS64R6-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadNand))) ++; MIPS64R6-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64R6-NEXT: .LBB5_1: # %entry ++; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6-NEXT: lld $2, 0($1) ++; MIPS64R6-NEXT: and $3, $2, $4 ++; MIPS64R6-NEXT: nor $3, $zero, $3 ++; MIPS64R6-NEXT: scd $3, 0($1) ++; MIPS64R6-NEXT: beqzc $3, .LBB5_1 ++; MIPS64R6-NEXT: nop ++; MIPS64R6-NEXT: # %bb.2: # %entry ++; MIPS64R6-NEXT: jrc $ra ++; ++; MIPS64R6O0-LABEL: AtomicLoadNand: ++; MIPS64R6O0: # %bb.0: # %entry ++; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadNand))) ++; MIPS64R6O0-NEXT: daddu $1, $1, $25 ++; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadNand))) ++; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1) ++; MIPS64R6O0-NEXT: .LBB5_1: # %entry ++; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6O0-NEXT: lld $2, 0($3) ++; MIPS64R6O0-NEXT: and $1, $2, $4 ++; MIPS64R6O0-NEXT: nor $1, $zero, $1 ++; MIPS64R6O0-NEXT: scd $1, 0($3) ++; MIPS64R6O0-NEXT: beqzc $1, .LBB5_1 ++; MIPS64R6O0-NEXT: nop ++; MIPS64R6O0-NEXT: # %bb.2: # %entry ++; MIPS64R6O0-NEXT: jrc $ra ++; ++; O1-LABEL: AtomicLoadNand: ++; O1: # %bb.0: # %entry ++; O1-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadNand))) ++; O1-NEXT: daddu $1, $1, $25 ++; O1-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadNand))) ++; O1-NEXT: ld $1, %got_disp(x)($1) ++; O1-NEXT: .LBB5_1: # %entry ++; O1-NEXT: # =>This Inner Loop Header: Depth=1 ++; O1-NEXT: lld $2, 0($1) ++; O1-NEXT: and $3, $2, $4 ++; O1-NEXT: nor $3, $zero, $3 ++; O1-NEXT: scd $3, 0($1) ++; O1-NEXT: beqz $3, .LBB5_1 ++; O1-NEXT: nop ++; O1-NEXT: # %bb.2: # %entry ++; O1-NEXT: jr $ra ++; O1-NEXT: nop ++; ++; O2-LABEL: AtomicLoadNand: ++; O2: # %bb.0: # %entry ++; O2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadNand))) ++; O2-NEXT: daddu $1, $1, $25 ++; O2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadNand))) ++; O2-NEXT: ld $1, %got_disp(x)($1) ++; O2-NEXT: .LBB5_1: # %entry ++; O2-NEXT: # =>This Inner Loop Header: Depth=1 ++; O2-NEXT: lld $2, 0($1) ++; O2-NEXT: and $3, $2, $4 ++; O2-NEXT: nor $3, $zero, $3 ++; O2-NEXT: scd $3, 0($1) ++; O2-NEXT: beqz $3, .LBB5_1 ++; O2-NEXT: nop ++; O2-NEXT: # %bb.2: # %entry ++; O2-NEXT: jr $ra ++; O2-NEXT: nop ++; ++; O3-LABEL: AtomicLoadNand: ++; 
O3: # %bb.0: # %entry ++; O3-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadNand))) ++; O3-NEXT: daddu $1, $1, $25 ++; O3-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadNand))) ++; O3-NEXT: ld $1, %got_disp(x)($1) ++; O3-NEXT: .LBB5_1: # %entry ++; O3-NEXT: # =>This Inner Loop Header: Depth=1 ++; O3-NEXT: lld $2, 0($1) ++; O3-NEXT: and $3, $2, $4 ++; O3-NEXT: nor $3, $zero, $3 ++; O3-NEXT: scd $3, 0($1) ++; O3-NEXT: beqz $3, .LBB5_1 ++; O3-NEXT: nop ++; O3-NEXT: # %bb.2: # %entry ++; O3-NEXT: jr $ra ++; O3-NEXT: nop ++; ++; MIPS64EB-LABEL: AtomicLoadNand: ++; MIPS64EB: # %bb.0: # %entry ++; MIPS64EB-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadNand))) ++; MIPS64EB-NEXT: daddu $1, $1, $25 ++; MIPS64EB-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadNand))) ++; MIPS64EB-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64EB-NEXT: .LBB5_1: # %entry ++; MIPS64EB-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64EB-NEXT: lld $2, 0($1) ++; MIPS64EB-NEXT: and $3, $2, $4 ++; MIPS64EB-NEXT: nor $3, $zero, $3 ++; MIPS64EB-NEXT: scd $3, 0($1) ++; MIPS64EB-NEXT: beqz $3, .LBB5_1 ++; MIPS64EB-NEXT: nop ++; MIPS64EB-NEXT: # %bb.2: # %entry ++; MIPS64EB-NEXT: jr $ra ++; MIPS64EB-NEXT: nop ++entry: ++ %0 = atomicrmw nand i64* @x, i64 %incr monotonic ++ ret i64 %0 ++ ++} ++ ++define i64 @AtomicSwap64(i64 signext %newval) nounwind { ++; MIPS4-LABEL: AtomicSwap64: ++; MIPS4: # %bb.0: # %entry ++; MIPS4-NEXT: daddiu $sp, $sp, -16 ++; MIPS4-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicSwap64))) ++; MIPS4-NEXT: daddu $1, $1, $25 ++; MIPS4-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicSwap64))) ++; MIPS4-NEXT: sd $4, 8($sp) ++; MIPS4-NEXT: ld $1, %got_disp(x)($1) ++; MIPS4-NEXT: .LBB6_1: # %entry ++; MIPS4-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS4-NEXT: lld $2, 0($1) ++; MIPS4-NEXT: move $3, $4 ++; MIPS4-NEXT: scd $3, 0($1) ++; MIPS4-NEXT: beqz $3, .LBB6_1 ++; MIPS4-NEXT: nop ++; MIPS4-NEXT: # %bb.2: # %entry ++; MIPS4-NEXT: jr $ra ++; MIPS4-NEXT: daddiu $sp, $sp, 16 ++; ++; MIPS64-LABEL: AtomicSwap64: ++; MIPS64: # %bb.0: # %entry ++; MIPS64-NEXT: daddiu $sp, $sp, -16 ++; MIPS64-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicSwap64))) ++; MIPS64-NEXT: daddu $1, $1, $25 ++; MIPS64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicSwap64))) ++; MIPS64-NEXT: sd $4, 8($sp) ++; MIPS64-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64-NEXT: .LBB6_1: # %entry ++; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64-NEXT: lld $2, 0($1) ++; MIPS64-NEXT: move $3, $4 ++; MIPS64-NEXT: scd $3, 0($1) ++; MIPS64-NEXT: beqz $3, .LBB6_1 ++; MIPS64-NEXT: nop ++; MIPS64-NEXT: # %bb.2: # %entry ++; MIPS64-NEXT: jr $ra ++; MIPS64-NEXT: daddiu $sp, $sp, 16 ++; ++; MIPS64R2-LABEL: AtomicSwap64: ++; MIPS64R2: # %bb.0: # %entry ++; MIPS64R2-NEXT: daddiu $sp, $sp, -16 ++; MIPS64R2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicSwap64))) ++; MIPS64R2-NEXT: daddu $1, $1, $25 ++; MIPS64R2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicSwap64))) ++; MIPS64R2-NEXT: sd $4, 8($sp) ++; MIPS64R2-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64R2-NEXT: .LBB6_1: # %entry ++; MIPS64R2-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R2-NEXT: lld $2, 0($1) ++; MIPS64R2-NEXT: move $3, $4 ++; MIPS64R2-NEXT: scd $3, 0($1) ++; MIPS64R2-NEXT: beqz $3, .LBB6_1 ++; MIPS64R2-NEXT: nop ++; MIPS64R2-NEXT: # %bb.2: # %entry ++; MIPS64R2-NEXT: jr $ra ++; MIPS64R2-NEXT: daddiu $sp, $sp, 16 ++; ++; MIPS64R6-LABEL: AtomicSwap64: ++; MIPS64R6: # %bb.0: # %entry ++; MIPS64R6-NEXT: daddiu $sp, $sp, -16 ++; MIPS64R6-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicSwap64))) ++; MIPS64R6-NEXT: daddu $1, $1, $25 ++; MIPS64R6-NEXT: 
daddiu $1, $1, %lo(%neg(%gp_rel(AtomicSwap64))) ++; MIPS64R6-NEXT: sd $4, 8($sp) ++; MIPS64R6-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64R6-NEXT: .LBB6_1: # %entry ++; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6-NEXT: lld $2, 0($1) ++; MIPS64R6-NEXT: move $3, $4 ++; MIPS64R6-NEXT: scd $3, 0($1) ++; MIPS64R6-NEXT: beqzc $3, .LBB6_1 ++; MIPS64R6-NEXT: nop ++; MIPS64R6-NEXT: # %bb.2: # %entry ++; MIPS64R6-NEXT: jr $ra ++; MIPS64R6-NEXT: daddiu $sp, $sp, 16 ++; ++; MIPS64R6O0-LABEL: AtomicSwap64: ++; MIPS64R6O0: # %bb.0: # %entry ++; MIPS64R6O0-NEXT: daddiu $sp, $sp, -16 ++; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicSwap64))) ++; MIPS64R6O0-NEXT: daddu $1, $1, $25 ++; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicSwap64))) ++; MIPS64R6O0-NEXT: sd $4, 8($sp) ++; MIPS64R6O0-NEXT: ld $4, 8($sp) ++; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1) ++; MIPS64R6O0-NEXT: .LBB6_1: # %entry ++; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6O0-NEXT: lld $2, 0($3) ++; MIPS64R6O0-NEXT: move $1, $4 ++; MIPS64R6O0-NEXT: scd $1, 0($3) ++; MIPS64R6O0-NEXT: beqzc $1, .LBB6_1 ++; MIPS64R6O0-NEXT: # %bb.2: # %entry ++; MIPS64R6O0-NEXT: daddiu $sp, $sp, 16 ++; MIPS64R6O0-NEXT: jrc $ra ++; ++; O1-LABEL: AtomicSwap64: ++; O1: # %bb.0: # %entry ++; O1-NEXT: daddiu $sp, $sp, -16 ++; O1-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicSwap64))) ++; O1-NEXT: daddu $1, $1, $25 ++; O1-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicSwap64))) ++; O1-NEXT: sd $4, 8($sp) ++; O1-NEXT: ld $1, %got_disp(x)($1) ++; O1-NEXT: .LBB6_1: # %entry ++; O1-NEXT: # =>This Inner Loop Header: Depth=1 ++; O1-NEXT: lld $2, 0($1) ++; O1-NEXT: move $3, $4 ++; O1-NEXT: scd $3, 0($1) ++; O1-NEXT: beqz $3, .LBB6_1 ++; O1-NEXT: nop ++; O1-NEXT: # %bb.2: # %entry ++; O1-NEXT: jr $ra ++; O1-NEXT: daddiu $sp, $sp, 16 ++; ++; O2-LABEL: AtomicSwap64: ++; O2: # %bb.0: # %entry ++; O2-NEXT: daddiu $sp, $sp, -16 ++; O2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicSwap64))) ++; O2-NEXT: daddu $1, $1, $25 ++; O2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicSwap64))) ++; O2-NEXT: sd $4, 8($sp) ++; O2-NEXT: ld $1, %got_disp(x)($1) ++; O2-NEXT: .LBB6_1: # %entry ++; O2-NEXT: # =>This Inner Loop Header: Depth=1 ++; O2-NEXT: lld $2, 0($1) ++; O2-NEXT: move $3, $4 ++; O2-NEXT: scd $3, 0($1) ++; O2-NEXT: beqz $3, .LBB6_1 ++; O2-NEXT: nop ++; O2-NEXT: # %bb.2: # %entry ++; O2-NEXT: jr $ra ++; O2-NEXT: daddiu $sp, $sp, 16 ++; ++; O3-LABEL: AtomicSwap64: ++; O3: # %bb.0: # %entry ++; O3-NEXT: daddiu $sp, $sp, -16 ++; O3-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicSwap64))) ++; O3-NEXT: sd $4, 8($sp) ++; O3-NEXT: daddu $1, $1, $25 ++; O3-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicSwap64))) ++; O3-NEXT: ld $1, %got_disp(x)($1) ++; O3-NEXT: .LBB6_1: # %entry ++; O3-NEXT: # =>This Inner Loop Header: Depth=1 ++; O3-NEXT: lld $2, 0($1) ++; O3-NEXT: move $3, $4 ++; O3-NEXT: scd $3, 0($1) ++; O3-NEXT: beqz $3, .LBB6_1 ++; O3-NEXT: nop ++; O3-NEXT: # %bb.2: # %entry ++; O3-NEXT: jr $ra ++; O3-NEXT: daddiu $sp, $sp, 16 ++; ++; MIPS64EB-LABEL: AtomicSwap64: ++; MIPS64EB: # %bb.0: # %entry ++; MIPS64EB-NEXT: daddiu $sp, $sp, -16 ++; MIPS64EB-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicSwap64))) ++; MIPS64EB-NEXT: daddu $1, $1, $25 ++; MIPS64EB-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicSwap64))) ++; MIPS64EB-NEXT: sd $4, 8($sp) ++; MIPS64EB-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64EB-NEXT: .LBB6_1: # %entry ++; MIPS64EB-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64EB-NEXT: lld $2, 0($1) ++; MIPS64EB-NEXT: move $3, $4 ++; MIPS64EB-NEXT: scd $3, 0($1) ++; 
MIPS64EB-NEXT: beqz $3, .LBB6_1 ++; MIPS64EB-NEXT: nop ++; MIPS64EB-NEXT: # %bb.2: # %entry ++; MIPS64EB-NEXT: jr $ra ++; MIPS64EB-NEXT: daddiu $sp, $sp, 16 ++entry: ++ %newval.addr = alloca i64, align 4 ++ store i64 %newval, i64* %newval.addr, align 4 ++ %tmp = load i64, i64* %newval.addr, align 4 ++ %0 = atomicrmw xchg i64* @x, i64 %tmp monotonic ++ ret i64 %0 ++ ++} ++ ++define i64 @AtomicCmpSwap64(i64 signext %oldval, i64 signext %newval) nounwind { ++; MIPS4-LABEL: AtomicCmpSwap64: ++; MIPS4: # %bb.0: # %entry ++; MIPS4-NEXT: daddiu $sp, $sp, -16 ++; MIPS4-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicCmpSwap64))) ++; MIPS4-NEXT: daddu $1, $1, $25 ++; MIPS4-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicCmpSwap64))) ++; MIPS4-NEXT: sd $5, 8($sp) ++; MIPS4-NEXT: ld $1, %got_disp(x)($1) ++; MIPS4-NEXT: .LBB7_1: # %entry ++; MIPS4-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS4-NEXT: lld $2, 0($1) ++; MIPS4-NEXT: bne $2, $4, .LBB7_3 ++; MIPS4-NEXT: nop ++; MIPS4-NEXT: # %bb.2: # %entry ++; MIPS4-NEXT: # in Loop: Header=BB7_1 Depth=1 ++; MIPS4-NEXT: move $3, $5 ++; MIPS4-NEXT: scd $3, 0($1) ++; MIPS4-NEXT: beqz $3, .LBB7_1 ++; MIPS4-NEXT: nop ++; MIPS4-NEXT: .LBB7_3: # %entry ++; MIPS4-NEXT: sync ++; MIPS4-NEXT: jr $ra ++; MIPS4-NEXT: daddiu $sp, $sp, 16 ++; ++; MIPS64-LABEL: AtomicCmpSwap64: ++; MIPS64: # %bb.0: # %entry ++; MIPS64-NEXT: daddiu $sp, $sp, -16 ++; MIPS64-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicCmpSwap64))) ++; MIPS64-NEXT: daddu $1, $1, $25 ++; MIPS64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicCmpSwap64))) ++; MIPS64-NEXT: sd $5, 8($sp) ++; MIPS64-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64-NEXT: .LBB7_1: # %entry ++; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64-NEXT: lld $2, 0($1) ++; MIPS64-NEXT: bne $2, $4, .LBB7_3 ++; MIPS64-NEXT: nop ++; MIPS64-NEXT: # %bb.2: # %entry ++; MIPS64-NEXT: # in Loop: Header=BB7_1 Depth=1 ++; MIPS64-NEXT: move $3, $5 ++; MIPS64-NEXT: scd $3, 0($1) ++; MIPS64-NEXT: beqz $3, .LBB7_1 ++; MIPS64-NEXT: nop ++; MIPS64-NEXT: .LBB7_3: # %entry ++; MIPS64-NEXT: sync ++; MIPS64-NEXT: jr $ra ++; MIPS64-NEXT: daddiu $sp, $sp, 16 ++; ++; MIPS64R2-LABEL: AtomicCmpSwap64: ++; MIPS64R2: # %bb.0: # %entry ++; MIPS64R2-NEXT: daddiu $sp, $sp, -16 ++; MIPS64R2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicCmpSwap64))) ++; MIPS64R2-NEXT: daddu $1, $1, $25 ++; MIPS64R2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicCmpSwap64))) ++; MIPS64R2-NEXT: sd $5, 8($sp) ++; MIPS64R2-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64R2-NEXT: .LBB7_1: # %entry ++; MIPS64R2-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R2-NEXT: lld $2, 0($1) ++; MIPS64R2-NEXT: bne $2, $4, .LBB7_3 ++; MIPS64R2-NEXT: nop ++; MIPS64R2-NEXT: # %bb.2: # %entry ++; MIPS64R2-NEXT: # in Loop: Header=BB7_1 Depth=1 ++; MIPS64R2-NEXT: move $3, $5 ++; MIPS64R2-NEXT: scd $3, 0($1) ++; MIPS64R2-NEXT: beqz $3, .LBB7_1 ++; MIPS64R2-NEXT: nop ++; MIPS64R2-NEXT: .LBB7_3: # %entry ++; MIPS64R2-NEXT: sync ++; MIPS64R2-NEXT: jr $ra ++; MIPS64R2-NEXT: daddiu $sp, $sp, 16 ++; ++; MIPS64R6-LABEL: AtomicCmpSwap64: ++; MIPS64R6: # %bb.0: # %entry ++; MIPS64R6-NEXT: daddiu $sp, $sp, -16 ++; MIPS64R6-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicCmpSwap64))) ++; MIPS64R6-NEXT: daddu $1, $1, $25 ++; MIPS64R6-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicCmpSwap64))) ++; MIPS64R6-NEXT: sd $5, 8($sp) ++; MIPS64R6-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64R6-NEXT: .LBB7_1: # %entry ++; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6-NEXT: lld $2, 0($1) ++; MIPS64R6-NEXT: bnec $2, $4, .LBB7_3 ++; MIPS64R6-NEXT: # %bb.2: # %entry ++; 
MIPS64R6-NEXT: # in Loop: Header=BB7_1 Depth=1 ++; MIPS64R6-NEXT: move $3, $5 ++; MIPS64R6-NEXT: scd $3, 0($1) ++; MIPS64R6-NEXT: beqzc $3, .LBB7_1 ++; MIPS64R6-NEXT: .LBB7_3: # %entry ++; MIPS64R6-NEXT: sync ++; MIPS64R6-NEXT: jr $ra ++; MIPS64R6-NEXT: daddiu $sp, $sp, 16 ++; ++; MIPS64R6O0-LABEL: AtomicCmpSwap64: ++; MIPS64R6O0: # %bb.0: # %entry ++; MIPS64R6O0-NEXT: daddiu $sp, $sp, -16 ++; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicCmpSwap64))) ++; MIPS64R6O0-NEXT: daddu $1, $1, $25 ++; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicCmpSwap64))) ++; MIPS64R6O0-NEXT: sd $5, 8($sp) ++; MIPS64R6O0-NEXT: ld $5, 8($sp) ++; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1) ++; MIPS64R6O0-NEXT: .LBB7_1: # %entry ++; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6O0-NEXT: lld $2, 0($3) ++; MIPS64R6O0-NEXT: bnec $2, $4, .LBB7_3 ++; MIPS64R6O0-NEXT: # %bb.2: # %entry ++; MIPS64R6O0-NEXT: # in Loop: Header=BB7_1 Depth=1 ++; MIPS64R6O0-NEXT: move $1, $5 ++; MIPS64R6O0-NEXT: scd $1, 0($3) ++; MIPS64R6O0-NEXT: beqzc $1, .LBB7_1 ++; MIPS64R6O0-NEXT: .LBB7_3: # %entry ++; MIPS64R6O0-NEXT: sync ++; MIPS64R6O0-NEXT: daddiu $sp, $sp, 16 ++; MIPS64R6O0-NEXT: jrc $ra ++; ++; O1-LABEL: AtomicCmpSwap64: ++; O1: # %bb.0: # %entry ++; O1-NEXT: daddiu $sp, $sp, -16 ++; O1-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicCmpSwap64))) ++; O1-NEXT: daddu $1, $1, $25 ++; O1-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicCmpSwap64))) ++; O1-NEXT: sd $5, 8($sp) ++; O1-NEXT: ld $1, %got_disp(x)($1) ++; O1-NEXT: .LBB7_1: # %entry ++; O1-NEXT: # =>This Inner Loop Header: Depth=1 ++; O1-NEXT: lld $2, 0($1) ++; O1-NEXT: bne $2, $4, .LBB7_3 ++; O1-NEXT: nop ++; O1-NEXT: # %bb.2: # %entry ++; O1-NEXT: # in Loop: Header=BB7_1 Depth=1 ++; O1-NEXT: move $3, $5 ++; O1-NEXT: scd $3, 0($1) ++; O1-NEXT: beqz $3, .LBB7_1 ++; O1-NEXT: nop ++; O1-NEXT: .LBB7_3: # %entry ++; O1-NEXT: sync ++; O1-NEXT: jr $ra ++; O1-NEXT: daddiu $sp, $sp, 16 ++; ++; O2-LABEL: AtomicCmpSwap64: ++; O2: # %bb.0: # %entry ++; O2-NEXT: daddiu $sp, $sp, -16 ++; O2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicCmpSwap64))) ++; O2-NEXT: daddu $1, $1, $25 ++; O2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicCmpSwap64))) ++; O2-NEXT: sd $5, 8($sp) ++; O2-NEXT: ld $1, %got_disp(x)($1) ++; O2-NEXT: .LBB7_1: # %entry ++; O2-NEXT: # =>This Inner Loop Header: Depth=1 ++; O2-NEXT: lld $2, 0($1) ++; O2-NEXT: bne $2, $4, .LBB7_3 ++; O2-NEXT: nop ++; O2-NEXT: # %bb.2: # %entry ++; O2-NEXT: # in Loop: Header=BB7_1 Depth=1 ++; O2-NEXT: move $3, $5 ++; O2-NEXT: scd $3, 0($1) ++; O2-NEXT: beqz $3, .LBB7_1 ++; O2-NEXT: nop ++; O2-NEXT: .LBB7_3: # %entry ++; O2-NEXT: sync ++; O2-NEXT: jr $ra ++; O2-NEXT: daddiu $sp, $sp, 16 ++; ++; O3-LABEL: AtomicCmpSwap64: ++; O3: # %bb.0: # %entry ++; O3-NEXT: daddiu $sp, $sp, -16 ++; O3-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicCmpSwap64))) ++; O3-NEXT: sd $5, 8($sp) ++; O3-NEXT: daddu $1, $1, $25 ++; O3-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicCmpSwap64))) ++; O3-NEXT: ld $1, %got_disp(x)($1) ++; O3-NEXT: .LBB7_1: # %entry ++; O3-NEXT: # =>This Inner Loop Header: Depth=1 ++; O3-NEXT: lld $2, 0($1) ++; O3-NEXT: bne $2, $4, .LBB7_3 ++; O3-NEXT: nop ++; O3-NEXT: # %bb.2: # %entry ++; O3-NEXT: # in Loop: Header=BB7_1 Depth=1 ++; O3-NEXT: move $3, $5 ++; O3-NEXT: scd $3, 0($1) ++; O3-NEXT: beqz $3, .LBB7_1 ++; O3-NEXT: nop ++; O3-NEXT: .LBB7_3: # %entry ++; O3-NEXT: sync ++; O3-NEXT: jr $ra ++; O3-NEXT: daddiu $sp, $sp, 16 ++; ++; MIPS64EB-LABEL: AtomicCmpSwap64: ++; MIPS64EB: # %bb.0: # %entry ++; MIPS64EB-NEXT: daddiu $sp, $sp, -16 ++; 
MIPS64EB-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicCmpSwap64))) ++; MIPS64EB-NEXT: daddu $1, $1, $25 ++; MIPS64EB-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicCmpSwap64))) ++; MIPS64EB-NEXT: sd $5, 8($sp) ++; MIPS64EB-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64EB-NEXT: .LBB7_1: # %entry ++; MIPS64EB-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64EB-NEXT: lld $2, 0($1) ++; MIPS64EB-NEXT: bne $2, $4, .LBB7_3 ++; MIPS64EB-NEXT: nop ++; MIPS64EB-NEXT: # %bb.2: # %entry ++; MIPS64EB-NEXT: # in Loop: Header=BB7_1 Depth=1 ++; MIPS64EB-NEXT: move $3, $5 ++; MIPS64EB-NEXT: scd $3, 0($1) ++; MIPS64EB-NEXT: beqz $3, .LBB7_1 ++; MIPS64EB-NEXT: nop ++; MIPS64EB-NEXT: .LBB7_3: # %entry ++; MIPS64EB-NEXT: sync ++; MIPS64EB-NEXT: jr $ra ++; MIPS64EB-NEXT: daddiu $sp, $sp, 16 ++entry: ++ %newval.addr = alloca i64, align 4 ++ store i64 %newval, i64* %newval.addr, align 4 ++ %tmp = load i64, i64* %newval.addr, align 4 ++ %0 = cmpxchg i64* @x, i64 %oldval, i64 %tmp monotonic monotonic ++ %1 = extractvalue { i64, i1 } %0, 0 ++ ret i64 %1 ++ ++} +diff --git a/llvm/test/CodeGen/Mips/atomic64.ll b/llvm/test/CodeGen/Mips/atomic64.ll +index d27c9ac42..6e647211d 100644 +--- a/llvm/test/CodeGen/Mips/atomic64.ll ++++ b/llvm/test/CodeGen/Mips/atomic64.ll +@@ -1,23 +1,23 @@ + ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +-; RUN: llc -mtriple=mips64el-unknown-linux-gnu --disable-machine-licm -mcpu=mips4 -relocation-model=pic -verify-machineinstrs < %s | \ ++; RUN: llc -mips-fix-loongson3-llsc=0 -mtriple=mips64el-unknown-linux-gnu --disable-machine-licm -mcpu=mips4 -relocation-model=pic -verify-machineinstrs < %s | \ + ; RUN: FileCheck %s -check-prefix=MIPS4 +-; RUN: llc -mtriple=mips64el-unknown-linux-gnu --disable-machine-licm -mcpu=mips64 -relocation-model=pic -verify-machineinstrs < %s | \ ++; RUN: llc -mips-fix-loongson3-llsc=0 -mtriple=mips64el-unknown-linux-gnu --disable-machine-licm -mcpu=mips64 -relocation-model=pic -verify-machineinstrs < %s | \ + ; RUN: FileCheck %s -check-prefix=MIPS64 +-; RUN: llc -mtriple=mips64el-unknown-linux-gnu --disable-machine-licm -mcpu=mips64r2 -relocation-model=pic -verify-machineinstrs < %s | \ ++; RUN: llc -mips-fix-loongson3-llsc=0 -mtriple=mips64el-unknown-linux-gnu --disable-machine-licm -mcpu=mips64r2 -relocation-model=pic -verify-machineinstrs < %s | \ + ; RUN: FileCheck %s -check-prefix=MIPS64R2 +-; RUN: llc -mtriple=mips64el-unknown-linux-gnu --disable-machine-licm -mcpu=mips64r6 -relocation-model=pic -verify-machineinstrs < %s | \ ++; RUN: llc -mips-fix-loongson3-llsc=0 -mtriple=mips64el-unknown-linux-gnu --disable-machine-licm -mcpu=mips64r6 -relocation-model=pic -verify-machineinstrs < %s | \ + ; RUN: FileCheck %s -check-prefix=MIPS64R6 +-; RUN: llc -mtriple=mips64-unknown-linux-gnu -O0 -mcpu=mips64r6 -relocation-model=pic -verify-machineinstrs -verify-machineinstrs < %s | \ ++; RUN: llc -mips-fix-loongson3-llsc=0 -mtriple=mips64-unknown-linux-gnu -O0 -mcpu=mips64r6 -relocation-model=pic -verify-machineinstrs -verify-machineinstrs < %s | \ + ; RUN: FileCheck %s -check-prefix=MIPS64R6O0 + + ; We want to verify the produced code is well formed all optimization levels, the rest of the test which ensure correctness. 
+-; RUN: llc -mtriple=mips64el-unknown-linux-gnu -O1 --disable-machine-licm -mcpu=mips64 -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s --check-prefix=O1 +-; RUN: llc -mtriple=mips64el-unknown-linux-gnu -O2 --disable-machine-licm -mcpu=mips64 -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s --check-prefix=O2 +-; RUN: llc -mtriple=mips64el-unknown-linux-gnu -O3 --disable-machine-licm -mcpu=mips64 -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s --check-prefix=O3 ++; RUN: llc -mips-fix-loongson3-llsc=0 -mtriple=mips64el-unknown-linux-gnu -O1 --disable-machine-licm -mcpu=mips64 -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s --check-prefix=O1 ++; RUN: llc -mips-fix-loongson3-llsc=0 -mtriple=mips64el-unknown-linux-gnu -O2 --disable-machine-licm -mcpu=mips64 -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s --check-prefix=O2 ++; RUN: llc -mips-fix-loongson3-llsc=0 -mtriple=mips64el-unknown-linux-gnu -O3 --disable-machine-licm -mcpu=mips64 -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s --check-prefix=O3 + + ; Keep one big-endian check so that we don't reduce testing, but don't add more + ; since endianness doesn't affect the body of the atomic operations. +-; RUN: llc -mtriple=mips64-unknown-linux-gnu --disable-machine-licm -mcpu=mips64 -relocation-model=pic -verify-machineinstrs < %s | \ ++; RUN: llc -mips-fix-loongson3-llsc=0 -mtriple=mips64-unknown-linux-gnu --disable-machine-licm -mcpu=mips64 -relocation-model=pic -verify-machineinstrs < %s | \ + ; RUN: FileCheck %s -check-prefix=MIPS64EB + + @x = common global i64 0, align 4 +diff --git a/llvm/test/MC/LoongArch/cgprofile.ll b/llvm/test/MC/LoongArch/cgprofile.ll +new file mode 100644 +index 000000000..686dd6a0a +--- /dev/null ++++ b/llvm/test/MC/LoongArch/cgprofile.ll +@@ -0,0 +1,63 @@ ++;; Copied from llvm/test/MC/ELF/cgprofile.ll but use different triple. 
++ ++; RUN: llc -filetype=asm %s -o - --mtriple=loongarch64-linux-linux-gnu | FileCheck %s ++; RUN: llc -filetype=obj %s -o %t --mtriple=loongarch64-linux-linux-gnu ++; RUN: llvm-readobj -r --cg-profile %t | FileCheck %s --check-prefix=OBJ ++ ++declare void @b() ++ ++define void @a() { ++ call void @b() ++ ret void ++} ++ ++define void @freq(i1 %cond) { ++ br i1 %cond, label %A, label %B ++A: ++ call void @a(); ++ ret void ++B: ++ call void @b(); ++ ret void ++} ++ ++!llvm.module.flags = !{!0} ++ ++!0 = !{i32 5, !"CG Profile", !1} ++!1 = !{!2, !3, !4, !5} ++!2 = !{void ()* @a, void ()* @b, i64 32} ++!3 = !{void (i1)* @freq, void ()* @a, i64 11} ++!4 = !{void (i1)* @freq, void ()* @b, i64 20} ++!5 = !{void (i1)* @freq, null, i64 20} ++ ++; CHECK: .cg_profile a, b, 32 ++; CHECK: .cg_profile freq, a, 11 ++; CHECK: .cg_profile freq, b, 20 ++ ++; OBJ: Relocations [ ++; OBJ: Section ({{.*}}) .rel.llvm.call-graph-profile { ++; OBJ-NEXT: 0x0 R_LARCH_NONE a ++; OBJ-NEXT: 0x0 R_LARCH_NONE b ++; OBJ-NEXT: 0x8 R_LARCH_NONE freq ++; OBJ-NEXT: 0x8 R_LARCH_NONE a ++; OBJ-NEXT: 0x10 R_LARCH_NONE freq ++; OBJ-NEXT: 0x10 R_LARCH_NONE b ++; OBJ-NEXT: } ++ ++; OBJ: CGProfile [ ++; OBJ: CGProfileEntry { ++; OBJ: From: a ++; OBJ: To: b ++; OBJ: Weight: 32 ++; OBJ: } ++; OBJ: CGProfileEntry { ++; OBJ: From: freq ++; OBJ: To: a ++; OBJ: Weight: 11 ++; OBJ: } ++; OBJ: CGProfileEntry { ++; OBJ: From: freq ++; OBJ: To: b ++; OBJ: Weight: 20 ++; OBJ: } ++; OBJ:] +diff --git a/llvm/test/MC/LoongArch/cgprofile.s b/llvm/test/MC/LoongArch/cgprofile.s +new file mode 100644 +index 000000000..53f59e5d3 +--- /dev/null ++++ b/llvm/test/MC/LoongArch/cgprofile.s +@@ -0,0 +1,30 @@ ++## Copied from llvm/test/MC/ELF/cgprofile.s but use different triple. ++ ++# RUN: llvm-mc --filetype=obj --triple=loongarch64-linux-gnu %s -o - | llvm-readobj -r -S --symbols --sd --cg-profile - | FileCheck %s ++ ++ .section .test,"aw",@progbits ++a: .word b ++ ++ .cg_profile a, b, 32 ++ .cg_profile freq, a, 11 ++ .cg_profile late, late2, 20 ++ .cg_profile .L.local, b, 42 ++ ++ .globl late ++late: ++late2: .word 0 ++late3: ++.L.local: ++ ++# CHECK: Relocations [ ++# CHECK: Section ({{.*}}) .rel.llvm.call-graph-profile { ++# CHECK-NEXT: 0x0 R_LARCH_NONE a ++# CHECK-NEXT: 0x0 R_LARCH_NONE b ++# CHECK-NEXT: 0x8 R_LARCH_NONE freq ++# CHECK-NEXT: 0x8 R_LARCH_NONE a ++# CHECK-NEXT: 0x10 R_LARCH_NONE late ++# CHECK-NEXT: 0x10 R_LARCH_NONE late2 ++# CHECK-NEXT: 0x18 R_LARCH_NONE .test ++# CHECK-NEXT: 0x18 R_LARCH_NONE b ++# CHECK-NEXT: } ++# CHECK-NEXT: ] +diff --git a/llvm/test/MC/LoongArch/data_half.s b/llvm/test/MC/LoongArch/data_half.s +new file mode 100644 +index 000000000..a8efeaace +--- /dev/null ++++ b/llvm/test/MC/LoongArch/data_half.s +@@ -0,0 +1,13 @@ ++# RUN: llvm-mc --triple=loongarch64 < %s | FileCheck %s ++ ++.data ++ ++# CHECK: .half 1 ++# CHECK-NEXT: .half 65535 ++.half 0x1 ++.half 0xffff ++ ++# CHECK: .half 1 ++# CHECK-NEXT: .half 65535 ++.2byte 0x1 ++.2byte 0xffff +diff --git a/llvm/test/MC/LoongArch/reloc-directive-err.s b/llvm/test/MC/LoongArch/reloc-directive-err.s +new file mode 100644 +index 000000000..60fd14556 +--- /dev/null ++++ b/llvm/test/MC/LoongArch/reloc-directive-err.s +@@ -0,0 +1,7 @@ ++# RUN: llvm-mc --triple=loongarch64 %s | FileCheck --check-prefix=PRINT %s ++# RUN: not llvm-mc --filetype=obj --triple=loongarch64 %s -o /dev/null 2>&1 \ ++# RUN: | FileCheck %s ++ ++# PRINT: .reloc 0, R_INVALID, 0 ++# CHECK: {{.*}}.s:[[# @LINE+1]]:11: error: unknown relocation name ++.reloc 0, R_INVALID, 0 +diff --git 
a/llvm/test/MC/LoongArch/reloc-directive.s b/llvm/test/MC/LoongArch/reloc-directive.s +new file mode 100644 +index 000000000..282da7f28 +--- /dev/null ++++ b/llvm/test/MC/LoongArch/reloc-directive.s +@@ -0,0 +1,177 @@ ++# RUN: llvm-mc --triple=loongarch64 %s | FileCheck --check-prefix=PRINT %s ++# RUN: llvm-mc --filetype=obj --triple=loongarch64 %s \ ++# RUN: | llvm-readobj -r - | FileCheck %s ++ ++# PRINT: .reloc 0, R_LARCH_NONE, 0 ++# PRINT-NEXT: .reloc 1, R_LARCH_32, 1 ++# PRINT-NEXT: .reloc 2, R_LARCH_64, 2 ++# PRINT-NEXT: .reloc 3, R_LARCH_RELATIVE, 3 ++# PRINT-NEXT: .reloc 4, R_LARCH_COPY, 4 ++# PRINT-NEXT: .reloc 5, R_LARCH_JUMP_SLOT, 5 ++# PRINT-NEXT: .reloc 6, R_LARCH_TLS_DTPMOD32, 6 ++# PRINT-NEXT: .reloc 7, R_LARCH_TLS_DTPMOD64, 7 ++# PRINT-NEXT: .reloc 8, R_LARCH_TLS_DTPREL32, 8 ++# PRINT-NEXT: .reloc 9, R_LARCH_TLS_DTPREL64, 9 ++# PRINT-NEXT: .reloc 10, R_LARCH_TLS_TPREL32, 10 ++# PRINT-NEXT: .reloc 11, R_LARCH_TLS_TPREL64, 11 ++# PRINT-NEXT: .reloc 12, R_LARCH_IRELATIVE, 12 ++# PRINT-NEXT: .reloc 13, BFD_RELOC_NONE, 13 ++# PRINT-NEXT: .reloc 14, BFD_RELOC_32, 14 ++# PRINT-NEXT: .reloc 15, BFD_RELOC_64, 15 ++# PRINT-NEXT: .reloc 20, R_LARCH_MARK_LA, 20 ++# PRINT-NEXT: .reloc 21, R_LARCH_MARK_PCREL, 21 ++# PRINT-NEXT: .reloc 22, R_LARCH_SOP_PUSH_PCREL, 22 ++# PRINT-NEXT: .reloc 23, R_LARCH_SOP_PUSH_ABSOLUTE, 23 ++# PRINT-NEXT: .reloc 24, R_LARCH_SOP_PUSH_DUP, 24 ++# PRINT-NEXT: .reloc 25, R_LARCH_SOP_PUSH_GPREL, 25 ++# PRINT-NEXT: .reloc 26, R_LARCH_SOP_PUSH_TLS_TPREL, 26 ++# PRINT-NEXT: .reloc 27, R_LARCH_SOP_PUSH_TLS_GOT, 27 ++# PRINT-NEXT: .reloc 28, R_LARCH_SOP_PUSH_TLS_GD, 28 ++# PRINT-NEXT: .reloc 29, R_LARCH_SOP_PUSH_PLT_PCREL, 29 ++# PRINT-NEXT: .reloc 30, R_LARCH_SOP_ASSERT, 30 ++# PRINT-NEXT: .reloc 31, R_LARCH_SOP_NOT, 31 ++# PRINT-NEXT: .reloc 32, R_LARCH_SOP_SUB, 32 ++# PRINT-NEXT: .reloc 33, R_LARCH_SOP_SL, 33 ++# PRINT-NEXT: .reloc 34, R_LARCH_SOP_SR, 34 ++# PRINT-NEXT: .reloc 35, R_LARCH_SOP_ADD, 35 ++# PRINT-NEXT: .reloc 36, R_LARCH_SOP_AND, 36 ++# PRINT-NEXT: .reloc 37, R_LARCH_SOP_IF_ELSE, 37 ++# PRINT-NEXT: .reloc 38, R_LARCH_SOP_POP_32_S_10_5, 38 ++# PRINT-NEXT: .reloc 39, R_LARCH_SOP_POP_32_U_10_12, 39 ++# PRINT-NEXT: .reloc 40, R_LARCH_SOP_POP_32_S_10_12, 40 ++# PRINT-NEXT: .reloc 41, R_LARCH_SOP_POP_32_S_10_16, 41 ++# PRINT-NEXT: .reloc 42, R_LARCH_SOP_POP_32_S_10_16_S2, 42 ++# PRINT-NEXT: .reloc 43, R_LARCH_SOP_POP_32_S_5_20, 43 ++# PRINT-NEXT: .reloc 44, R_LARCH_SOP_POP_32_S_0_5_10_16_S2, 44 ++# PRINT-NEXT: .reloc 45, R_LARCH_SOP_POP_32_S_0_10_10_16_S2, 45 ++# PRINT-NEXT: .reloc 46, R_LARCH_SOP_POP_32_U, 46 ++# PRINT-NEXT: .reloc 47, R_LARCH_ADD8, 47 ++# PRINT-NEXT: .reloc 48, R_LARCH_ADD16, 48 ++# PRINT-NEXT: .reloc 49, R_LARCH_ADD24, 49 ++# PRINT-NEXT: .reloc 50, R_LARCH_ADD32, 50 ++# PRINT-NEXT: .reloc 51, R_LARCH_ADD64, 51 ++# PRINT-NEXT: .reloc 52, R_LARCH_SUB8, 52 ++# PRINT-NEXT: .reloc 53, R_LARCH_SUB16, 53 ++# PRINT-NEXT: .reloc 54, R_LARCH_SUB24, 54 ++# PRINT-NEXT: .reloc 55, R_LARCH_SUB32, 55 ++# PRINT-NEXT: .reloc 56, R_LARCH_SUB64, 56 ++# PRINT-NEXT: .reloc 57, R_LARCH_GNU_VTINHERIT, 57 ++# PRINT-NEXT: .reloc 58, R_LARCH_GNU_VTENTRY, 58 ++ ++.text ++ .fill 59, 1, 0x0 ++ .reloc 0, R_LARCH_NONE, 0 ++ .reloc 1, R_LARCH_32, 1 ++ .reloc 2, R_LARCH_64, 2 ++ .reloc 3, R_LARCH_RELATIVE, 3 ++ .reloc 4, R_LARCH_COPY, 4 ++ .reloc 5, R_LARCH_JUMP_SLOT, 5 ++ .reloc 6, R_LARCH_TLS_DTPMOD32, 6 ++ .reloc 7, R_LARCH_TLS_DTPMOD64, 7 ++ .reloc 8, R_LARCH_TLS_DTPREL32, 8 ++ .reloc 9, R_LARCH_TLS_DTPREL64, 9 ++ .reloc 10, R_LARCH_TLS_TPREL32, 10 ++ .reloc 11, 
R_LARCH_TLS_TPREL64, 11 ++ .reloc 12, R_LARCH_IRELATIVE, 12 ++ .reloc 13, BFD_RELOC_NONE, 13 ++ .reloc 14, BFD_RELOC_32, 14 ++ .reloc 15, BFD_RELOC_64, 15 ++ .reloc 20, R_LARCH_MARK_LA, 20 ++ .reloc 21, R_LARCH_MARK_PCREL, 21 ++ .reloc 22, R_LARCH_SOP_PUSH_PCREL, 22 ++ .reloc 23, R_LARCH_SOP_PUSH_ABSOLUTE, 23 ++ .reloc 24, R_LARCH_SOP_PUSH_DUP, 24 ++ .reloc 25, R_LARCH_SOP_PUSH_GPREL, 25 ++ .reloc 26, R_LARCH_SOP_PUSH_TLS_TPREL, 26 ++ .reloc 27, R_LARCH_SOP_PUSH_TLS_GOT, 27 ++ .reloc 28, R_LARCH_SOP_PUSH_TLS_GD, 28 ++ .reloc 29, R_LARCH_SOP_PUSH_PLT_PCREL, 29 ++ .reloc 30, R_LARCH_SOP_ASSERT, 30 ++ .reloc 31, R_LARCH_SOP_NOT, 31 ++ .reloc 32, R_LARCH_SOP_SUB, 32 ++ .reloc 33, R_LARCH_SOP_SL, 33 ++ .reloc 34, R_LARCH_SOP_SR, 34 ++ .reloc 35, R_LARCH_SOP_ADD, 35 ++ .reloc 36, R_LARCH_SOP_AND, 36 ++ .reloc 37, R_LARCH_SOP_IF_ELSE, 37 ++ .reloc 38, R_LARCH_SOP_POP_32_S_10_5, 38 ++ .reloc 39, R_LARCH_SOP_POP_32_U_10_12, 39 ++ .reloc 40, R_LARCH_SOP_POP_32_S_10_12, 40 ++ .reloc 41, R_LARCH_SOP_POP_32_S_10_16, 41 ++ .reloc 42, R_LARCH_SOP_POP_32_S_10_16_S2, 42 ++ .reloc 43, R_LARCH_SOP_POP_32_S_5_20, 43 ++ .reloc 44, R_LARCH_SOP_POP_32_S_0_5_10_16_S2, 44 ++ .reloc 45, R_LARCH_SOP_POP_32_S_0_10_10_16_S2, 45 ++ .reloc 46, R_LARCH_SOP_POP_32_U, 46 ++ .reloc 47, R_LARCH_ADD8, 47 ++ .reloc 48, R_LARCH_ADD16, 48 ++ .reloc 49, R_LARCH_ADD24, 49 ++ .reloc 50, R_LARCH_ADD32, 50 ++ .reloc 51, R_LARCH_ADD64, 51 ++ .reloc 52, R_LARCH_SUB8, 52 ++ .reloc 53, R_LARCH_SUB16, 53 ++ .reloc 54, R_LARCH_SUB24, 54 ++ .reloc 55, R_LARCH_SUB32, 55 ++ .reloc 56, R_LARCH_SUB64, 56 ++ .reloc 57, R_LARCH_GNU_VTINHERIT, 57 ++ .reloc 58, R_LARCH_GNU_VTENTRY, 58 ++ ++# CHECK: Relocations [ ++# CHECK-NEXT: Section ({{.*}}) .rela.text { ++# CHECK-NEXT: 0x0 R_LARCH_NONE - 0x0 ++# CHECK-NEXT: 0x1 R_LARCH_32 - 0x1 ++# CHECK-NEXT: 0x2 R_LARCH_64 - 0x2 ++# CHECK-NEXT: 0x3 R_LARCH_RELATIVE - 0x3 ++# CHECK-NEXT: 0x4 R_LARCH_COPY - 0x4 ++# CHECK-NEXT: 0x5 R_LARCH_JUMP_SLOT - 0x5 ++# CHECK-NEXT: 0x6 R_LARCH_TLS_DTPMOD32 - 0x6 ++# CHECK-NEXT: 0x7 R_LARCH_TLS_DTPMOD64 - 0x7 ++# CHECK-NEXT: 0x8 R_LARCH_TLS_DTPREL32 - 0x8 ++# CHECK-NEXT: 0x9 R_LARCH_TLS_DTPREL64 - 0x9 ++# CHECK-NEXT: 0xA R_LARCH_TLS_TPREL32 - 0xA ++# CHECK-NEXT: 0xB R_LARCH_TLS_TPREL64 - 0xB ++# CHECK-NEXT: 0xC R_LARCH_IRELATIVE - 0xC ++# CHECK-NEXT: 0xD R_LARCH_NONE - 0xD ++# CHECK-NEXT: 0xE R_LARCH_32 - 0xE ++# CHECK-NEXT: 0xF R_LARCH_64 - 0xF ++# CHECK-NEXT: 0x14 R_LARCH_MARK_LA - 0x14 ++# CHECK-NEXT: 0x15 R_LARCH_MARK_PCREL - 0x15 ++# CHECK-NEXT: 0x16 R_LARCH_SOP_PUSH_PCREL - 0x16 ++# CHECK-NEXT: 0x17 R_LARCH_SOP_PUSH_ABSOLUTE - 0x17 ++# CHECK-NEXT: 0x18 R_LARCH_SOP_PUSH_DUP - 0x18 ++# CHECK-NEXT: 0x19 R_LARCH_SOP_PUSH_GPREL - 0x19 ++# CHECK-NEXT: 0x1A R_LARCH_SOP_PUSH_TLS_TPREL - 0x1A ++# CHECK-NEXT: 0x1B R_LARCH_SOP_PUSH_TLS_GOT - 0x1B ++# CHECK-NEXT: 0x1C R_LARCH_SOP_PUSH_TLS_GD - 0x1C ++# CHECK-NEXT: 0x1D R_LARCH_SOP_PUSH_PLT_PCREL - 0x1D ++# CHECK-NEXT: 0x1E R_LARCH_SOP_ASSERT - 0x1E ++# CHECK-NEXT: 0x1F R_LARCH_SOP_NOT - 0x1F ++# CHECK-NEXT: 0x20 R_LARCH_SOP_SUB - 0x20 ++# CHECK-NEXT: 0x21 R_LARCH_SOP_SL - 0x21 ++# CHECK-NEXT: 0x22 R_LARCH_SOP_SR - 0x22 ++# CHECK-NEXT: 0x23 R_LARCH_SOP_ADD - 0x23 ++# CHECK-NEXT: 0x24 R_LARCH_SOP_AND - 0x24 ++# CHECK-NEXT: 0x25 R_LARCH_SOP_IF_ELSE - 0x25 ++# CHECK-NEXT: 0x26 R_LARCH_SOP_POP_32_S_10_5 - 0x26 ++# CHECK-NEXT: 0x27 R_LARCH_SOP_POP_32_U_10_12 - 0x27 ++# CHECK-NEXT: 0x28 R_LARCH_SOP_POP_32_S_10_12 - 0x28 ++# CHECK-NEXT: 0x29 R_LARCH_SOP_POP_32_S_10_16 - 0x29 ++# CHECK-NEXT: 0x2A R_LARCH_SOP_POP_32_S_10_16_S2 - 0x2A ++# 
CHECK-NEXT: 0x2B R_LARCH_SOP_POP_32_S_5_20 - 0x2B ++# CHECK-NEXT: 0x2C R_LARCH_SOP_POP_32_S_0_5_10_16_S2 - 0x2C ++# CHECK-NEXT: 0x2D R_LARCH_SOP_POP_32_S_0_10_10_16_S2 - 0x2D ++# CHECK-NEXT: 0x2E R_LARCH_SOP_POP_32_U - 0x2E ++# CHECK-NEXT: 0x2F R_LARCH_ADD8 - 0x2F ++# CHECK-NEXT: 0x30 R_LARCH_ADD16 - 0x30 ++# CHECK-NEXT: 0x31 R_LARCH_ADD24 - 0x31 ++# CHECK-NEXT: 0x32 R_LARCH_ADD32 - 0x32 ++# CHECK-NEXT: 0x33 R_LARCH_ADD64 - 0x33 ++# CHECK-NEXT: 0x34 R_LARCH_SUB8 - 0x34 ++# CHECK-NEXT: 0x35 R_LARCH_SUB16 - 0x35 ++# CHECK-NEXT: 0x36 R_LARCH_SUB24 - 0x36 ++# CHECK-NEXT: 0x37 R_LARCH_SUB32 - 0x37 ++# CHECK-NEXT: 0x38 R_LARCH_SUB64 - 0x38 ++# CHECK-NEXT: 0x39 R_LARCH_GNU_VTINHERIT - 0x39 ++# CHECK-NEXT: 0x3A R_LARCH_GNU_VTENTRY - 0x3A ++# CHECK-NEXT: } ++# CHECK-NEXT: ] +diff --git a/llvm/test/tools/llvm-profgen/lit.local.cfg b/llvm/test/tools/llvm-profgen/lit.local.cfg +index 197150e22..0ca12783a 100644 +--- a/llvm/test/tools/llvm-profgen/lit.local.cfg ++++ b/llvm/test/tools/llvm-profgen/lit.local.cfg +@@ -3,5 +3,5 @@ import lit.util + + config.suffixes = ['.test', '.ll', '.s', '.yaml'] + +-if not 'X86' in config.root.targets: ++if not ('X86' in config.root.targets and 'LoongArch' in config.root.targets): + config.unsupported = True +diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp +index eef5b8eb8..ceac76307 100644 +--- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp ++++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp +@@ -196,9 +196,6 @@ void ProfiledBinary::load() { + exitWithError("not a valid Elf image", Path); + + TheTriple = Obj->makeTriple(); +- // Current only support X86 +- if (!TheTriple.isX86()) +- exitWithError("unsupported target", TheTriple.getTriple()); + LLVM_DEBUG(dbgs() << "Loading " << Path << "\n"); + + // Find the preferred load address for text sections. diff --git a/0103-Support-LoongArch.patch b/0103-Support-LoongArch.patch new file mode 100644 index 0000000..1b0a05c --- /dev/null +++ b/0103-Support-LoongArch.patch @@ -0,0 +1,24842 @@ +diff --git a/bindings/python/tests/CMakeLists.txt b/bindings/python/tests/CMakeLists.txt +index 5127512fe..8383e6fae 100644 +--- a/bindings/python/tests/CMakeLists.txt ++++ b/bindings/python/tests/CMakeLists.txt +@@ -40,7 +40,7 @@ endif() + # addressed. + # SystemZ has broken Python/FFI interface: + # https://reviews.llvm.org/D52840#1265716 +-if(${LLVM_NATIVE_ARCH} MATCHES "^(AArch64|Hexagon|Sparc|SystemZ)$") ++if(${LLVM_NATIVE_ARCH} MATCHES "^(AArch64|Hexagon|LoongArch|Sparc|SystemZ)$") + set(RUN_PYTHON_TESTS FALSE) + endif() + +diff --git a/include/clang/Basic/BuiltinsLoongArch.def b/include/clang/Basic/BuiltinsLoongArch.def +new file mode 100644 +index 000000000..75d7e77c1 +--- /dev/null ++++ b/include/clang/Basic/BuiltinsLoongArch.def +@@ -0,0 +1,1990 @@ ++//===-- BuiltinsLoongArch.def - LoongArch Builtin function database --------*- C++ -*-==// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++// ++// This file defines the LoongArch-specific builtin function database. Users of ++// this file must define the BUILTIN macro to make use of this information. ++// ++//===----------------------------------------------------------------------===// ++ ++// The format of this database matches clang/Basic/Builtins.def. 
++ ++// LoongArch LSX ++ ++BUILTIN(__builtin_lsx_vclo_b, "V16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vclo_h, "V8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vclo_w, "V4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vclo_d, "V2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vflogb_s, "V4fV4f", "nc") ++BUILTIN(__builtin_lsx_vflogb_d, "V2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vpickve2gr_b, "iV16ScIUi", "nc") ++BUILTIN(__builtin_lsx_vpickve2gr_h, "iV8SsIUi", "nc") ++BUILTIN(__builtin_lsx_vpickve2gr_w, "iV4SiIUi", "nc") ++BUILTIN(__builtin_lsx_vpickve2gr_d, "LLiV2SLLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vpickve2gr_bu, "iV16UcIUi", "nc") ++BUILTIN(__builtin_lsx_vpickve2gr_hu, "iV8UsIUi", "nc") ++BUILTIN(__builtin_lsx_vpickve2gr_wu, "iV4UiIUi", "nc") ++BUILTIN(__builtin_lsx_vpickve2gr_du, "LLiV2ULLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vreplvei_b, "V16cV16cIUi", "nc") ++BUILTIN(__builtin_lsx_vreplvei_h, "V8sV8sIUi", "nc") ++BUILTIN(__builtin_lsx_vreplvei_w, "V4iV4iIUi", "nc") ++BUILTIN(__builtin_lsx_vreplvei_d, "V2LLiV2LLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vmskltz_b, "V16cV16c", "nc") ++BUILTIN(__builtin_lsx_vmskltz_h, "V8sV8s", "nc") ++BUILTIN(__builtin_lsx_vmskltz_w, "V4iV4i", "nc") ++BUILTIN(__builtin_lsx_vmskltz_d, "V2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vfmadd_s, "V4fV4fV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfmadd_d, "V2dV2dV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfmsub_s, "V4fV4fV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfmsub_d, "V2dV2dV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfnmadd_s, "V4fV4fV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfnmadd_d, "V2dV2dV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfnmsub_s, "V4fV4fV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfnmsub_d, "V2dV2dV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_caf_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_caf_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_cor_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_cor_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_cun_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_cun_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_cune_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_cune_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_cueq_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_cueq_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_ceq_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_ceq_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_cne_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_cne_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_clt_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_clt_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_cult_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_cult_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_cle_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_cle_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_cule_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_cule_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_saf_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_saf_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_sor_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_sor_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_sun_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_sun_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_sune_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_sune_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_sueq_s, 
"V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_sueq_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_seq_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_seq_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_sne_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_sne_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_slt_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_slt_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_sult_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_sult_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_sle_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_sle_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_sule_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_sule_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vbitsel_v, "V16UcV16UcV16UcV16Uc", "nc") ++ ++BUILTIN(__builtin_lsx_vshuf_b, "V16UcV16UcV16UcV16Uc", "nc") ++ ++BUILTIN(__builtin_lsx_vldrepl_b, "V16cvC*Ii", "nc") ++BUILTIN(__builtin_lsx_vldrepl_h, "V8svC*Ii", "nc") ++BUILTIN(__builtin_lsx_vldrepl_w, "V4ivC*Ii", "nc") ++BUILTIN(__builtin_lsx_vldrepl_d, "V2LLivC*Ii", "nc") ++ ++BUILTIN(__builtin_lsx_vstelm_b, "vV16Scv*IiUi", "nc") ++BUILTIN(__builtin_lsx_vstelm_h, "vV8Ssv*IiUi", "nc") ++BUILTIN(__builtin_lsx_vstelm_w, "vV4Siv*IiUi", "nc") ++BUILTIN(__builtin_lsx_vstelm_d, "vV2SLLiv*IiUi", "nc") ++ ++BUILTIN(__builtin_lsx_vldx, "V16ScvC*LLi", "nc") ++BUILTIN(__builtin_lsx_vstx, "vV16Scv*LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vaddwev_d_w, "V2LLiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vaddwev_w_h, "V4SiV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vaddwev_h_b, "V8sV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vaddwev_q_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsubwev_d_w, "V2LLiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vsubwev_w_h, "V4SiV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vsubwev_h_b, "V8sV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vsubwev_q_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vaddwod_d_w, "V2LLiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vaddwod_w_h, "V4SiV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vaddwod_h_b, "V8sV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vaddwod_q_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsubwod_d_w, "V2LLiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vsubwod_w_h, "V4SiV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vsubwod_h_b, "V8sV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vsubwod_q_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vaddwev_d_wu, "V2LLiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vaddwev_w_hu, "V4SiV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vaddwev_h_bu, "V8sV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vaddwev_q_du, "V2LLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsubwev_d_wu, "V2LLiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vsubwev_w_hu, "V4SiV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vsubwev_h_bu, "V8sV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vsubwev_q_du, "V2LLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vaddwod_d_wu, "V2LLiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vaddwod_w_hu, "V4SiV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vaddwod_h_bu, "V8sV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vaddwod_q_du, "V2LLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsubwod_d_wu, "V2LLiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vsubwod_w_hu, "V4SiV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vsubwod_h_bu, "V8sV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vsubwod_q_du, "V2LLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vaddwev_d_wu_w, "V2LLiV4UiV4Si", "nc") ++BUILTIN(__builtin_lsx_vaddwev_w_hu_h, 
"V4SiV8UsV8s", "nc") ++BUILTIN(__builtin_lsx_vaddwev_h_bu_b, "V8sV16UcV16c", "nc") ++BUILTIN(__builtin_lsx_vaddwev_q_du_d, "V2LLiV2ULLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vaddwod_d_wu_w, "V2LLiV4UiV4Si", "nc") ++BUILTIN(__builtin_lsx_vaddwod_w_hu_h, "V4SiV8UsV8s", "nc") ++BUILTIN(__builtin_lsx_vaddwod_h_bu_b, "V8sV16UcV16c", "nc") ++BUILTIN(__builtin_lsx_vaddwod_q_du_d, "V2LLiV2ULLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vhaddw_q_d, "V2LLiV2LLiV2LLi", "nc") ++BUILTIN(__builtin_lsx_vhsubw_q_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vhaddw_qu_du, "V2ULLiV2ULLiV2ULLi", "nc") ++BUILTIN(__builtin_lsx_vhsubw_qu_du, "V2ULLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmuh_b, "V16cV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vmuh_h, "V8sV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vmuh_w, "V4iV4iV4i", "nc") ++BUILTIN(__builtin_lsx_vmuh_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmuh_bu, "V16UcV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vmuh_hu, "V8UsV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vmuh_wu, "V4UiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vmuh_du, "V2ULLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmulwev_d_w, "V2LLiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vmulwev_w_h, "V4SiV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vmulwev_h_b, "V8sV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vmulwev_q_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmulwod_d_w, "V2LLiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vmulwod_w_h, "V4SiV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vmulwod_h_b, "V8sV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vmulwod_q_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmulwev_d_wu, "V2LLiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vmulwev_w_hu, "V4SiV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vmulwev_h_bu, "V8sV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vmulwev_q_du, "V2LLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmulwod_d_wu, "V2LLiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vmulwod_w_hu, "V4SiV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vmulwod_h_bu, "V8sV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vmulwod_q_du, "V2LLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmulwev_d_wu_w, "V2LLiV4UiV4Si", "nc") ++BUILTIN(__builtin_lsx_vmulwev_w_hu_h, "V4SiV8UsV8s", "nc") ++BUILTIN(__builtin_lsx_vmulwev_h_bu_b, "V8sV16UcV16c", "nc") ++BUILTIN(__builtin_lsx_vmulwev_q_du_d, "V2LLiV2ULLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmulwod_d_wu_w, "V2LLiV4UiV4Si", "nc") ++BUILTIN(__builtin_lsx_vmulwod_w_hu_h, "V4SiV8UsV8s", "nc") ++BUILTIN(__builtin_lsx_vmulwod_h_bu_b, "V8sV16UcV16c", "nc") ++BUILTIN(__builtin_lsx_vmulwod_q_du_d, "V2LLiV2ULLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmaddwev_d_w, "V2LLiV2LLiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vmaddwev_w_h, "V4SiV4SiV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vmaddwev_h_b, "V8sV8sV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vmaddwev_q_d, "V2LLiV2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmaddwod_d_w, "V2LLiV2LLiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vmaddwod_w_h, "V4SiV4SiV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vmaddwod_h_b, "V8sV8sV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vmaddwod_q_d, "V2LLiV2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmaddwev_d_wu, "V2ULLiV2ULLiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vmaddwev_w_hu, "V4UiV4UiV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vmaddwev_h_bu, "V8UsV8UsV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vmaddwev_q_du, "V2ULLiV2ULLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmaddwod_d_wu, "V2ULLiV2ULLiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vmaddwod_w_hu, "V4UiV4UiV8UsV8Us", "nc") 
++BUILTIN(__builtin_lsx_vmaddwod_h_bu, "V8UsV8UsV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vmaddwod_q_du, "V2ULLiV2ULLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmaddwev_d_wu_w, "V2LLiV2LLiV4UiV4Si", "nc") ++BUILTIN(__builtin_lsx_vmaddwev_w_hu_h, "V4SiV4SiV8UsV8s", "nc") ++BUILTIN(__builtin_lsx_vmaddwev_h_bu_b, "V8sV8sV16UcV16c", "nc") ++BUILTIN(__builtin_lsx_vmaddwev_q_du_d, "V2LLiV2LLiV2ULLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmaddwod_d_wu_w, "V2LLiV2LLiV4UiV4Si", "nc") ++BUILTIN(__builtin_lsx_vmaddwod_w_hu_h, "V4SiV4SiV8UsV8s", "nc") ++BUILTIN(__builtin_lsx_vmaddwod_h_bu_b, "V8sV8sV16UcV16c", "nc") ++BUILTIN(__builtin_lsx_vmaddwod_q_du_d, "V2LLiV2LLiV2ULLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsrln_b_h, "V16ScV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vsrln_h_w, "V8sV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vsrln_w_d, "V4SiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsran_b_h, "V16ScV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vsran_h_w, "V8sV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vsran_w_d, "V4SiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsrlrn_b_h, "V16ScV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vsrlrn_h_w, "V8sV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vsrlrn_w_d, "V4SiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsrarn_b_h, "V16ScV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vsrarn_h_w, "V8sV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vsrarn_w_d, "V4SiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vssrln_b_h, "V16ScV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vssrln_h_w, "V8sV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vssrln_w_d, "V4SiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vssran_b_h, "V16ScV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vssran_h_w, "V8sV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vssran_w_d, "V4SiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vssrlrn_b_h, "V16ScV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vssrlrn_h_w, "V8sV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vssrlrn_w_d, "V4SiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vssrarn_b_h, "V16ScV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vssrarn_h_w, "V8sV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vssrarn_w_d, "V4SiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vssrln_bu_h, "V16UcV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vssrln_hu_w, "V8UsV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vssrln_wu_d, "V4UiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vssran_bu_h, "V16UcV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vssran_hu_w, "V8UsV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vssran_wu_d, "V4UiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vssrlrn_bu_h, "V16UcV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vssrlrn_hu_w, "V8UsV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vssrlrn_wu_d, "V4UiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vssrarn_bu_h, "V16UcV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vssrarn_hu_w, "V8UsV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vssrarn_wu_d, "V4UiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vandn_v, "V16UcV16UcV16Uc", "nc") ++ ++BUILTIN(__builtin_lsx_vorn_v, "V16ScV16ScV16Sc", "nc") ++ ++BUILTIN(__builtin_lsx_vfrstp_b, "V16ScV16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vfrstp_h, "V8SsV8SsV8SsV8Ss", "nc") ++ ++BUILTIN(__builtin_lsx_vadd_q, "V2LLiV2LLiV2LLi", "nc") ++BUILTIN(__builtin_lsx_vsub_q, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsigncov_b, "V16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vsigncov_h, "V8SsV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vsigncov_w, "V4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vsigncov_d, "V2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vfcvt_h_s, "V8sV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcvt_s_d, 
"V4fV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vftint_w_d, "V4SiV2dV2d", "nc") ++BUILTIN(__builtin_lsx_vffint_s_l, "V4fV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vftintrz_w_d, "V4SiV2dV2d", "nc") ++BUILTIN(__builtin_lsx_vftintrp_w_d, "V4SiV2dV2d", "nc") ++BUILTIN(__builtin_lsx_vftintrm_w_d, "V4SiV2dV2d", "nc") ++BUILTIN(__builtin_lsx_vftintrne_w_d, "V4SiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vbsrl_v, "V16cV16cIUi", "nc") ++BUILTIN(__builtin_lsx_vbsll_v, "V16cV16cIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vfrstpi_b, "V16cV16cV16cIUi", "nc") ++BUILTIN(__builtin_lsx_vfrstpi_h, "V8sV8sV8sIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vneg_b, "V16cV16c", "nc") ++BUILTIN(__builtin_lsx_vneg_h, "V8sV8s", "nc") ++BUILTIN(__builtin_lsx_vneg_w, "V4iV4i", "nc") ++BUILTIN(__builtin_lsx_vneg_d, "V2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmskgez_b, "V16cV16c", "nc") ++BUILTIN(__builtin_lsx_vmsknz_b, "V8sV8s", "nc") ++ ++BUILTIN(__builtin_lsx_vfrintrm_s, "V4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfrintrm_d, "V2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfrintrp_s, "V4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfrintrp_d, "V2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfrintrz_s, "V4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfrintrz_d, "V2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfrintrne_s, "V4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfrintrne_d, "V2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vffinth_d_w, "V2dV4Si", "nc") ++BUILTIN(__builtin_lsx_vffintl_d_w, "V2dV4Si", "nc") ++ ++BUILTIN(__builtin_lsx_vftintrm_w_s, "V4SiV4f", "nc") ++BUILTIN(__builtin_lsx_vftintrm_l_d, "V2LLiV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vftintrp_w_s, "V4SiV4f", "nc") ++BUILTIN(__builtin_lsx_vftintrp_l_d, "V2LLiV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vftintrz_w_s, "V4SiV4f", "nc") ++BUILTIN(__builtin_lsx_vftintrz_l_d, "V2LLiV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vftintrne_w_s, "V4SiV4f", "nc") ++BUILTIN(__builtin_lsx_vftintrne_l_d, "V2LLiV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vftinth_l_s, "V2LLiV4f", "nc") ++BUILTIN(__builtin_lsx_vftintl_l_s, "V2LLiV4f", "nc") ++ ++BUILTIN(__builtin_lsx_vftintrmh_l_s, "V2LLiV4f", "nc") ++BUILTIN(__builtin_lsx_vftintrml_l_s, "V2LLiV4f", "nc") ++ ++BUILTIN(__builtin_lsx_vftintrph_l_s, "V2LLiV4f", "nc") ++BUILTIN(__builtin_lsx_vftintrpl_l_s, "V2LLiV4f", "nc") ++ ++BUILTIN(__builtin_lsx_vftintrzh_l_s, "V2LLiV4f", "nc") ++BUILTIN(__builtin_lsx_vftintrzl_l_s, "V2LLiV4f", "nc") ++ ++BUILTIN(__builtin_lsx_vftintrneh_l_s, "V2LLiV4f", "nc") ++BUILTIN(__builtin_lsx_vftintrnel_l_s, "V2LLiV4f", "nc") ++ ++BUILTIN(__builtin_lsx_vexth_d_w, "V2LLiV4Si", "nc") ++BUILTIN(__builtin_lsx_vexth_w_h, "V4SiV8s", "nc") ++BUILTIN(__builtin_lsx_vexth_h_b, "V8sV16c", "nc") ++BUILTIN(__builtin_lsx_vexth_q_d, "V2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vexth_du_wu, "V2ULLiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vexth_wu_hu, "V4UiV8Us", "nc") ++BUILTIN(__builtin_lsx_vexth_hu_bu, "V8UsV16Uc", "nc") ++BUILTIN(__builtin_lsx_vexth_qu_du, "V2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsllwil_d_w, "V2LLiV4SiIUi", "nc") ++BUILTIN(__builtin_lsx_vsllwil_w_h, "V4SiV8sIUi", "nc") ++BUILTIN(__builtin_lsx_vsllwil_h_b, "V8sV16cIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vextl_q_d, "V2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsllwil_du_wu, "V2ULLiV4UiIUi", "nc") ++BUILTIN(__builtin_lsx_vsllwil_wu_hu, "V4UiV8UsIUi", "nc") ++BUILTIN(__builtin_lsx_vsllwil_hu_bu, "V8UsV16UcIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vextl_qu_du, "V2LLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vbitclri_b, "V16UcV16UcIUi", "nc") ++BUILTIN(__builtin_lsx_vbitclri_h, "V8UsV8UsIUi", "nc") 
++BUILTIN(__builtin_lsx_vbitclri_w, "V4UiV4UiIUi", "nc") ++BUILTIN(__builtin_lsx_vbitclri_d, "V2ULLiV2ULLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vbitseti_b, "V16UcV16UcIUi", "nc") ++BUILTIN(__builtin_lsx_vbitseti_h, "V8UsV8UsIUi", "nc") ++BUILTIN(__builtin_lsx_vbitseti_w, "V4UiV4UiIUi", "nc") ++BUILTIN(__builtin_lsx_vbitseti_d, "V2ULLiV2ULLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vbitrevi_b, "V16UcV16UcIUi", "nc") ++BUILTIN(__builtin_lsx_vbitrevi_h, "V8UsV8UsIUi", "nc") ++BUILTIN(__builtin_lsx_vbitrevi_w, "V4UiV4UiIUi", "nc") ++BUILTIN(__builtin_lsx_vbitrevi_d, "V2ULLiV2ULLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vssrlrni_b_h, "V16cV16cV16cIUi", "nc") ++BUILTIN(__builtin_lsx_vssrlrni_h_w, "V8sV8sV8sIUi", "nc") ++BUILTIN(__builtin_lsx_vssrlrni_w_d, "V4iV4iV4iIUi", "nc") ++BUILTIN(__builtin_lsx_vssrlrni_d_q, "V2LLiV2LLiV2LLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vsrani_b_h, "V16cV16cV16cIUi", "nc") ++BUILTIN(__builtin_lsx_vsrani_h_w, "V8sV8sV8sIUi", "nc") ++BUILTIN(__builtin_lsx_vsrani_w_d, "V4iV4iV4iIUi", "nc") ++BUILTIN(__builtin_lsx_vsrani_d_q, "V2LLiV2LLiV2LLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vextrins_b, "V16cV16cV16cIUi", "nc") ++BUILTIN(__builtin_lsx_vextrins_h, "V8sV8sV8sIUi", "nc") ++BUILTIN(__builtin_lsx_vextrins_w, "V4iV4iV4iIUi", "nc") ++BUILTIN(__builtin_lsx_vextrins_d, "V2LLiV2LLiV2LLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vbitseli_b, "V16UcV16UcV16UcIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vandi_b, "V16UcV16UcIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vori_b, "V16UcV16UcIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vxori_b, "V16UcV16UcIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vnori_b, "V16UcV16UcIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vldi, "V2LLiIi", "nc") ++BUILTIN(__builtin_lsx_vrepli_b, "V16cIi", "nc") ++BUILTIN(__builtin_lsx_vrepli_h, "V8sIi", "nc") ++BUILTIN(__builtin_lsx_vrepli_w, "V4iIi", "nc") ++BUILTIN(__builtin_lsx_vrepli_d, "V2LLiIi", "nc") ++ ++BUILTIN(__builtin_lsx_vpermi_w, "V4iV4iV4iIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vsadd_b, "V16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vsadd_h, "V8SsV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vsadd_w, "V4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vsadd_d, "V2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vssub_b, "V16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vssub_h, "V8SsV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vssub_w, "V4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vssub_d, "V2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsadd_bu, "V16UcV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vsadd_hu, "V8UsV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vsadd_wu, "V4UiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vsadd_du, "V2ULLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vssub_bu, "V16UcV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vssub_hu, "V8UsV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vssub_wu, "V4UiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vssub_du, "V2ULLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vhaddw_h_b, "V8SsV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vhaddw_w_h, "V4SiV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vhaddw_d_w, "V2SLLiV4SiV4Si", "nc") ++ ++BUILTIN(__builtin_lsx_vhsubw_h_b, "V8SsV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vhsubw_w_h, "V4SiV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vhsubw_d_w, "V2SLLiV4SiV4Si", "nc") ++ ++BUILTIN(__builtin_lsx_vhaddw_hu_bu, "V8UsV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vhaddw_wu_hu, "V4UiV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vhaddw_du_wu, "V2ULLiV4UiV4Ui", "nc") ++ ++BUILTIN(__builtin_lsx_vhsubw_hu_bu, "V8UsV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vhsubw_wu_hu, "V4UiV8UsV8Us", "nc") 
++BUILTIN(__builtin_lsx_vhsubw_du_wu, "V2ULLiV4UiV4Ui", "nc") ++ ++BUILTIN(__builtin_lsx_vadda_b, "V16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vadda_h, "V8SsV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vadda_w, "V4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vadda_d, "V2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vabsd_b, "V16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vabsd_h, "V8SsV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vabsd_w, "V4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vabsd_d, "V2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vabsd_bu, "V16UcV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vabsd_hu, "V8UsV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vabsd_wu, "V4UiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vabsd_du, "V2ULLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vavg_b, "V16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vavg_h, "V8SsV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vavg_w, "V4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vavg_d, "V2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vavg_bu, "V16UcV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vavg_hu, "V8UsV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vavg_wu, "V4UiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vavg_du, "V2ULLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vavgr_b, "V16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vavgr_h, "V8SsV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vavgr_w, "V4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vavgr_d, "V2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vavgr_bu, "V16UcV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vavgr_hu, "V8UsV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vavgr_wu, "V4UiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vavgr_du, "V2ULLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsrlr_b, "V16cV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vsrlr_h, "V8sV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vsrlr_w, "V4iV4iV4i", "nc") ++BUILTIN(__builtin_lsx_vsrlr_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsrar_b, "V16cV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vsrar_h, "V8sV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vsrar_w, "V4iV4iV4i", "nc") ++BUILTIN(__builtin_lsx_vsrar_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vfmax_s, "V4fV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfmax_d, "V2dV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfmin_s, "V4fV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfmin_d, "V2dV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfmaxa_s, "V4fV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfmaxa_d, "V2dV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfmina_s, "V4fV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfmina_d, "V2dV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfclass_s, "V4iV4f", "nc") ++BUILTIN(__builtin_lsx_vfclass_d, "V2LLiV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfrecip_s, "V4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfrecip_d, "V2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfrsqrt_s, "V4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfrsqrt_d, "V2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcvtl_s_h, "V4fV8s", "nc") ++BUILTIN(__builtin_lsx_vfcvtl_d_s, "V2dV4f", "nc") ++ ++BUILTIN(__builtin_lsx_vfcvth_s_h, "V4fV8s", "nc") ++BUILTIN(__builtin_lsx_vfcvth_d_s, "V2dV4f", "nc") ++ ++BUILTIN(__builtin_lsx_vftint_w_s, "V4SiV4f", "nc") ++BUILTIN(__builtin_lsx_vftint_l_d, "V2SLLiV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vftint_wu_s, "V4UiV4f", "nc") ++BUILTIN(__builtin_lsx_vftint_lu_d, "V2ULLiV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vsrlri_b, "V16cV16cIUi", "nc") ++BUILTIN(__builtin_lsx_vsrlri_h, "V8sV8sIUi", "nc") ++BUILTIN(__builtin_lsx_vsrlri_w, "V4iV4iIUi", "nc") ++BUILTIN(__builtin_lsx_vsrlri_d, "V2LLiV2LLiIUi", "nc") ++ 
++BUILTIN(__builtin_lsx_vsrari_b, "V16cV16cIUi", "nc") ++BUILTIN(__builtin_lsx_vsrari_h, "V8sV8sIUi", "nc") ++BUILTIN(__builtin_lsx_vsrari_w, "V4iV4iIUi", "nc") ++BUILTIN(__builtin_lsx_vsrari_d, "V2LLiV2LLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vsat_b, "V16ScV16ScIUi", "nc") ++BUILTIN(__builtin_lsx_vsat_h, "V8SsV8SsIUi", "nc") ++BUILTIN(__builtin_lsx_vsat_w, "V4SiV4SiIUi", "nc") ++BUILTIN(__builtin_lsx_vsat_d, "V2SLLiV2SLLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vsat_bu, "V16UcV16UcIUi", "nc") ++BUILTIN(__builtin_lsx_vsat_hu, "V8UsV8UsIUi", "nc") ++BUILTIN(__builtin_lsx_vsat_wu, "V4UiV4UiIUi", "nc") ++BUILTIN(__builtin_lsx_vsat_du, "V2ULLiV2ULLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vsrlni_b_h, "V16cV16cV16cIUi", "nc") ++BUILTIN(__builtin_lsx_vsrlni_h_w, "V8sV8sV8sIUi", "nc") ++BUILTIN(__builtin_lsx_vsrlni_w_d, "V4iV4iV4iIUi", "nc") ++BUILTIN(__builtin_lsx_vsrlni_d_q, "V2LLiV2LLiV2LLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vsrlrni_b_h, "V16cV16cV16cIUi", "nc") ++BUILTIN(__builtin_lsx_vsrlrni_h_w, "V8sV8sV8sIUi", "nc") ++BUILTIN(__builtin_lsx_vsrlrni_w_d, "V4iV4iV4iIUi", "nc") ++BUILTIN(__builtin_lsx_vsrlrni_d_q, "V2LLiV2LLiV2LLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vssrlni_b_h, "V16cV16cV16cIi", "nc") ++BUILTIN(__builtin_lsx_vssrlni_h_w, "V8sV8sV8sIi", "nc") ++BUILTIN(__builtin_lsx_vssrlni_w_d, "V4iV4iV4iIi", "nc") ++BUILTIN(__builtin_lsx_vssrlni_d_q, "V2LLiV2LLiV2LLiIi", "nc") ++ ++BUILTIN(__builtin_lsx_vssrlrni_bu_h, "V16cV16cV16cIi", "nc") ++BUILTIN(__builtin_lsx_vssrlrni_hu_w, "V8sV8sV8sIi", "nc") ++BUILTIN(__builtin_lsx_vssrlrni_wu_d, "V4iV4iV4iIi", "nc") ++BUILTIN(__builtin_lsx_vssrlrni_du_q, "V2LLiV2LLiV2LLiIi", "nc") ++ ++BUILTIN(__builtin_lsx_vsrarni_b_h, "V16cV16cV16cIi", "nc") ++BUILTIN(__builtin_lsx_vsrarni_h_w, "V8sV8sV8sIi", "nc") ++BUILTIN(__builtin_lsx_vsrarni_w_d, "V4iV4iV4iIi", "nc") ++BUILTIN(__builtin_lsx_vsrarni_d_q, "V2LLiV2LLiV2LLiIi", "nc") ++ ++BUILTIN(__builtin_lsx_vssrani_b_h, "V16cV16cV16cIi", "nc") ++BUILTIN(__builtin_lsx_vssrani_h_w, "V8sV8sV8sIi", "nc") ++BUILTIN(__builtin_lsx_vssrani_w_d, "V4iV4iV4iIi", "nc") ++BUILTIN(__builtin_lsx_vssrani_d_q, "V2LLiV2LLiV2LLiIi", "nc") ++ ++BUILTIN(__builtin_lsx_vssrani_bu_h, "V16cV16cV16cIi", "nc") ++BUILTIN(__builtin_lsx_vssrani_hu_w, "V8sV8sV8sIi", "nc") ++BUILTIN(__builtin_lsx_vssrani_wu_d, "V4iV4iV4iIi", "nc") ++BUILTIN(__builtin_lsx_vssrani_du_q, "V2LLiV2LLiV2LLiIi", "nc") ++ ++BUILTIN(__builtin_lsx_vssrarni_b_h, "V16cV16cV16cIi", "nc") ++BUILTIN(__builtin_lsx_vssrarni_h_w, "V8sV8sV8sIi", "nc") ++BUILTIN(__builtin_lsx_vssrarni_w_d, "V4iV4iV4iIi", "nc") ++BUILTIN(__builtin_lsx_vssrarni_d_q, "V2LLiV2LLiV2LLiIi", "nc") ++ ++BUILTIN(__builtin_lsx_vssrarni_bu_h, "V16cV16cV16cIi", "nc") ++BUILTIN(__builtin_lsx_vssrarni_hu_w, "V8sV8sV8sIi", "nc") ++BUILTIN(__builtin_lsx_vssrarni_wu_d, "V4iV4iV4iIi", "nc") ++BUILTIN(__builtin_lsx_vssrarni_du_q, "V2LLiV2LLiV2LLiIi", "nc") ++ ++BUILTIN(__builtin_lsx_vssrlni_bu_h, "V16cV16cV16cIi", "nc") ++BUILTIN(__builtin_lsx_vssrlni_hu_w, "V8sV8sV8sIi", "nc") ++BUILTIN(__builtin_lsx_vssrlni_wu_d, "V4iV4iV4iIi", "nc") ++BUILTIN(__builtin_lsx_vssrlni_du_q, "V2LLiV2LLiV2LLiIi", "nc") ++ ++BUILTIN(__builtin_lsx_vseq_b, "V16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vseq_h, "V8SsV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vseq_w, "V4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vseq_d, "V2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsle_b, "V16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vsle_h, "V8SsV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vsle_w, "V4SiV4SiV4Si", "nc") 
++BUILTIN(__builtin_lsx_vsle_d, "V2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsle_bu, "V16ScV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vsle_hu, "V8SsV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vsle_wu, "V4SiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vsle_du, "V2SLLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vslt_b, "V16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vslt_h, "V8SsV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vslt_w, "V4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vslt_d, "V2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vslt_bu, "V16ScV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vslt_hu, "V8SsV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vslt_wu, "V4SiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vslt_du, "V2SLLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vadd_b, "V16cV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vadd_h, "V8sV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vadd_w, "V4iV4iV4i", "nc") ++BUILTIN(__builtin_lsx_vadd_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsub_b, "V16cV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vsub_h, "V8sV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vsub_w, "V4iV4iV4i", "nc") ++BUILTIN(__builtin_lsx_vsub_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmax_b, "V16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vmax_h, "V8SsV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vmax_w, "V4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vmax_d, "V2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmin_b, "V16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vmin_h, "V8SsV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vmin_w, "V4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vmin_d, "V2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmax_bu, "V16UcV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vmax_hu, "V8UsV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vmax_wu, "V4UiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vmax_du, "V2ULLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmin_bu, "V16UcV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vmin_hu, "V8UsV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vmin_wu, "V4UiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vmin_du, "V2ULLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmul_b, "V16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vmul_h, "V8SsV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vmul_w, "V4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vmul_d, "V2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmadd_b, "V16ScV16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vmadd_h, "V8SsV8SsV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vmadd_w, "V4SiV4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vmadd_d, "V2SLLiV2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmsub_b, "V16ScV16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vmsub_h, "V8SsV8SsV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vmsub_w, "V4SiV4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vmsub_d, "V2SLLiV2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vdiv_b, "V16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vdiv_h, "V8SsV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vdiv_w, "V4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vdiv_d, "V2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmod_b, "V16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vmod_h, "V8SsV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vmod_w, "V4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vmod_d, "V2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vdiv_bu, "V16UcV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vdiv_hu, "V8UsV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vdiv_wu, "V4UiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vdiv_du, "V2ULLiV2ULLiV2ULLi", "nc") ++ 
++BUILTIN(__builtin_lsx_vsll_b, "V16cV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vsll_h, "V8sV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vsll_w, "V4iV4iV4i", "nc") ++BUILTIN(__builtin_lsx_vsll_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsrl_b, "V16cV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vsrl_h, "V8sV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vsrl_w, "V4iV4iV4i", "nc") ++BUILTIN(__builtin_lsx_vsrl_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vbitclr_b, "V16UcV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vbitclr_h, "V8UsV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vbitclr_w, "V4UiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vbitclr_d, "V2ULLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vbitset_b, "V16UcV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vbitset_h, "V8UsV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vbitset_w, "V4UiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vbitset_d, "V2ULLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vpackev_b, "V16cV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vpackev_h, "V8sV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vpackev_w, "V4iV4iV4i", "nc") ++BUILTIN(__builtin_lsx_vpackev_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vpackod_b, "V16cV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vpackod_h, "V8sV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vpackod_w, "V4iV4iV4i", "nc") ++BUILTIN(__builtin_lsx_vpackod_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vilvl_b, "V16cV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vilvl_h, "V8sV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vilvl_w, "V4iV4iV4i", "nc") ++BUILTIN(__builtin_lsx_vilvl_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vilvh_b, "V16cV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vilvh_h, "V8sV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vilvh_w, "V4iV4iV4i", "nc") ++BUILTIN(__builtin_lsx_vilvh_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vpickev_b, "V16cV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vpickev_h, "V8sV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vpickev_w, "V4iV4iV4i", "nc") ++BUILTIN(__builtin_lsx_vpickev_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vand_v, "V16UcV16UcV16Uc", "nc") ++ ++BUILTIN(__builtin_lsx_vor_v, "V16UcV16UcV16Uc", "nc") ++ ++BUILTIN(__builtin_lsx_vbitrev_b, "V16UcV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vbitrev_h, "V8UsV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vbitrev_w, "V4UiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vbitrev_d, "V2ULLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmod_bu, "V16UcV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vmod_hu, "V8UsV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vmod_wu, "V4UiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vmod_du, "V2ULLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vpickod_b, "V16cV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vpickod_h, "V8sV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vpickod_w, "V4iV4iV4i", "nc") ++BUILTIN(__builtin_lsx_vpickod_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vreplve_b, "V16cV16cUi", "nc") ++BUILTIN(__builtin_lsx_vreplve_h, "V8sV8sUi", "nc") ++BUILTIN(__builtin_lsx_vreplve_w, "V4iV4iUi", "nc") ++BUILTIN(__builtin_lsx_vreplve_d, "V2LLiV2LLiUi", "nc") ++ ++BUILTIN(__builtin_lsx_vsra_b, "V16cV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vsra_h, "V8sV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vsra_w, "V4iV4iV4i", "nc") ++BUILTIN(__builtin_lsx_vsra_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vxor_v, "V16cV16cV16c", "nc") ++ ++BUILTIN(__builtin_lsx_vnor_v, "V16UcV16UcV16Uc", "nc") ++ ++BUILTIN(__builtin_lsx_vfadd_s, "V4fV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfadd_d, "V2dV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfsub_s, 
"V4fV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfsub_d, "V2dV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfmul_s, "V4fV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfmul_d, "V2dV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vshuf_h, "V8sV8sV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vshuf_w, "V4iV4iV4iV4i", "nc") ++BUILTIN(__builtin_lsx_vshuf_d, "V2LLiV2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vseqi_b, "V16ScV16ScISi", "nc") ++BUILTIN(__builtin_lsx_vseqi_h, "V8SsV8SsISi", "nc") ++BUILTIN(__builtin_lsx_vseqi_w, "V4SiV4SiISi", "nc") ++BUILTIN(__builtin_lsx_vseqi_d, "V2SLLiV2SLLiISi", "nc") ++ ++BUILTIN(__builtin_lsx_vslei_b, "V16ScV16ScISi", "nc") ++BUILTIN(__builtin_lsx_vslei_h, "V8SsV8SsISi", "nc") ++BUILTIN(__builtin_lsx_vslei_w, "V4SiV4SiISi", "nc") ++BUILTIN(__builtin_lsx_vslei_d, "V2SLLiV2SLLiISi", "nc") ++ ++BUILTIN(__builtin_lsx_vslei_bu, "V16ScV16UcIUi", "nc") ++BUILTIN(__builtin_lsx_vslei_hu, "V8SsV8UsIUi", "nc") ++BUILTIN(__builtin_lsx_vslei_wu, "V4SiV4UiIUi", "nc") ++BUILTIN(__builtin_lsx_vslei_du, "V2SLLiV2ULLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vslti_b, "V16ScV16ScISi", "nc") ++BUILTIN(__builtin_lsx_vslti_h, "V8SsV8SsISi", "nc") ++BUILTIN(__builtin_lsx_vslti_w, "V4SiV4SiISi", "nc") ++BUILTIN(__builtin_lsx_vslti_d, "V2SLLiV2SLLiISi", "nc") ++ ++BUILTIN(__builtin_lsx_vslti_bu, "V16ScV16UcIUi", "nc") ++BUILTIN(__builtin_lsx_vslti_hu, "V8SsV8UsIUi", "nc") ++BUILTIN(__builtin_lsx_vslti_wu, "V4SiV4UiIUi", "nc") ++BUILTIN(__builtin_lsx_vslti_du, "V2SLLiV2ULLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vaddi_bu, "V16cV16cIUi", "nc") ++BUILTIN(__builtin_lsx_vaddi_hu, "V8sV8sIUi", "nc") ++BUILTIN(__builtin_lsx_vaddi_wu, "V4iV4iIUi", "nc") ++BUILTIN(__builtin_lsx_vaddi_du, "V2LLiV2LLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vsubi_bu, "V16cV16cIUi", "nc") ++BUILTIN(__builtin_lsx_vsubi_hu, "V8sV8sIUi", "nc") ++BUILTIN(__builtin_lsx_vsubi_wu, "V4iV4iIUi", "nc") ++BUILTIN(__builtin_lsx_vsubi_du, "V2LLiV2LLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vmaxi_b, "V16ScV16ScIi", "nc") ++BUILTIN(__builtin_lsx_vmaxi_h, "V8SsV8SsIi", "nc") ++BUILTIN(__builtin_lsx_vmaxi_w, "V4SiV4SiIi", "nc") ++BUILTIN(__builtin_lsx_vmaxi_d, "V2SLLiV2SLLiIi", "nc") ++ ++BUILTIN(__builtin_lsx_vmini_b, "V16ScV16ScIi", "nc") ++BUILTIN(__builtin_lsx_vmini_h, "V8SsV8SsIi", "nc") ++BUILTIN(__builtin_lsx_vmini_w, "V4SiV4SiIi", "nc") ++BUILTIN(__builtin_lsx_vmini_d, "V2SLLiV2SLLiIi", "nc") ++ ++BUILTIN(__builtin_lsx_vmaxi_bu, "V16UcV16UcIi", "nc") ++BUILTIN(__builtin_lsx_vmaxi_hu, "V8UsV8UsIi", "nc") ++BUILTIN(__builtin_lsx_vmaxi_wu, "V4UiV4UiIi", "nc") ++BUILTIN(__builtin_lsx_vmaxi_du, "V2ULLiV2ULLiIi", "nc") ++ ++BUILTIN(__builtin_lsx_vmini_bu, "V16UcV16UcIi", "nc") ++BUILTIN(__builtin_lsx_vmini_hu, "V8UsV8UsIi", "nc") ++BUILTIN(__builtin_lsx_vmini_wu, "V4UiV4UiIi", "nc") ++BUILTIN(__builtin_lsx_vmini_du, "V2ULLiV2ULLiIi", "nc") ++ ++BUILTIN(__builtin_lsx_vclz_b, "V16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vclz_h, "V8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vclz_w, "V4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vclz_d, "V2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vpcnt_b, "V16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vpcnt_h, "V8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vpcnt_w, "V4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vpcnt_d, "V2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vfsqrt_s, "V4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfsqrt_d, "V2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfrint_s, "V4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfrint_d, "V2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vffint_s_w, "V4fV4Si", "nc") ++BUILTIN(__builtin_lsx_vffint_d_l, "V2dV2SLLi", 
"nc") ++ ++BUILTIN(__builtin_lsx_vffint_s_wu, "V4fV4Ui", "nc") ++BUILTIN(__builtin_lsx_vffint_d_lu, "V2dV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vftintrz_wu_s, "V4UiV4f", "nc") ++BUILTIN(__builtin_lsx_vftintrz_lu_d, "V2ULLiV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vreplgr2vr_b, "V16Sci", "nc") ++BUILTIN(__builtin_lsx_vreplgr2vr_h, "V8Ssi", "nc") ++BUILTIN(__builtin_lsx_vreplgr2vr_w, "V4Sii", "nc") ++BUILTIN(__builtin_lsx_vreplgr2vr_d, "V2SLLiLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vinsgr2vr_b, "V16ScV16SciIUi", "nc") ++BUILTIN(__builtin_lsx_vinsgr2vr_h, "V8SsV8SsiIUi", "nc") ++BUILTIN(__builtin_lsx_vinsgr2vr_w, "V4SiV4SiiIUi", "nc") ++BUILTIN(__builtin_lsx_vinsgr2vr_d, "V2SLLiV2SLLiLLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vfdiv_s, "V4fV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfdiv_d, "V2dV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vslli_b, "V16cV16cIUi", "nc") ++BUILTIN(__builtin_lsx_vslli_h, "V8sV8sIUi", "nc") ++BUILTIN(__builtin_lsx_vslli_w, "V4iV4iIUi", "nc") ++BUILTIN(__builtin_lsx_vslli_d, "V2LLiV2LLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vsrli_b, "V16cV16cIUi", "nc") ++BUILTIN(__builtin_lsx_vsrli_h, "V8sV8sIUi", "nc") ++BUILTIN(__builtin_lsx_vsrli_w, "V4iV4iIUi", "nc") ++BUILTIN(__builtin_lsx_vsrli_d, "V2LLiV2LLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vsrai_b, "V16cV16cIUi", "nc") ++BUILTIN(__builtin_lsx_vsrai_h, "V8sV8sIUi", "nc") ++BUILTIN(__builtin_lsx_vsrai_w, "V4iV4iIUi", "nc") ++BUILTIN(__builtin_lsx_vsrai_d, "V2LLiV2LLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vshuf4i_b, "V16cV16cIUi", "nc") ++BUILTIN(__builtin_lsx_vshuf4i_h, "V8sV8sIUi", "nc") ++BUILTIN(__builtin_lsx_vshuf4i_w, "V4iV4iIUi", "nc") ++BUILTIN(__builtin_lsx_vshuf4i_d, "V2LLiV2LLiV2LLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vrotr_b, "V16cV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vrotr_h, "V8sV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vrotr_w, "V4iV4iV4i", "nc") ++BUILTIN(__builtin_lsx_vrotr_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vrotri_b, "V16cV16cIi", "nc") ++BUILTIN(__builtin_lsx_vrotri_h, "V8sV8sIi", "nc") ++BUILTIN(__builtin_lsx_vrotri_w, "V4iV4iIi", "nc") ++BUILTIN(__builtin_lsx_vrotri_d, "V2LLiV2LLiIi", "nc") ++ ++BUILTIN(__builtin_lsx_vld, "V16ScvC*Ii", "nc") ++ ++BUILTIN(__builtin_lsx_vst, "vV16Scv*Ii", "nc") ++ ++BUILTIN(__builtin_lsx_bz_v, "iV16Uc", "nc") ++ ++BUILTIN(__builtin_lsx_bnz_v, "iV16Uc", "nc") ++ ++BUILTIN(__builtin_lsx_bz_b, "iV16Uc", "nc") ++BUILTIN(__builtin_lsx_bz_h, "iV8Us", "nc") ++BUILTIN(__builtin_lsx_bz_w, "iV4Ui", "nc") ++BUILTIN(__builtin_lsx_bz_d, "iV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_bnz_b, "iV16Uc", "nc") ++BUILTIN(__builtin_lsx_bnz_h, "iV8Us", "nc") ++BUILTIN(__builtin_lsx_bnz_w, "iV4Ui", "nc") ++BUILTIN(__builtin_lsx_bnz_d, "iV2ULLi", "nc") ++ ++//LoongArch LASX ++ ++BUILTIN(__builtin_lasx_xvfmadd_s, "V8fV8fV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfmadd_d, "V4dV4dV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfmsub_s, "V8fV8fV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfmsub_d, "V4dV4dV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfnmadd_s, "V8fV8fV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfnmadd_d, "V4dV4dV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfnmsub_s, "V8fV8fV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfnmsub_d, "V4dV4dV4dV4d", "nc") ++ ++ ++BUILTIN(__builtin_lasx_xvsll_b, "V32cV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvsll_h, "V16sV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvsll_w, "V8iV8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvsll_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvslli_b, "V32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvslli_h, "V16sV16sIUi", "nc") 
++BUILTIN(__builtin_lasx_xvslli_w, "V8iV8iIUi", "nc") ++BUILTIN(__builtin_lasx_xvslli_d, "V4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsra_b, "V32cV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvsra_h, "V16sV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvsra_w, "V8iV8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvsra_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsrai_b, "V32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrai_h, "V16sV16sIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrai_w, "V8iV8iIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrai_d, "V4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsrar_b, "V32cV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvsrar_h, "V16sV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvsrar_w, "V8iV8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvsrar_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsrari_b, "V32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrari_h, "V16sV16sIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrari_w, "V8iV8iIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrari_d, "V4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsrl_b, "V32cV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvsrl_h, "V16sV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvsrl_w, "V8iV8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvsrl_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsrli_b, "V32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrli_h, "V16sV16sIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrli_w, "V8iV8iIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrli_d, "V4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsrlr_b, "V32cV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvsrlr_h, "V16sV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvsrlr_w, "V8iV8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvsrlr_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsrlri_b, "V32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrlri_h, "V16sV16sIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrlri_w, "V8iV8iIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrlri_d, "V4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvbitclr_b, "V32UcV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvbitclr_h, "V16UsV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvbitclr_w, "V8UiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvbitclr_d, "V4ULLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvbitclri_b, "V32UcV32UcIUi", "nc") ++BUILTIN(__builtin_lasx_xvbitclri_h, "V16UsV16UsIUi", "nc") ++BUILTIN(__builtin_lasx_xvbitclri_w, "V8UiV8UiIUi", "nc") ++BUILTIN(__builtin_lasx_xvbitclri_d, "V4ULLiV4ULLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvbitset_b, "V32UcV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvbitset_h, "V16UsV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvbitset_w, "V8UiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvbitset_d, "V4ULLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvbitseti_b, "V32UcV32UcIUi", "nc") ++BUILTIN(__builtin_lasx_xvbitseti_h, "V16UsV16UsIUi", "nc") ++BUILTIN(__builtin_lasx_xvbitseti_w, "V8UiV8UiIUi", "nc") ++BUILTIN(__builtin_lasx_xvbitseti_d, "V4ULLiV4ULLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvbitrev_b, "V32UcV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvbitrev_h, "V16UsV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvbitrev_w, "V8UiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvbitrev_d, "V4ULLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvbitrevi_b, "V32UcV32UcIUi", "nc") ++BUILTIN(__builtin_lasx_xvbitrevi_h, "V16UsV16UsIUi", "nc") ++BUILTIN(__builtin_lasx_xvbitrevi_w, "V8UiV8UiIUi", "nc") ++BUILTIN(__builtin_lasx_xvbitrevi_d, "V4ULLiV4ULLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvadd_b, "V32cV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvadd_h, "V16sV16sV16s", "nc") 
++BUILTIN(__builtin_lasx_xvadd_w, "V8iV8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvadd_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvaddi_bu, "V32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvaddi_hu, "V16sV16sIUi", "nc") ++BUILTIN(__builtin_lasx_xvaddi_wu, "V8iV8iIUi", "nc") ++BUILTIN(__builtin_lasx_xvaddi_du, "V4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsub_b, "V32cV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvsub_h, "V16sV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvsub_w, "V8iV8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvsub_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsubi_bu, "V32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvsubi_hu, "V16sV16sIUi", "nc") ++BUILTIN(__builtin_lasx_xvsubi_wu, "V8iV8iIUi", "nc") ++BUILTIN(__builtin_lasx_xvsubi_du, "V4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmax_b, "V32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvmax_h, "V16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvmax_w, "V8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvmax_d, "V4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmaxi_b, "V32ScV32ScIi", "nc") ++BUILTIN(__builtin_lasx_xvmaxi_h, "V16SsV16SsIi", "nc") ++BUILTIN(__builtin_lasx_xvmaxi_w, "V8SiV8SiIi", "nc") ++BUILTIN(__builtin_lasx_xvmaxi_d, "V4SLLiV4SLLiIi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmax_bu, "V32UcV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvmax_hu, "V16UsV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvmax_wu, "V8UiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvmax_du, "V4ULLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmaxi_bu, "V32UcV32UcIi", "nc") ++BUILTIN(__builtin_lasx_xvmaxi_hu, "V16UsV16UsIi", "nc") ++BUILTIN(__builtin_lasx_xvmaxi_wu, "V8UiV8UiIi", "nc") ++BUILTIN(__builtin_lasx_xvmaxi_du, "V4ULLiV4ULLiIi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmin_b, "V32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvmin_h, "V16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvmin_w, "V8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvmin_d, "V4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmini_b, "V32ScV32ScIi", "nc") ++BUILTIN(__builtin_lasx_xvmini_h, "V16SsV16SsIi", "nc") ++BUILTIN(__builtin_lasx_xvmini_w, "V8SiV8SiIi", "nc") ++BUILTIN(__builtin_lasx_xvmini_d, "V4SLLiV4SLLiIi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmin_bu, "V32UcV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvmin_hu, "V16UsV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvmin_wu, "V8UiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvmin_du, "V4ULLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmini_bu, "V32UcV32UcIi", "nc") ++BUILTIN(__builtin_lasx_xvmini_hu, "V16UsV16UsIi", "nc") ++BUILTIN(__builtin_lasx_xvmini_wu, "V8UiV8UiIi", "nc") ++BUILTIN(__builtin_lasx_xvmini_du, "V4ULLiV4ULLiIi", "nc") ++ ++BUILTIN(__builtin_lasx_xvseq_b, "V32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvseq_h, "V16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvseq_w, "V8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvseq_d, "V4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvseqi_b, "V32ScV32ScISi", "nc") ++BUILTIN(__builtin_lasx_xvseqi_h, "V16SsV16SsISi", "nc") ++BUILTIN(__builtin_lasx_xvseqi_w, "V8SiV8SiISi", "nc") ++BUILTIN(__builtin_lasx_xvseqi_d, "V4SLLiV4SLLiISi", "nc") ++ ++BUILTIN(__builtin_lasx_xvslt_b, "V32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvslt_h, "V16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvslt_w, "V8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvslt_d, "V4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvslti_b, "V32ScV32ScISi", "nc") ++BUILTIN(__builtin_lasx_xvslti_h, "V16SsV16SsISi", "nc") 
++BUILTIN(__builtin_lasx_xvslti_w, "V8SiV8SiISi", "nc") ++BUILTIN(__builtin_lasx_xvslti_d, "V4SLLiV4SLLiISi", "nc") ++ ++BUILTIN(__builtin_lasx_xvslt_bu, "V32ScV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvslt_hu, "V16SsV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvslt_wu, "V8SiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvslt_du, "V4SLLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvslti_bu, "V32ScV32UcIUi", "nc") ++BUILTIN(__builtin_lasx_xvslti_hu, "V16SsV16UsIUi", "nc") ++BUILTIN(__builtin_lasx_xvslti_wu, "V8SiV8UiIUi", "nc") ++BUILTIN(__builtin_lasx_xvslti_du, "V4SLLiV4ULLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsle_b, "V32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvsle_h, "V16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvsle_w, "V8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvsle_d, "V4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvslei_b, "V32ScV32ScISi", "nc") ++BUILTIN(__builtin_lasx_xvslei_h, "V16SsV16SsISi", "nc") ++BUILTIN(__builtin_lasx_xvslei_w, "V8SiV8SiISi", "nc") ++BUILTIN(__builtin_lasx_xvslei_d, "V4SLLiV4SLLiISi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsle_bu, "V32ScV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvsle_hu, "V16SsV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvsle_wu, "V8SiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvsle_du, "V4SLLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvslei_bu, "V32ScV32UcIUi", "nc") ++BUILTIN(__builtin_lasx_xvslei_hu, "V16SsV16UsIUi", "nc") ++BUILTIN(__builtin_lasx_xvslei_wu, "V8SiV8UiIUi", "nc") ++BUILTIN(__builtin_lasx_xvslei_du, "V4SLLiV4ULLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsat_b, "V32ScV32ScIUi", "nc") ++BUILTIN(__builtin_lasx_xvsat_h, "V16SsV16SsIUi", "nc") ++BUILTIN(__builtin_lasx_xvsat_w, "V8SiV8SiIUi", "nc") ++BUILTIN(__builtin_lasx_xvsat_d, "V4SLLiV4SLLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsat_bu, "V32UcV32UcIUi", "nc") ++BUILTIN(__builtin_lasx_xvsat_hu, "V16UsV16UsIUi", "nc") ++BUILTIN(__builtin_lasx_xvsat_wu, "V8UiV8UiIUi", "nc") ++BUILTIN(__builtin_lasx_xvsat_du, "V4ULLiV4ULLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvadda_b, "V32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvadda_h, "V16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvadda_w, "V8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvadda_d, "V4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsadd_b, "V32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvsadd_h, "V16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvsadd_w, "V8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvsadd_d, "V4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsadd_bu, "V32UcV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvsadd_hu, "V16UsV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvsadd_wu, "V8UiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvsadd_du, "V4ULLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvavg_b, "V32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvavg_h, "V16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvavg_w, "V8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvavg_d, "V4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvavg_bu, "V32UcV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvavg_hu, "V16UsV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvavg_wu, "V8UiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvavg_du, "V4ULLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvavgr_b, "V32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvavgr_h, "V16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvavgr_w, "V8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvavgr_d, "V4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvavgr_bu, "V32UcV32UcV32Uc", "nc") 
++BUILTIN(__builtin_lasx_xvavgr_hu, "V16UsV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvavgr_wu, "V8UiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvavgr_du, "V4ULLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssub_b, "V32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvssub_h, "V16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvssub_w, "V8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvssub_d, "V4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssub_bu, "V32UcV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvssub_hu, "V16UsV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvssub_wu, "V8UiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvssub_du, "V4ULLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvabsd_b, "V32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvabsd_h, "V16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvabsd_w, "V8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvabsd_d, "V4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvabsd_bu, "V32UcV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvabsd_hu, "V16UsV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvabsd_wu, "V8UiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvabsd_du, "V4ULLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmul_b, "V32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvmul_h, "V16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvmul_w, "V8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvmul_d, "V4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmadd_b, "V32ScV32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvmadd_h, "V16SsV16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvmadd_w, "V8SiV8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvmadd_d, "V4SLLiV4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmsub_b, "V32ScV32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvmsub_h, "V16SsV16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvmsub_w, "V8SiV8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvmsub_d, "V4SLLiV4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvdiv_b, "V32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvdiv_h, "V16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvdiv_w, "V8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvdiv_d, "V4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvdiv_bu, "V32UcV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvdiv_hu, "V16UsV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvdiv_wu, "V8UiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvdiv_du, "V4ULLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvhaddw_h_b, "V16SsV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvhaddw_w_h, "V8SiV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvhaddw_d_w, "V4SLLiV8SiV8Si", "nc") ++ ++BUILTIN(__builtin_lasx_xvhaddw_hu_bu, "V16UsV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvhaddw_wu_hu, "V8UiV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvhaddw_du_wu, "V4ULLiV8UiV8Ui", "nc") ++ ++BUILTIN(__builtin_lasx_xvhsubw_h_b, "V16SsV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvhsubw_w_h, "V8SiV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvhsubw_d_w, "V4SLLiV8SiV8Si", "nc") ++ ++BUILTIN(__builtin_lasx_xvhsubw_hu_bu, "V16UsV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvhsubw_wu_hu, "V8UiV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvhsubw_du_wu, "V4ULLiV8UiV8Ui", "nc") ++ ++BUILTIN(__builtin_lasx_xvmod_b, "V32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvmod_h, "V16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvmod_w, "V8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvmod_d, "V4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmod_bu, "V32UcV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvmod_hu, "V16UsV16UsV16Us", "nc") 
++BUILTIN(__builtin_lasx_xvmod_wu, "V8UiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvmod_du, "V4ULLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvrepl128vei_b, "V32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvrepl128vei_h, "V16sV16sIUi", "nc") ++BUILTIN(__builtin_lasx_xvrepl128vei_w, "V8iV8iIUi", "nc") ++BUILTIN(__builtin_lasx_xvrepl128vei_d, "V4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvpickev_b, "V32cV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvpickev_h, "V16sV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvpickev_w, "V8iV8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvpickev_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvpickod_b, "V32cV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvpickod_h, "V16sV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvpickod_w, "V8iV8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvpickod_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvilvh_b, "V32cV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvilvh_h, "V16sV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvilvh_w, "V8iV8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvilvh_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvilvl_b, "V32cV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvilvl_h, "V16sV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvilvl_w, "V8iV8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvilvl_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvpackev_b, "V32cV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvpackev_h, "V16sV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvpackev_w, "V8iV8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvpackev_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvpackod_b, "V32cV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvpackod_h, "V16sV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvpackod_w, "V8iV8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvpackod_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvshuf_b, "V32UcV32UcV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvshuf_h, "V16sV16sV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvshuf_w, "V8iV8iV8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvshuf_d, "V4LLiV4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvand_v, "V32UcV32UcV32Uc", "nc") ++ ++BUILTIN(__builtin_lasx_xvandi_b, "V32UcV32UcIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvor_v, "V32UcV32UcV32Uc", "nc") ++ ++BUILTIN(__builtin_lasx_xvori_b, "V32UcV32UcIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvnor_v, "V32UcV32UcV32Uc", "nc") ++ ++BUILTIN(__builtin_lasx_xvnori_b, "V32UcV32UcIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvxor_v, "V32cV32cV32c", "nc") ++ ++BUILTIN(__builtin_lasx_xvxori_b, "V32UcV32UcIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvbitsel_v, "V32UcV32UcV32UcV32Uc", "nc") ++ ++BUILTIN(__builtin_lasx_xvbitseli_b, "V32UcV32UcV32UcIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvshuf4i_b, "V32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvshuf4i_h, "V16sV16sIUi", "nc") ++BUILTIN(__builtin_lasx_xvshuf4i_w, "V8iV8iIUi", "nc") ++BUILTIN(__builtin_lasx_xvshuf4i_d, "V4LLiV4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvreplgr2vr_b, "V32Sci", "nc") ++BUILTIN(__builtin_lasx_xvreplgr2vr_h, "V16Ssi", "nc") ++BUILTIN(__builtin_lasx_xvreplgr2vr_w, "V8Sii", "nc") ++BUILTIN(__builtin_lasx_xvreplgr2vr_d, "V4SLLiLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvpcnt_b, "V32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvpcnt_h, "V16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvpcnt_w, "V8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvpcnt_d, "V4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvclo_b, "V32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvclo_h, "V16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvclo_w, "V8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvclo_d, 
"V4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvclz_b, "V32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvclz_h, "V16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvclz_w, "V8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvclz_d, "V4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_caf_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_caf_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_cor_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_cor_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_cun_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_cun_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_cune_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_cune_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_cueq_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_cueq_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_ceq_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_ceq_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_cne_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_cne_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_clt_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_clt_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_cult_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_cult_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_cle_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_cle_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_cule_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_cule_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_saf_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_saf_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_sor_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_sor_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_sun_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_sun_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_sune_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_sune_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_sueq_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_sueq_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_seq_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_seq_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_sne_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_sne_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_slt_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_slt_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_sult_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_sult_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_sle_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_sle_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_sule_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_sule_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfadd_s, "V8fV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfadd_d, "V4dV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfsub_s, "V8fV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfsub_d, "V4dV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfmul_s, "V8fV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfmul_d, "V4dV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfdiv_s, "V8fV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfdiv_d, "V4dV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcvt_h_s, "V16sV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcvt_s_d, "V8fV4dV4d", "nc") ++ 
++BUILTIN(__builtin_lasx_xvfmin_s, "V8fV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfmin_d, "V4dV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfmina_s, "V8fV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfmina_d, "V4dV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfmax_s, "V8fV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfmax_d, "V4dV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfmaxa_s, "V8fV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfmaxa_d, "V4dV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfclass_s, "V8iV8f", "nc") ++BUILTIN(__builtin_lasx_xvfclass_d, "V4LLiV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfsqrt_s, "V8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfsqrt_d, "V4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfrecip_s, "V8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfrecip_d, "V4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfrint_s, "V8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfrint_d, "V4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfrsqrt_s, "V8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfrsqrt_d, "V4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvflogb_s, "V8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvflogb_d, "V4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcvth_s_h, "V8fV16s", "nc") ++BUILTIN(__builtin_lasx_xvfcvth_d_s, "V4dV8f", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcvtl_s_h, "V8fV16s", "nc") ++BUILTIN(__builtin_lasx_xvfcvtl_d_s, "V4dV8f", "nc") ++ ++BUILTIN(__builtin_lasx_xvftint_w_s, "V8SiV8f", "nc") ++BUILTIN(__builtin_lasx_xvftint_l_d, "V4SLLiV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvftint_wu_s, "V8UiV8f", "nc") ++BUILTIN(__builtin_lasx_xvftint_lu_d, "V4ULLiV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvftintrz_w_s, "V8SiV8f", "nc") ++BUILTIN(__builtin_lasx_xvftintrz_l_d, "V4LLiV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvftintrz_wu_s, "V8UiV8f", "nc") ++BUILTIN(__builtin_lasx_xvftintrz_lu_d, "V4ULLiV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvffint_s_w, "V8fV8Si", "nc") ++BUILTIN(__builtin_lasx_xvffint_d_l, "V4dV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvffint_s_wu, "V8fV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvffint_d_lu, "V4dV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvreplve_b, "V32cV32cUi", "nc") ++BUILTIN(__builtin_lasx_xvreplve_h, "V16sV16sUi", "nc") ++BUILTIN(__builtin_lasx_xvreplve_w, "V8iV8iUi", "nc") ++BUILTIN(__builtin_lasx_xvreplve_d, "V4LLiV4LLiUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvpermi_w, "V8iV8iV8iIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvandn_v, "V32UcV32UcV32Uc", "nc") ++ ++BUILTIN(__builtin_lasx_xvneg_b, "V32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvneg_h, "V16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvneg_w, "V8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvneg_d, "V4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmuh_b, "V32cV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvmuh_h, "V16sV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvmuh_w, "V8iV8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvmuh_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmuh_bu, "V32UcV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvmuh_hu, "V16UsV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvmuh_wu, "V8UiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvmuh_du, "V4ULLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsllwil_d_w, "V4LLiV8SiIUi", "nc") ++BUILTIN(__builtin_lasx_xvsllwil_w_h, "V8SiV16sIUi", "nc") ++BUILTIN(__builtin_lasx_xvsllwil_h_b, "V16sV32cIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsllwil_du_wu, "V4ULLiV8UiIUi", "nc") ++BUILTIN(__builtin_lasx_xvsllwil_wu_hu, "V8UiV16UsIUi", "nc") ++BUILTIN(__builtin_lasx_xvsllwil_hu_bu, "V16UsV32UcIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsran_b_h, "V32ScV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvsran_h_w, "V16sV8SiV8Si", 
"nc") ++BUILTIN(__builtin_lasx_xvsran_w_d, "V8SiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssran_b_h, "V32ScV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvssran_h_w, "V16sV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvssran_w_d, "V8SiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssran_bu_h, "V32UcV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvssran_hu_w, "V16UsV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvssran_wu_d, "V8UiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsrarn_b_h, "V32ScV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvsrarn_h_w, "V16sV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvsrarn_w_d, "V8SiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssrarn_b_h, "V32ScV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvssrarn_h_w, "V16sV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvssrarn_w_d, "V8SiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssrarn_bu_h, "V32UcV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvssrarn_hu_w, "V16UsV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvssrarn_wu_d, "V8UiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsrln_b_h, "V32ScV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvsrln_h_w, "V16sV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvsrln_w_d, "V8SiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssrln_bu_h, "V32UcV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvssrln_hu_w, "V16UsV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvssrln_wu_d, "V8UiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsrlrn_b_h, "V32ScV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvsrlrn_h_w, "V16sV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvsrlrn_w_d, "V8SiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssrlrn_bu_h, "V32UcV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvssrlrn_hu_w, "V16UsV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvssrlrn_wu_d, "V8UiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvfrstpi_b, "V32cV32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvfrstpi_h, "V16sV16sV16sIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvfrstp_b, "V32ScV32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvfrstp_h, "V16SsV16SsV16SsV16Ss", "nc") ++ ++BUILTIN(__builtin_lasx_xvbsrl_v, "V32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvbsll_v, "V32cV32cIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvextrins_b, "V32cV32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvextrins_h, "V16sV16sV16sIUi", "nc") ++BUILTIN(__builtin_lasx_xvextrins_w, "V8iV8iV8iIUi", "nc") ++BUILTIN(__builtin_lasx_xvextrins_d, "V4LLiV4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmskltz_b, "V32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvmskltz_h, "V16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvmskltz_w, "V8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvmskltz_d, "V4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsigncov_b, "V32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvsigncov_h, "V16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvsigncov_w, "V8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvsigncov_d, "V4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvftintrne_w_s, "V8SiV8f", "nc") ++BUILTIN(__builtin_lasx_xvftintrne_l_d, "V4LLiV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvftintrp_w_s, "V8SiV8f", "nc") ++BUILTIN(__builtin_lasx_xvftintrp_l_d, "V4LLiV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvftintrm_w_s, "V8SiV8f", "nc") ++BUILTIN(__builtin_lasx_xvftintrm_l_d, "V4LLiV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvftint_w_d, "V8SiV4dV4d", "nc") ++BUILTIN(__builtin_lasx_xvffint_s_l, "V8fV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvftintrz_w_d, "V8SiV4dV4d", "nc") ++BUILTIN(__builtin_lasx_xvftintrp_w_d, "V8SiV4dV4d", "nc") ++BUILTIN(__builtin_lasx_xvftintrm_w_d, "V8SiV4dV4d", "nc") 
++BUILTIN(__builtin_lasx_xvftintrne_w_d, "V8SiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvftinth_l_s, "V4LLiV8f", "nc") ++BUILTIN(__builtin_lasx_xvftintl_l_s, "V4LLiV8f", "nc") ++ ++BUILTIN(__builtin_lasx_xvffinth_d_w, "V4dV8Si", "nc") ++BUILTIN(__builtin_lasx_xvffintl_d_w, "V4dV8Si", "nc") ++ ++BUILTIN(__builtin_lasx_xvftintrzh_l_s, "V4LLiV8f", "nc") ++BUILTIN(__builtin_lasx_xvftintrzl_l_s, "V4LLiV8f", "nc") ++ ++BUILTIN(__builtin_lasx_xvftintrph_l_s, "V4LLiV8f", "nc") ++BUILTIN(__builtin_lasx_xvftintrpl_l_s, "V4LLiV8f", "nc") ++ ++BUILTIN(__builtin_lasx_xvftintrmh_l_s, "V4LLiV8f", "nc") ++BUILTIN(__builtin_lasx_xvftintrml_l_s, "V4LLiV8f", "nc") ++ ++BUILTIN(__builtin_lasx_xvftintrneh_l_s, "V4LLiV8f", "nc") ++BUILTIN(__builtin_lasx_xvftintrnel_l_s, "V4LLiV8f", "nc") ++ ++BUILTIN(__builtin_lasx_xvfrintrne_s, "V8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfrintrne_d, "V4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfrintrz_s, "V8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfrintrz_d, "V4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfrintrp_s, "V8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfrintrp_d, "V4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfrintrm_s, "V8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfrintrm_d, "V4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvld, "V32ScvC*Ii", "nc") ++ ++BUILTIN(__builtin_lasx_xvst, "vV32Scv*Ii", "nc") ++ ++BUILTIN(__builtin_lasx_xvstelm_b, "vV32Scv*IiUi", "nc") ++BUILTIN(__builtin_lasx_xvstelm_h, "vV16Ssv*IiUi", "nc") ++BUILTIN(__builtin_lasx_xvstelm_w, "vV8Siv*IiUi", "nc") ++BUILTIN(__builtin_lasx_xvstelm_d, "vV4SLLiv*IiUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvinsve0_w, "V8iV8iV8iIUi", "nc") ++BUILTIN(__builtin_lasx_xvinsve0_d, "V4LLiV4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvpickve_w, "V8iV8iIUi", "nc") ++BUILTIN(__builtin_lasx_xvpickve_d, "V4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvpickve_w_f, "V8fV8fIUi", "nc") ++BUILTIN(__builtin_lasx_xvpickve_d_f, "V4dV4dIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssrlrn_b_h, "V32ScV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvssrlrn_h_w, "V16sV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvssrlrn_w_d, "V8SiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssrln_b_h, "V32ScV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvssrln_h_w, "V16sV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvssrln_w_d, "V8SiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvorn_v, "V32ScV32ScV32Sc", "nc") ++ ++BUILTIN(__builtin_lasx_xvldi, "V4LLiIi", "nc") ++BUILTIN(__builtin_lasx_xvrepli_b, "V32cIi", "nc") ++BUILTIN(__builtin_lasx_xvrepli_h, "V16sIi", "nc") ++BUILTIN(__builtin_lasx_xvrepli_w, "V8iIi", "nc") ++BUILTIN(__builtin_lasx_xvrepli_d, "V4LLiIi", "nc") ++ ++BUILTIN(__builtin_lasx_xvldx, "V32ScvC*LLi", "nc") ++BUILTIN(__builtin_lasx_xvstx, "vV32Scv*LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvinsgr2vr_w, "V8SiV8SiiIUi", "nc") ++BUILTIN(__builtin_lasx_xvinsgr2vr_d, "V4SLLiV4SLLiLLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvreplve0_b, "V32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvreplve0_h, "V16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvreplve0_w, "V8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvreplve0_d, "V4SLLiV4SLLi", "nc") ++BUILTIN(__builtin_lasx_xvreplve0_q, "V32ScV32Sc", "nc") ++ ++BUILTIN(__builtin_lasx_vext2xv_d_w, "V4LLiV8Si", "nc") ++BUILTIN(__builtin_lasx_vext2xv_w_h, "V8SiV16s", "nc") ++BUILTIN(__builtin_lasx_vext2xv_h_b, "V16sV32c", "nc") ++ ++BUILTIN(__builtin_lasx_vext2xv_d_h, "V4LLiV16s", "nc") ++BUILTIN(__builtin_lasx_vext2xv_w_b, "V8SiV32c", "nc") ++BUILTIN(__builtin_lasx_vext2xv_d_b, "V4LLiV32c", "nc") ++ ++BUILTIN(__builtin_lasx_vext2xv_du_wu, 
"V4LLiV8Si", "nc") ++BUILTIN(__builtin_lasx_vext2xv_wu_hu, "V8SiV16s", "nc") ++BUILTIN(__builtin_lasx_vext2xv_hu_bu, "V16sV32c", "nc") ++ ++BUILTIN(__builtin_lasx_vext2xv_du_hu, "V4LLiV16s", "nc") ++BUILTIN(__builtin_lasx_vext2xv_wu_bu, "V8SiV32c", "nc") ++BUILTIN(__builtin_lasx_vext2xv_du_bu, "V4LLiV32c", "nc") ++ ++BUILTIN(__builtin_lasx_xvpermi_q, "V32cV32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvpermi_d, "V4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvperm_w, "V8iV8iV8i", "nc") ++ ++BUILTIN(__builtin_lasx_xvldrepl_b, "V32cvC*Ii", "nc") ++BUILTIN(__builtin_lasx_xvldrepl_h, "V16svC*Ii", "nc") ++BUILTIN(__builtin_lasx_xvldrepl_w, "V8ivC*Ii", "nc") ++BUILTIN(__builtin_lasx_xvldrepl_d, "V4LLivC*Ii", "nc") ++ ++BUILTIN(__builtin_lasx_xvpickve2gr_w, "iV8SiIUi", "nc") ++BUILTIN(__builtin_lasx_xvpickve2gr_d, "LLiV4SLLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvpickve2gr_wu, "iV8UiIUi", "nc") ++BUILTIN(__builtin_lasx_xvpickve2gr_du, "LLiV4ULLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvaddwev_d_w, "V4LLiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvaddwev_w_h, "V8SiV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvaddwev_h_b, "V16sV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvaddwev_q_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvaddwev_d_wu, "V4LLiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvaddwev_w_hu, "V8SiV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvaddwev_h_bu, "V16sV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvaddwev_q_du, "V4LLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsubwev_d_w, "V4LLiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvsubwev_w_h, "V8SiV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvsubwev_h_b, "V16sV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvsubwev_q_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsubwev_d_wu, "V4LLiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvsubwev_w_hu, "V8SiV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvsubwev_h_bu, "V16sV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvsubwev_q_du, "V4LLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmulwev_d_w, "V4LLiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvmulwev_w_h, "V8SiV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvmulwev_h_b, "V16sV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvmulwev_q_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmulwev_d_wu, "V4LLiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvmulwev_w_hu, "V8SiV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvmulwev_h_bu, "V16sV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvmulwev_q_du, "V4LLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvaddwod_d_w, "V4LLiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvaddwod_w_h, "V8SiV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvaddwod_h_b, "V16sV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvaddwod_q_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvaddwod_d_wu, "V4LLiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvaddwod_w_hu, "V8SiV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvaddwod_h_bu, "V16sV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvaddwod_q_du, "V4LLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsubwod_d_w, "V4LLiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvsubwod_w_h, "V8SiV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvsubwod_h_b, "V16sV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvsubwod_q_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsubwod_d_wu, "V4LLiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvsubwod_w_hu, "V8SiV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvsubwod_h_bu, "V16sV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvsubwod_q_du, "V4LLiV4ULLiV4ULLi", "nc") ++ 
++BUILTIN(__builtin_lasx_xvmulwod_d_w, "V4LLiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvmulwod_w_h, "V8SiV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvmulwod_h_b, "V16sV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvmulwod_q_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmulwod_d_wu, "V4LLiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvmulwod_w_hu, "V8SiV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvmulwod_h_bu, "V16sV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvmulwod_q_du, "V4LLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvaddwev_d_wu_w, "V4LLiV8UiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvaddwev_w_hu_h, "V8SiV16UsV16s", "nc") ++BUILTIN(__builtin_lasx_xvaddwev_h_bu_b, "V16sV32UcV32c", "nc") ++BUILTIN(__builtin_lasx_xvaddwev_q_du_d, "V4LLiV4ULLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmulwev_d_wu_w, "V4LLiV8UiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvmulwev_w_hu_h, "V8SiV16UsV16s", "nc") ++BUILTIN(__builtin_lasx_xvmulwev_h_bu_b, "V16sV32UcV32c", "nc") ++BUILTIN(__builtin_lasx_xvmulwev_q_du_d, "V4LLiV4ULLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvaddwod_d_wu_w, "V4LLiV8UiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvaddwod_w_hu_h, "V8SiV16UsV16s", "nc") ++BUILTIN(__builtin_lasx_xvaddwod_h_bu_b, "V16sV32UcV32c", "nc") ++BUILTIN(__builtin_lasx_xvaddwod_q_du_d, "V4LLiV4ULLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmulwod_d_wu_w, "V4LLiV8UiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvmulwod_w_hu_h, "V8SiV16UsV16s", "nc") ++BUILTIN(__builtin_lasx_xvmulwod_h_bu_b, "V16sV32UcV32c", "nc") ++BUILTIN(__builtin_lasx_xvmulwod_q_du_d, "V4LLiV4ULLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvhaddw_q_d, "V4LLiV4LLiV4LLi", "nc") ++BUILTIN(__builtin_lasx_xvhsubw_q_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvhaddw_qu_du, "V4ULLiV4ULLiV4ULLi", "nc") ++BUILTIN(__builtin_lasx_xvhsubw_qu_du, "V4ULLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmaddwev_d_w, "V4LLiV4LLiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvmaddwev_w_h, "V8SiV8SiV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvmaddwev_h_b, "V16sV16sV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvmaddwev_q_d, "V4LLiV4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmaddwev_d_wu, "V4ULLiV4ULLiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvmaddwev_w_hu, "V8UiV8UiV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvmaddwev_h_bu, "V16UsV16UsV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvmaddwev_q_du, "V4ULLiV4ULLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmaddwod_d_w, "V4LLiV4LLiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvmaddwod_w_h, "V8SiV8SiV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvmaddwod_h_b, "V16sV16sV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvmaddwod_q_d, "V4LLiV4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmaddwod_d_wu, "V4ULLiV4ULLiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvmaddwod_w_hu, "V8UiV8UiV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvmaddwod_h_bu, "V16UsV16UsV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvmaddwod_q_du, "V4ULLiV4ULLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmaddwev_d_wu_w, "V4LLiV4LLiV8UiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvmaddwev_w_hu_h, "V8SiV8SiV16UsV16s", "nc") ++BUILTIN(__builtin_lasx_xvmaddwev_h_bu_b, "V16sV16sV32UcV32c", "nc") ++BUILTIN(__builtin_lasx_xvmaddwev_q_du_d, "V4LLiV4LLiV4ULLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmaddwod_d_wu_w, "V4LLiV4LLiV8UiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvmaddwod_w_hu_h, "V8SiV8SiV16UsV16s", "nc") ++BUILTIN(__builtin_lasx_xvmaddwod_h_bu_b, "V16sV16sV32UcV32c", "nc") ++BUILTIN(__builtin_lasx_xvmaddwod_q_du_d, "V4LLiV4LLiV4ULLiV4LLi", "nc") ++ 
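Several prototypes above use 'I' to mark an operand that must be an integer constant expression, for example the element index of xvpickve2gr_w ("iV8SiIUi") and the immediate of xvrepli_w ("V8iIi"); the CheckLoongArchBuiltinFunctionCall hook added to Sema later in this patch is presumably where range checks for such immediates live. A brief sketch, with an illustrative typedef:

    typedef int v8i32 __attribute__((vector_size(32)));

    int first_lane(v8i32 v) {
      return __builtin_lasx_xvpickve2gr_w(v, 0);  // index must be a constant expression
    }

    v8i32 splat_five(void) {
      return __builtin_lasx_xvrepli_w(5);         // immediate replicate
    }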
++BUILTIN(__builtin_lasx_xvrotr_b, "V32cV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvrotr_h, "V16sV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvrotr_w, "V8iV8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvrotr_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvadd_q, "V4LLiV4LLiV4LLi", "nc") ++BUILTIN(__builtin_lasx_xvsub_q, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmskgez_b, "V32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvmsknz_b, "V16sV16s", "nc") ++ ++BUILTIN(__builtin_lasx_xvexth_d_w, "V4LLiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvexth_w_h, "V8SiV16s", "nc") ++BUILTIN(__builtin_lasx_xvexth_h_b, "V16sV32c", "nc") ++BUILTIN(__builtin_lasx_xvexth_q_d, "V4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvexth_du_wu, "V4ULLiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvexth_wu_hu, "V8UiV16Us", "nc") ++BUILTIN(__builtin_lasx_xvexth_hu_bu, "V16UsV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvexth_qu_du, "V4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvrotri_b, "V32cV32cIi", "nc") ++BUILTIN(__builtin_lasx_xvrotri_h, "V16sV16sIi", "nc") ++BUILTIN(__builtin_lasx_xvrotri_w, "V8iV8iIi", "nc") ++BUILTIN(__builtin_lasx_xvrotri_d, "V4LLiV4LLiIi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsrlni_b_h, "V32cV32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrlni_h_w, "V16sV16sV16sIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrlni_w_d, "V8iV8iV8iIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrlni_d_q, "V4LLiV4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsrlrni_b_h, "V32cV32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrlrni_h_w, "V16sV16sV16sIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrlrni_w_d, "V8iV8iV8iIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrlrni_d_q, "V4LLiV4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssrlni_b_h, "V32cV32cV32cIi", "nc") ++BUILTIN(__builtin_lasx_xvssrlni_h_w, "V16sV16sV16sIi", "nc") ++BUILTIN(__builtin_lasx_xvssrlni_w_d, "V8iV8iV8iIi", "nc") ++BUILTIN(__builtin_lasx_xvssrlni_d_q, "V4LLiV4LLiV4LLiIi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssrlni_bu_h, "V32cV32cV32cIi", "nc") ++BUILTIN(__builtin_lasx_xvssrlni_hu_w, "V16sV16sV16sIi", "nc") ++BUILTIN(__builtin_lasx_xvssrlni_wu_d, "V8iV8iV8iIi", "nc") ++BUILTIN(__builtin_lasx_xvssrlni_du_q, "V4LLiV4LLiV4LLiIi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssrlrni_b_h, "V32cV32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvssrlrni_h_w, "V16sV16sV16sIUi", "nc") ++BUILTIN(__builtin_lasx_xvssrlrni_w_d, "V8iV8iV8iIUi", "nc") ++BUILTIN(__builtin_lasx_xvssrlrni_d_q, "V4LLiV4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssrlrni_bu_h, "V32cV32cV32cIi", "nc") ++BUILTIN(__builtin_lasx_xvssrlrni_hu_w, "V16sV16sV16sIi", "nc") ++BUILTIN(__builtin_lasx_xvssrlrni_wu_d, "V8iV8iV8iIi", "nc") ++BUILTIN(__builtin_lasx_xvssrlrni_du_q, "V4LLiV4LLiV4LLiIi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsrani_b_h, "V32cV32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrani_h_w, "V16sV16sV16sIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrani_w_d, "V8iV8iV8iIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrani_d_q, "V4LLiV4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsrarni_b_h, "V32cV32cV32cIi", "nc") ++BUILTIN(__builtin_lasx_xvsrarni_h_w, "V16sV16sV16sIi", "nc") ++BUILTIN(__builtin_lasx_xvsrarni_w_d, "V8iV8iV8iIi", "nc") ++BUILTIN(__builtin_lasx_xvsrarni_d_q, "V4LLiV4LLiV4LLiIi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssrani_b_h, "V32cV32cV32cIi", "nc") ++BUILTIN(__builtin_lasx_xvssrani_h_w, "V16sV16sV16sIi", "nc") ++BUILTIN(__builtin_lasx_xvssrani_w_d, "V8iV8iV8iIi", "nc") ++BUILTIN(__builtin_lasx_xvssrani_d_q, "V4LLiV4LLiV4LLiIi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssrani_bu_h, "V32cV32cV32cIi", "nc") 
++BUILTIN(__builtin_lasx_xvssrani_hu_w, "V16sV16sV16sIi", "nc") ++BUILTIN(__builtin_lasx_xvssrani_wu_d, "V8iV8iV8iIi", "nc") ++BUILTIN(__builtin_lasx_xvssrani_du_q, "V4LLiV4LLiV4LLiIi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssrarni_b_h, "V32cV32cV32cIi", "nc") ++BUILTIN(__builtin_lasx_xvssrarni_h_w, "V16sV16sV16sIi", "nc") ++BUILTIN(__builtin_lasx_xvssrarni_w_d, "V8iV8iV8iIi", "nc") ++BUILTIN(__builtin_lasx_xvssrarni_d_q, "V4LLiV4LLiV4LLiIi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssrarni_bu_h, "V32cV32cV32cIi", "nc") ++BUILTIN(__builtin_lasx_xvssrarni_hu_w, "V16sV16sV16sIi", "nc") ++BUILTIN(__builtin_lasx_xvssrarni_wu_d, "V8iV8iV8iIi", "nc") ++BUILTIN(__builtin_lasx_xvssrarni_du_q, "V4LLiV4LLiV4LLiIi", "nc") ++ ++BUILTIN(__builtin_lasx_xbz_v, "iV32Uc", "nc") ++ ++BUILTIN(__builtin_lasx_xbnz_v, "iV32Uc", "nc") ++ ++BUILTIN(__builtin_lasx_xbz_b, "iV32Uc", "nc") ++BUILTIN(__builtin_lasx_xbz_h, "iV16Us", "nc") ++BUILTIN(__builtin_lasx_xbz_w, "iV8Ui", "nc") ++BUILTIN(__builtin_lasx_xbz_d, "iV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xbnz_b, "iV32Uc", "nc") ++BUILTIN(__builtin_lasx_xbnz_h, "iV16Us", "nc") ++BUILTIN(__builtin_lasx_xbnz_w, "iV8Ui", "nc") ++BUILTIN(__builtin_lasx_xbnz_d, "iV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvextl_q_d, "V4LLiV4LLi", "nc") ++BUILTIN(__builtin_lasx_xvextl_qu_du, "V4LLiV4ULLi", "nc") ++ ++ ++// LoongArch BASE ++ ++BUILTIN(__builtin_loongarch_cpucfg, "UiUi", "nc") ++BUILTIN(__builtin_loongarch_csrrd_w, "UiIUi", "nc") ++BUILTIN(__builtin_loongarch_csrrd_d, "ULiIULi", "nc") ++BUILTIN(__builtin_loongarch_csrwr_w, "UiUiIUi", "nc") ++BUILTIN(__builtin_loongarch_csrwr_d, "ULiULiIULi", "nc") ++BUILTIN(__builtin_loongarch_csrxchg_w, "UiUiUiIUi", "nc") ++BUILTIN(__builtin_loongarch_csrxchg_d, "ULiULiULiIULi", "nc") ++BUILTIN(__builtin_loongarch_iocsrrd_b, "UiUi", "nc") ++BUILTIN(__builtin_loongarch_iocsrrd_h, "UiUi", "nc") ++BUILTIN(__builtin_loongarch_iocsrrd_w, "UiUi", "nc") ++BUILTIN(__builtin_loongarch_iocsrrd_d, "ULiUi", "nc") ++BUILTIN(__builtin_loongarch_iocsrwr_b, "vUiUi", "nc") ++BUILTIN(__builtin_loongarch_iocsrwr_h, "vUiUi", "nc") ++BUILTIN(__builtin_loongarch_iocsrwr_w, "vUiUi", "nc") ++BUILTIN(__builtin_loongarch_iocsrwr_d, "vULiUi", "nc") ++BUILTIN(__builtin_loongarch_cacop_w, "viUii", "nc") ++BUILTIN(__builtin_loongarch_cacop_d, "viULiLi", "nc") ++BUILTIN(__builtin_loongarch_crc_w_b_w, "iii", "nc") ++BUILTIN(__builtin_loongarch_crc_w_h_w, "iii", "nc") ++BUILTIN(__builtin_loongarch_crc_w_w_w, "iii", "nc") ++BUILTIN(__builtin_loongarch_crc_w_d_w, "iLii", "nc") ++BUILTIN(__builtin_loongarch_crcc_w_b_w, "iii", "nc") ++BUILTIN(__builtin_loongarch_crcc_w_h_w, "iii", "nc") ++BUILTIN(__builtin_loongarch_crcc_w_w_w, "iii", "nc") ++BUILTIN(__builtin_loongarch_crcc_w_d_w, "iLii", "nc") ++BUILTIN(__builtin_loongarch_tlbclr, "v", "nc") ++BUILTIN(__builtin_loongarch_tlbflush, "v", "nc") ++BUILTIN(__builtin_loongarch_tlbfill, "v", "nc") ++BUILTIN(__builtin_loongarch_tlbrd, "v", "nc") ++BUILTIN(__builtin_loongarch_tlbwr, "v", "nc") ++BUILTIN(__builtin_loongarch_tlbsrch, "v", "nc") ++BUILTIN(__builtin_loongarch_syscall, "vIULi", "nc") ++BUILTIN(__builtin_loongarch_break, "vIULi", "nc") ++BUILTIN(__builtin_loongarch_asrtle_d, "vLiLi", "nc") ++BUILTIN(__builtin_loongarch_asrtgt_d, "vLiLi", "nc") ++BUILTIN(__builtin_loongarch_dbar, "vIULi", "nc") ++BUILTIN(__builtin_loongarch_ibar, "vIULi", "nc") ++#undef BUILTIN +diff --git a/include/clang/Basic/TargetBuiltins.h b/include/clang/Basic/TargetBuiltins.h +index d8ad9858d..5af3d2099 100644 +--- a/include/clang/Basic/TargetBuiltins.h 
++++ b/include/clang/Basic/TargetBuiltins.h +@@ -150,6 +150,16 @@ namespace clang { + }; + } // namespace RISCV + ++ /// LoongArch builtins ++ namespace LoongArch { ++ enum { ++ LastTIBuiltin = clang::Builtin::FirstTSBuiltin-1, ++#define BUILTIN(ID, TYPE, ATTRS) BI##ID, ++#include "clang/Basic/BuiltinsLoongArch.def" ++ LastTSBuiltin ++ }; ++ } // namespace LoongArch ++ + /// Flags to identify the types for overloaded Neon builtins. + /// + /// These must be kept in sync with the flags in utils/TableGen/NeonEmitter.h. +@@ -341,7 +351,8 @@ namespace clang { + PPC::LastTSBuiltin, NVPTX::LastTSBuiltin, AMDGPU::LastTSBuiltin, + X86::LastTSBuiltin, VE::LastTSBuiltin, RISCV::LastTSBuiltin, + Hexagon::LastTSBuiltin, Mips::LastTSBuiltin, XCore::LastTSBuiltin, +- SystemZ::LastTSBuiltin, WebAssembly::LastTSBuiltin}); ++ SystemZ::LastTSBuiltin, WebAssembly::LastTSBuiltin, ++ LoongArch::LastTSBuiltin}); + + } // end namespace clang. + +diff --git a/include/clang/Basic/TargetCXXABI.def b/include/clang/Basic/TargetCXXABI.def +index 9501cca76..8ea4becef 100644 +--- a/include/clang/Basic/TargetCXXABI.def ++++ b/include/clang/Basic/TargetCXXABI.def +@@ -88,6 +88,12 @@ ITANIUM_CXXABI(GenericAArch64, "aarch64") + /// - representation of member function pointers adjusted as in ARM. + ITANIUM_CXXABI(GenericMIPS, "mips") + ++/// The generic LoongArch ABI is a modified version of the Itanium ABI. ++/// ++/// At the moment, only change from the generic ABI in this case is: ++/// - representation of member function pointers adjusted as in ARM. ++ITANIUM_CXXABI(GenericLoongArch, "loongarch") ++ + /// The WebAssembly ABI is a modified version of the Itanium ABI. + /// + /// The changes from the Itanium ABI are: +diff --git a/include/clang/Basic/TargetCXXABI.h b/include/clang/Basic/TargetCXXABI.h +index e727f85ed..507cf580e 100644 +--- a/include/clang/Basic/TargetCXXABI.h ++++ b/include/clang/Basic/TargetCXXABI.h +@@ -102,6 +102,9 @@ public: + case GenericAArch64: + return T.isAArch64(); + ++ case GenericLoongArch: ++ return T.isLoongArch(); ++ + case GenericMIPS: + return T.isMIPS(); + +@@ -166,6 +169,7 @@ public: + case Fuchsia: + case GenericARM: + case GenericAArch64: ++ case GenericLoongArch: + case GenericMIPS: + // TODO: ARM-style pointers to member functions put the discriminator in + // the this adjustment, so they don't require functions to have any +@@ -250,6 +254,7 @@ public: + case GenericItanium: + case iOS: // old iOS compilers did not follow this rule + case Microsoft: ++ case GenericLoongArch: + case GenericMIPS: + case XL: + return true; +@@ -288,6 +293,7 @@ public: + case GenericAArch64: + case GenericARM: + case iOS: ++ case GenericLoongArch: + case GenericMIPS: + case XL: + return UseTailPaddingUnlessPOD03; +diff --git a/include/clang/Driver/Options.td b/include/clang/Driver/Options.td +index 3cab37b21..8a264ac42 100644 +--- a/include/clang/Driver/Options.td ++++ b/include/clang/Driver/Options.td +@@ -184,6 +184,8 @@ def m_x86_Features_Group : OptionGroup<"">, + Group, Flags<[CoreOption]>, DocName<"X86">; + def m_riscv_Features_Group : OptionGroup<"">, + Group, DocName<"RISCV">; ++def m_loongarch_Features_Group : OptionGroup<"">, ++ Group, DocName<"LoongArch">; + + def m_libc_Group : OptionGroup<"">, Group, + Flags<[HelpHidden]>; +@@ -3491,12 +3493,15 @@ def mcmodel_EQ_medany : Flag<["-"], "mcmodel=medany">, Group, Group, + HelpText<"Enable use of experimental RISC-V extensions.">; + +-def munaligned_access : Flag<["-"], "munaligned-access">, Group, +- HelpText<"Allow memory accesses to be 
unaligned (AArch32/AArch64 only)">; +-def mno_unaligned_access : Flag<["-"], "mno-unaligned-access">, Group, +- HelpText<"Force all memory accesses to be aligned (AArch32/AArch64 only)">; ++def munaligned_access : Flag<["-"], "munaligned-access">, Group, ++ HelpText<"Allow memory accesses to be unaligned">; ++def mno_unaligned_access : Flag<["-"], "mno-unaligned-access">, Group, ++ HelpText<"Force all memory accesses to be aligned">; + def mstrict_align : Flag<["-"], "mstrict-align">, Alias, Flags<[CC1Option,HelpHidden]>, + HelpText<"Force all memory accesses to be aligned (same as mno-unaligned-access)">; ++def mno_strict_align : Flag<["-"], "mno-strict-align">, Group, ++ Flags<[CC1Option,HelpHidden]>, Alias, ++ HelpText<"Allow memory accesses to be unaligned (LoongArch only, same as munaligned-access)">; + def mno_thumb : Flag<["-"], "mno-thumb">, Group; + def mrestrict_it: Flag<["-"], "mrestrict-it">, Group, + HelpText<"Disallow generation of complex IT blocks.">; +@@ -3824,6 +3829,14 @@ def mstack_protector_guard_reg_EQ : Joined<["-"], "mstack-protector-guard-reg="> + def mfentry : Flag<["-"], "mfentry">, HelpText<"Insert calls to fentry at function entry (x86/SystemZ only)">, + Flags<[CC1Option]>, Group, + MarshallingInfoFlag>; ++def mlsx : Flag<["-"], "mlsx">, Group, ++ HelpText<"Use LARCH Loongson LSX instructions.">; ++def mno_lsx : Flag<["-"], "mno-lsx">, Group, ++ HelpText<"Disable LARCH Loongson LSX instructions.">; ++def mlasx : Flag<["-"], "mlasx">, Group, ++ HelpText<"Enable LARCH Loongson LASX instructions.">; ++def mno_lasx : Flag<["-"], "mno-lasx">, Group, ++ HelpText<"Disable LARCH Loongson LASX instructions.">; + def mnop_mcount : Flag<["-"], "mnop-mcount">, HelpText<"Generate mcount/__fentry__ calls as nops. To activate they need to be patched in.">, + Flags<[CC1Option]>, Group, + MarshallingInfoFlag>; +diff --git a/include/clang/Sema/Sema.h b/include/clang/Sema/Sema.h +index 681a76dfa..5249fe601 100644 +--- a/include/clang/Sema/Sema.h ++++ b/include/clang/Sema/Sema.h +@@ -13117,6 +13117,9 @@ private: + bool CheckRISCVLMUL(CallExpr *TheCall, unsigned ArgNum); + bool CheckRISCVBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID, + CallExpr *TheCall); ++ bool CheckLoongArchBuiltinFunctionCall(const TargetInfo &TI, ++ unsigned BuiltinID, ++ CallExpr *TheCall); + + bool SemaBuiltinVAStart(unsigned BuiltinID, CallExpr *TheCall); + bool SemaBuiltinVAStartARMMicrosoft(CallExpr *Call); +diff --git a/include/clang/module.modulemap b/include/clang/module.modulemap +index 01bce7771..a21e2beeb 100644 +--- a/include/clang/module.modulemap ++++ b/include/clang/module.modulemap +@@ -42,6 +42,7 @@ module Clang_Basic { + textual header "Basic/BuiltinsHexagon.def" + textual header "Basic/BuiltinsHexagonDep.def" + textual header "Basic/BuiltinsHexagonMapCustomDep.def" ++ textual header "Basic/BuiltinsLoongArch.def" + textual header "Basic/BuiltinsMips.def" + textual header "Basic/BuiltinsNEON.def" + textual header "Basic/BuiltinsNVPTX.def" +diff --git a/lib/AST/ASTContext.cpp b/lib/AST/ASTContext.cpp +index cfd7bf604..c6e1e9eed 100644 +--- a/lib/AST/ASTContext.cpp ++++ b/lib/AST/ASTContext.cpp +@@ -900,6 +900,7 @@ CXXABI *ASTContext::createCXXABI(const TargetInfo &T) { + case TargetCXXABI::iOS: + case TargetCXXABI::WatchOS: + case TargetCXXABI::GenericAArch64: ++ case TargetCXXABI::GenericLoongArch: + case TargetCXXABI::GenericMIPS: + case TargetCXXABI::GenericItanium: + case TargetCXXABI::WebAssembly: +@@ -11747,6 +11748,7 @@ MangleContext *ASTContext::createMangleContext(const 
TargetInfo *T) { + case TargetCXXABI::GenericAArch64: + case TargetCXXABI::GenericItanium: + case TargetCXXABI::GenericARM: ++ case TargetCXXABI::GenericLoongArch: + case TargetCXXABI::GenericMIPS: + case TargetCXXABI::iOS: + case TargetCXXABI::WebAssembly: +diff --git a/lib/Basic/CMakeLists.txt b/lib/Basic/CMakeLists.txt +index c38c9fddb..f0f3839a7 100644 +--- a/lib/Basic/CMakeLists.txt ++++ b/lib/Basic/CMakeLists.txt +@@ -82,6 +82,7 @@ add_clang_library(clangBasic + Targets/Hexagon.cpp + Targets/Lanai.cpp + Targets/Le64.cpp ++ Targets/LoongArch.cpp + Targets/M68k.cpp + Targets/MSP430.cpp + Targets/Mips.cpp +diff --git a/lib/Basic/Targets.cpp b/lib/Basic/Targets.cpp +index 2d6ef9984..d63468161 100644 +--- a/lib/Basic/Targets.cpp ++++ b/lib/Basic/Targets.cpp +@@ -24,6 +24,7 @@ + #include "Targets/Hexagon.h" + #include "Targets/Lanai.h" + #include "Targets/Le64.h" ++#include "Targets/LoongArch.h" + #include "Targets/M68k.h" + #include "Targets/MSP430.h" + #include "Targets/Mips.h" +@@ -327,6 +328,25 @@ TargetInfo *AllocateTarget(const llvm::Triple &Triple, + case llvm::Triple::le64: + return new Le64TargetInfo(Triple, Opts); + ++#if 0 ++ //TODO: support it in future ++ case llvm::Triple::loongarch32: ++ switch (os) { ++ case llvm::Triple::Linux: ++ return new LinuxTargetInfo(Triple, Opts); ++ default: ++ return new LoongArchTargetInfo(Triple, Opts); ++ } ++#endif ++ ++ case llvm::Triple::loongarch64: ++ switch (os) { ++ case llvm::Triple::Linux: ++ return new LinuxTargetInfo(Triple, Opts); ++ default: ++ return new LoongArchTargetInfo(Triple, Opts); ++ } ++ + case llvm::Triple::ppc: + if (Triple.isOSDarwin()) + return new DarwinPPC32TargetInfo(Triple, Opts); +diff --git a/lib/Basic/Targets/LoongArch.cpp b/lib/Basic/Targets/LoongArch.cpp +new file mode 100644 +index 000000000..7f5632327 +--- /dev/null ++++ b/lib/Basic/Targets/LoongArch.cpp +@@ -0,0 +1,184 @@ ++//===--- LoongArch.cpp - Implement LoongArch target feature support -----------------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file implements LoongArch TargetInfo objects. 
++// ++//===----------------------------------------------------------------------===// ++ ++#include "LoongArch.h" ++#include "Targets.h" ++#include "clang/Basic/Diagnostic.h" ++#include "clang/Basic/MacroBuilder.h" ++#include "clang/Basic/TargetBuiltins.h" ++#include "llvm/ADT/StringSwitch.h" ++ ++using namespace clang; ++using namespace clang::targets; ++ ++const Builtin::Info LoongArchTargetInfo::BuiltinInfo[] = { ++#define BUILTIN(ID, TYPE, ATTRS) \ ++ {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr}, ++#define LIBBUILTIN(ID, TYPE, ATTRS, HEADER) \ ++ {#ID, TYPE, ATTRS, HEADER, ALL_LANGUAGES, nullptr}, ++#include "clang/Basic/BuiltinsLoongArch.def" ++}; ++ ++bool LoongArchTargetInfo::processorSupportsGPR64() const { ++ return llvm::StringSwitch(CPU) ++ .Case("la264", true) ++ .Case("la364", true) ++ .Case("la464", true) ++ .Default(false); ++ return false; ++} ++ ++static constexpr llvm::StringLiteral ValidCPUNames[] = { ++ {"la264"}, {"la364"}, {"la464"}}; ++ ++bool LoongArchTargetInfo::isValidCPUName(StringRef Name) const { ++ return llvm::find(ValidCPUNames, Name) != std::end(ValidCPUNames); ++} ++ ++void LoongArchTargetInfo::fillValidCPUList( ++ SmallVectorImpl &Values) const { ++ Values.append(std::begin(ValidCPUNames), std::end(ValidCPUNames)); ++} ++ ++void LoongArchTargetInfo::getTargetDefines(const LangOptions &Opts, ++ MacroBuilder &Builder) const { ++ Builder.defineMacro("__loongarch__"); ++ unsigned GRLen = getRegisterWidth(); ++ Builder.defineMacro("__loongarch_grlen", Twine(GRLen)); ++ if (GRLen == 64) ++ Builder.defineMacro("__loongarch64"); ++ ++ if (ABI == "lp32") { ++ Builder.defineMacro("__loongarch32"); ++ } else { ++ Builder.defineMacro("__loongarch_lp64"); ++ } ++ ++ if (ABI == "lp32") { ++ Builder.defineMacro("_ABILP32", "1"); ++ } else if (ABI == "lpx32") { ++ Builder.defineMacro("_ABILPX32", "2"); ++ } else if (ABI == "lp64") { ++ Builder.defineMacro("_ABILP64", "3"); ++ Builder.defineMacro("_LOONGARCH_SIM", "_ABILP64"); ++ } else ++ llvm_unreachable("Invalid ABI."); ++ ++ Builder.defineMacro("__REGISTER_PREFIX__", ""); ++ ++ switch (FloatABI) { ++ case HardFloat: ++ Builder.defineMacro("__loongarch_hard_float", Twine(1)); ++ Builder.defineMacro(IsSingleFloat ? "__loongarch_single_float" ++ : "__loongarch_double_float", ++ Twine(1)); ++ break; ++ case SoftFloat: ++ Builder.defineMacro("__loongarch_soft_float", Twine(1)); ++ break; ++ } ++ ++ switch (FPMode) { ++ case FP32: ++ Builder.defineMacro("__loongarch_fpr", Twine(32)); ++ Builder.defineMacro("__loongarch_frlen", Twine(32)); ++ break; ++ case FP64: ++ Builder.defineMacro("__loongarch_fpr", Twine(64)); ++ Builder.defineMacro("__loongarch_frlen", Twine(64)); ++ break; ++ } ++ ++ if (HasLSX) ++ Builder.defineMacro("__loongarch_sx", Twine(1)); ++ ++ if (HasLASX) ++ Builder.defineMacro("__loongarch_asx", Twine(1)); ++ ++ Builder.defineMacro("_LOONGARCH_SZPTR", Twine(getPointerWidth(0))); ++ Builder.defineMacro("_LOONGARCH_SZINT", Twine(getIntWidth())); ++ Builder.defineMacro("_LOONGARCH_SZLONG", Twine(getLongWidth())); ++ ++ Builder.defineMacro("_LOONGARCH_ARCH", "\"" + CPU + "\""); ++ Builder.defineMacro("_LOONGARCH_ARCH_" + StringRef(CPU).upper()); ++ ++ Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1"); ++ Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_2"); ++ Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4"); ++ ++ // 32-bit loongarch processors don't have the necessary lld/scd instructions ++ // found in 64-bit processors. 
In the case of lp32 on a 64-bit processor, ++ // the instructions exist but using them violates the ABI since they ++ // require 64-bit GPRs and LP32 only supports 32-bit GPRs. ++ if (ABI == "lpx32" || ABI == "lp64") ++ Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8"); ++} ++ ++bool LoongArchTargetInfo::hasFeature(StringRef Feature) const { ++ return llvm::StringSwitch(Feature) ++ .Case("fp64", FPMode == FP64) ++ .Case("lsx", HasLSX) ++ .Case("lasx", HasLASX) ++ .Default(false); ++} ++ ++ArrayRef LoongArchTargetInfo::getTargetBuiltins() const { ++ return llvm::makeArrayRef(BuiltinInfo, clang::LoongArch::LastTSBuiltin - ++ Builtin::FirstTSBuiltin); ++} ++ ++bool LoongArchTargetInfo::validateTarget(DiagnosticsEngine &Diags) const { ++ // FIXME: It's valid to use LP32 on a 64-bit CPU but the backend can't handle ++ // this yet. It's better to fail here than on the backend assertion. ++ if (processorSupportsGPR64() && ABI == "lp32") { ++ Diags.Report(diag::err_target_unsupported_abi) << ABI << CPU; ++ return false; ++ } ++ ++ // 64-bit ABI's require 64-bit CPU's. ++ if (!processorSupportsGPR64() && (ABI == "lpx32" || ABI == "lp64")) { ++ Diags.Report(diag::err_target_unsupported_abi) << ABI << CPU; ++ return false; ++ } ++ ++ // FIXME: It's valid to use lp32 on a loongarch64 triple but the backend ++ // can't handle this yet. It's better to fail here than on the ++ // backend assertion. ++ if (getTriple().isLoongArch64() && ABI == "lp32") { ++ Diags.Report(diag::err_target_unsupported_abi_for_triple) ++ << ABI << getTriple().str(); ++ return false; ++ } ++ ++ // FIXME: It's valid to use lpx32/lp64 on a loongarch32 triple but the backend ++ // can't handle this yet. It's better to fail here than on the ++ // backend assertion. ++ if (getTriple().isLoongArch32() && (ABI == "lpx32" || ABI == "lp64")) { ++ Diags.Report(diag::err_target_unsupported_abi_for_triple) ++ << ABI << getTriple().str(); ++ return false; ++ } ++ ++ // -mfp32 and lpx32/lp64 ABIs are incompatible ++ if (FPMode != FP64 && !IsSingleFloat && ++ (ABI == "lpx32" || ABI == "lp64")) { ++ Diags.Report(diag::err_opt_not_valid_with_opt) << "-mfp32" << ABI; ++ return false; ++ } ++ ++ if (FPMode != FP64 && (CPU == "la264" || CPU == "la364" || CPU == "la464")) { ++ Diags.Report(diag::err_opt_not_valid_with_opt) << "-mfp32" << CPU; ++ return false; ++ } ++ ++ return true; ++} +diff --git a/lib/Basic/Targets/LoongArch.h b/lib/Basic/Targets/LoongArch.h +new file mode 100644 +index 000000000..ef18cc887 +--- /dev/null ++++ b/lib/Basic/Targets/LoongArch.h +@@ -0,0 +1,402 @@ ++//===--- LoongArch.h - Declare LoongArch target feature support -----------*- C++ -*-===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file declares LoongArch TargetInfo objects. 
++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef LLVM_CLANG_LIB_BASIC_TARGETS_LOONGARCH_H ++#define LLVM_CLANG_LIB_BASIC_TARGETS_LOONGARCH_H ++ ++#include "clang/Basic/TargetInfo.h" ++#include "clang/Basic/TargetOptions.h" ++#include "llvm/ADT/Triple.h" ++#include "llvm/Support/Compiler.h" ++ ++namespace clang { ++namespace targets { ++ ++class LLVM_LIBRARY_VISIBILITY LoongArchTargetInfo : public TargetInfo { ++ void setDataLayout() { ++ StringRef Layout; ++ ++ if (ABI == "lp32") ++ Layout = "m:m-p:32:32-i8:8:32-i16:16:32-i64:64-n32-S64"; ++ else if (ABI == "lpx32") ++ Layout = "m:e-p:32:32-i8:8:32-i16:16:32-i64:64-n32:64-S128"; ++ else if (ABI == "lp64") ++ Layout = "m:e-i8:8:32-i16:16:32-i64:64-n32:64-S128"; ++ else ++ llvm_unreachable("Invalid ABI"); ++ ++ resetDataLayout(("e-" + Layout).str()); ++ } ++ ++ static const Builtin::Info BuiltinInfo[]; ++ std::string CPU; ++ bool IsSingleFloat; ++ enum LoongArchFloatABI { HardFloat, SoftFloat } FloatABI; ++ bool HasLSX; ++ bool HasLASX; ++ ++protected: ++ enum FPModeEnum { FP32, FP64 } FPMode; ++ std::string ABI; ++ ++public: ++ LoongArchTargetInfo(const llvm::Triple &Triple, const TargetOptions &) ++ : TargetInfo(Triple), IsSingleFloat(false), FloatABI(HardFloat), ++ HasLSX(false), HasLASX(false), FPMode(FP64) { ++ TheCXXABI.set(TargetCXXABI::GenericLoongArch); ++ ++ if (Triple.isLoongArch32()) ++ setABI("lp32"); ++ else if (Triple.getEnvironment() == llvm::Triple::GNUABILPX32) ++ setABI("lpx32"); ++ else ++ setABI("lp64"); ++ ++ // Currently, CPU only supports 'la464' in LA. ++ if ( ABI == "lp64") ++ CPU = "la464"; ++ } ++ ++ bool processorSupportsGPR64() const; ++ ++ StringRef getABI() const override { return ABI; } ++ ++ bool setABI(const std::string &Name) override { ++ if (Name == "lp32") { ++ setLP32ABITypes(); ++ ABI = Name; ++ return true; ++ } ++ ++ if (Name == "lpx32") { ++ //setLPX32ABITypes(); ++ //ABI = Name; ++ //return true; ++ //TODO: implement ++ return false; ++ } ++ if (Name == "lp64") { ++ setLP64ABITypes(); ++ ABI = Name; ++ return true; ++ } ++ return false; ++ } ++ ++ void setLP32ABITypes() { ++ Int64Type = SignedLongLong; ++ IntMaxType = Int64Type; ++ LongDoubleFormat = &llvm::APFloat::IEEEdouble(); ++ LongDoubleWidth = LongDoubleAlign = 64; ++ LongWidth = LongAlign = 32; ++ MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 32; ++ PointerWidth = PointerAlign = 32; ++ PtrDiffType = SignedInt; ++ SizeType = UnsignedInt; ++ SuitableAlign = 64; ++ } ++ ++ void setLPX32LP64ABITypes() { ++ LongDoubleWidth = LongDoubleAlign = 128; ++ LongDoubleFormat = &llvm::APFloat::IEEEquad(); ++ if (getTriple().isOSFreeBSD()) { ++ LongDoubleWidth = LongDoubleAlign = 64; ++ LongDoubleFormat = &llvm::APFloat::IEEEdouble(); ++ } ++ MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64; ++ SuitableAlign = 128; ++ } ++ ++ void setLP64ABITypes() { ++ setLPX32LP64ABITypes(); ++ if (getTriple().isOSOpenBSD()) { ++ Int64Type = SignedLongLong; ++ } else { ++ Int64Type = SignedLong; ++ } ++ IntMaxType = Int64Type; ++ LongWidth = LongAlign = 64; ++ PointerWidth = PointerAlign = 64; ++ PtrDiffType = SignedLong; ++ SizeType = UnsignedLong; ++ } ++ ++ void setLPX32ABITypes() { ++ setLPX32LP64ABITypes(); ++ Int64Type = SignedLongLong; ++ IntMaxType = Int64Type; ++ LongWidth = LongAlign = 32; ++ PointerWidth = PointerAlign = 32; ++ PtrDiffType = SignedInt; ++ SizeType = UnsignedInt; ++ } ++ ++ bool isValidCPUName(StringRef Name) const override; ++ void fillValidCPUList(SmallVectorImpl &Values) const override; 
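A few sanity checks of the lp64 type model configured by setLP64ABITypes() and setLPX32LP64ABITypes() above, assuming a loongarch64 Linux target (note the FreeBSD special case that narrows long double back to 64 bits); illustrative only:

    static_assert(sizeof(void *) == 8, "lp64: PointerWidth is 64");
    static_assert(sizeof(long) == 8, "lp64: LongWidth is 64");
    static_assert(sizeof(long double) == 16, "lp64: 128-bit IEEE quad long double");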
++ ++ bool setCPU(const std::string &Name) override { ++ CPU = Name; ++ return isValidCPUName(Name); ++ } ++ ++ const std::string &getCPU() const { return CPU; } ++ bool ++ initFeatureMap(llvm::StringMap &Features, DiagnosticsEngine &Diags, ++ StringRef CPU, ++ const std::vector &FeaturesVec) const override { ++#if 0 ++ if (CPU.empty()) ++ CPU = getCPU(); ++ Features[CPU] = true; ++#else ++// if (CPU == "la464") ++// Features["loongarch64"] = true; ++ ++//FIXME: we need this? ++// if (CPU == "la464") ++// Features["64bit"] = true; ++#endif ++ return TargetInfo::initFeatureMap(Features, Diags, CPU, FeaturesVec); ++ } ++ ++ void getTargetDefines(const LangOptions &Opts, ++ MacroBuilder &Builder) const override; ++ ++ ArrayRef getTargetBuiltins() const override; ++ ++ bool hasFeature(StringRef Feature) const override; ++ ++ bool hasBitIntType() const override { return true; } ++ ++ BuiltinVaListKind getBuiltinVaListKind() const override { ++ return TargetInfo::VoidPtrBuiltinVaList; ++ } ++ ++ ArrayRef getGCCRegNames() const override { ++ static const char *const GCCRegNames[] = { ++ // CPU register names ++ // Must match second column of GCCRegAliases ++ "$r0", "$r1", "$r2", "$r3", "$r4", "$r5", "$r6", "$r7", "$r8", "$r9", ++ "$r10", "$r11", "$r12", "$r13", "$r14", "$r15", "$r16", "$r17", "$r18", ++ "$r19", "$r20", "$r21", "$r22", "$r23", "$r24", "$r25", "$r26", "$r27", ++ "$r28", "$r29", "$r30", "$r31", ++ // Floating point register names ++ "$f0", "$f1", "$f2", "$f3", "$f4", "$f5", "$f6", "$f7", "$f8", "$f9", ++ "$f10", "$f11", "$f12", "$f13", "$f14", "$f15", "$f16", "$f17", "$f18", ++ "$f19", "$f20", "$f21", "$f22", "$f23", "$f24", "$f25", "$f26", "$f27", ++ "$f28", "$f29", "$f30", "$f31", ++ // condition register names ++ "$fcc0", "$fcc1", "$fcc2", "$fcc3", "$fcc4", "$fcc5", "$fcc6", "$fcc7", ++ // LSX register names ++ "$vr0", "$vr1", "$vr2", "$vr3", "$vr4", "$vr5", "$vr6", "$vr7", "$vr8", ++ "$vr9", "$vr10", "$vr11", "$vr12", "$vr13", "$vr14", "$vr15", "$vr16", ++ "$vr17", "$vr18", "$vr19", "$vr20", "$vr21", "$vr22", "$vr23", "$vr24", ++ "$vr25", "$vr26", "$vr27", "$vr28", "$vr29", "$vr30", "$vr31", ++ // LASX register names ++ "$xr0", "$xr1", "$xr2", "$xr3", "$xr4", "$xr5", "$xr6", "$xr7", "$xr8", ++ "$xr9", "$xr10", "$xr11", "$xr12", "$xr13", "$xr14", "$xr15", "$xr16", ++ "$xr17", "$xr18", "$xr19", "$xr20", "$xr21", "$xr22", "$xr23", "$xr24", ++ "$xr25", "$xr26", "$xr27", "$xr28", "$xr29", "$xr30", "$xr31" ++ ++ }; ++ return llvm::makeArrayRef(GCCRegNames); ++ } ++ ++ bool validateAsmConstraint(const char *&Name, ++ TargetInfo::ConstraintInfo &Info) const override { ++ switch (*Name) { ++ default: ++ return false; ++ case 'r': // CPU registers. ++ case 'f': // floating-point registers. ++ Info.setAllowsRegister(); ++ return true; ++ case 'l': // Signed 16-bit constant ++ case 'I': // Signed 12-bit constant ++ case 'K': // Unsigned 12-bit constant ++ case 'J': // Integer 0 ++ case 'G': // Floating-point 0 ++ return true; ++ case 'm': // Memory address with 12-bit offset ++ case 'R': // An address that can be used in a non-macro load or store ++ Info.setAllowsMemory(); ++ return true; ++ case 'Z': ++ if (Name[1] == 'C' // Memory address with 16-bit and 4 bytes aligned offset ++ || Name[1] == 'B' ) { // Memory address with 0 offset ++ Info.setAllowsMemory(); ++ Name++; // Skip over 'Z'. 
++ return true; ++ } ++ return false; ++ } ++ } ++ ++ std::string convertConstraint(const char *&Constraint) const override { ++ std::string R; ++ switch (*Constraint) { ++ case 'Z': // Two-character constraint; add "^" hint for later parsing. ++ if (Constraint[1] == 'C' || Constraint[1] == 'B') { ++ R = std::string("^") + std::string(Constraint, 2); ++ Constraint++; ++ return R; ++ } ++ break; ++ } ++ return TargetInfo::convertConstraint(Constraint); ++ } ++ ++ const char *getClobbers() const override { ++#if 0 ++ // In GCC, $1 is not widely used in generated code (it's used only in a few ++ // specific situations), so there is no real need for users to add it to ++ // the clobbers list if they want to use it in their inline assembly code. ++ // ++ // In LLVM, $1 is treated as a normal GPR and is always allocatable during ++ // code generation, so using it in inline assembly without adding it to the ++ // clobbers list can cause conflicts between the inline assembly code and ++ // the surrounding generated code. ++ // ++ // Another problem is that LLVM is allowed to choose $1 for inline assembly ++ // operands, which will conflict with the ".set at" assembler option (which ++ // we use only for inline assembly, in order to maintain compatibility with ++ // GCC) and will also conflict with the user's usage of $1. ++ // ++ // The easiest way to avoid these conflicts and keep $1 as an allocatable ++ // register for generated code is to automatically clobber $1 for all inline ++ // assembly code. ++ // ++ // FIXME: We should automatically clobber $1 only for inline assembly code ++ // which actually uses it. This would allow LLVM to use $1 for inline ++ // assembly operands if the user's assembly code doesn't use it. ++ return "~{$1}"; ++#endif ++ return ""; ++ } ++ ++ bool handleTargetFeatures(std::vector &Features, ++ DiagnosticsEngine &Diags) override { ++ IsSingleFloat = false; ++ FloatABI = HardFloat; ++ FPMode = FP64; ++ ++ for (const auto &Feature : Features) { ++ if (Feature == "+single-float") ++ IsSingleFloat = true; ++ else if (Feature == "+soft-float") ++ FloatABI = SoftFloat; ++ else if (Feature == "+lsx") ++ HasLSX = true; ++ else if (Feature == "+lasx") { ++ HasLASX = true; ++ HasLSX = true; ++ } else if (Feature == "+fp64") ++ FPMode = FP64; ++ else if (Feature == "-fp64") ++ FPMode = FP32; ++ } ++ ++ setDataLayout(); ++ ++ return true; ++ } ++ ++ int getEHDataRegisterNumber(unsigned RegNo) const override { ++ if (RegNo == 0) ++ return 4; ++ if (RegNo == 1) ++ return 5; ++ return -1; ++ } ++ ++ bool isCLZForZeroUndef() const override { return false; } ++ ++ ArrayRef getGCCRegAliases() const override { ++ static const TargetInfo::GCCRegAlias GCCRegAliases[] = { ++ {{"zero", "$zero", "r0", "$0"}, "$r0"}, ++ {{"ra", "$ra", "r1", "$1"}, "$r1"}, ++ {{"tp", "$tp", "r2", "$2"}, "$r2"}, ++ {{"sp", "$sp", "r3", "$3"}, "$r3"}, ++ {{"a0", "$a0", "r4", "$4", "v0"}, "$r4"}, ++ {{"a1", "$a1", "r5", "$5", "v1"}, "$r5"}, ++ {{"a2", "$a2", "r6", "$6"}, "$r6"}, ++ {{"a3", "$a3", "r7", "$7"}, "$r7"}, ++ {{"a4", "$a4", "r8", "$8"}, "$r8"}, ++ {{"a5", "$a5", "r9", "$9"}, "$r9"}, ++ {{"a6", "$a6", "r10", "$10"}, "$r10"}, ++ {{"a7", "$a7", "r11", "$11"}, "$r11"}, ++ {{"t0", "$t0", "r12", "$12"}, "$r12"}, ++ {{"t1", "$t1", "r13", "$13"}, "$r13"}, ++ {{"t2", "$t2", "r14", "$14"}, "$r14"}, ++ {{"t3", "$t3", "r15", "$15"}, "$r15"}, ++ {{"t4", "$t4", "r16", "$16"}, "$r16"}, ++ {{"t5", "$t5", "r17", "$17"}, "$r17"}, ++ {{"t6", "$t6", "r18", "$18"}, "$r18"}, ++ {{"t7", "$t7", "r19", "$19"}, "$r19"}, ++ 
{{"t8", "$t8", "r20", "$20"}, "$r20"}, ++ //{{"x", "$x", "r21", "$21"}, "$r21"}, ++ {{"fp", "$fp", "r22", "$22"}, "$r22"}, ++ {{"s0", "$s0", "r23", "$23"}, "$r23"}, ++ {{"s1", "$s1", "r24", "$24"}, "$r24"}, ++ {{"s2", "$s2", "r25", "$25"}, "$r25"}, ++ {{"s3", "$s3", "r26", "$26"}, "$r26"}, ++ {{"s4", "$s4", "r27", "$27"}, "$r27"}, ++ {{"s5", "$s5", "r28", "$28"}, "$r28"}, ++ {{"s6", "$s6", "r29", "$29"}, "$r29"}, ++ {{"s7", "$s7", "r30", "$30"}, "$r30"}, ++ {{"s8", "$s8", "r31", "$31"}, "$r31"}, ++ {{"fa0", "$fa0", "f0"}, "$f0"}, ++ {{"fa1", "$fa1", "f1"}, "$f1"}, ++ {{"fa2", "$fa2", "f2"}, "$f2"}, ++ {{"fa3", "$fa3", "f3"}, "$f3"}, ++ {{"fa4", "$fa4", "f4"}, "$f4"}, ++ {{"fa5", "$fa5", "f5"}, "$f5"}, ++ {{"fa6", "$fa6", "f6"}, "$f6"}, ++ {{"fa7", "$fa7", "f7"}, "$f7"}, ++ {{"ft0", "$ft0", "f8"}, "$f8"}, ++ {{"ft1", "$ft1", "f9"}, "$f9"}, ++ {{"ft2", "$ft2", "f10"}, "$f10"}, ++ {{"ft3", "$ft3", "f11"}, "$f11"}, ++ {{"ft4", "$ft4", "f12"}, "$f12"}, ++ {{"ft5", "$ft5", "f13"}, "$f13"}, ++ {{"ft6", "$ft6", "f14"}, "$f14"}, ++ {{"ft7", "$ft7", "f15"}, "$f15"}, ++ {{"ft8", "$ft8", "f16"}, "$f16"}, ++ {{"ft9", "$ft9", "f17"}, "$f17"}, ++ {{"ft10", "$ft10", "f18"}, "$f18"}, ++ {{"ft11", "$ft11", "f19"}, "$f19"}, ++ {{"ft12", "$ft12", "f20"}, "$f20"}, ++ {{"ft13", "$ft13", "f21"}, "$f21"}, ++ {{"ft14", "$ft14", "f22"}, "$f22"}, ++ {{"ft15", "$ft15", "f23"}, "$f23"}, ++ {{"fs0", "$fs0", "f24"}, "$f24"}, ++ {{"fs1", "$fs1", "f25"}, "$f25"}, ++ {{"fs2", "$fs2", "f26"}, "$f26"}, ++ {{"fs3", "$fs3", "f27"}, "$f27"}, ++ {{"fs4", "$fs4", "f28"}, "$f28"}, ++ {{"fs5", "$fs5", "f29"}, "$f29"}, ++ {{"fs6", "$fs6", "f30"}, "$f30"}, ++ {{"fs7", "$fs7", "f31"}, "$f31"}, ++ }; ++ return llvm::makeArrayRef(GCCRegAliases); ++ } ++ ++ bool hasInt128Type() const override { ++ return (ABI == "lpx32" || ABI == "lp64") || getTargetOpts().ForceEnableInt128; ++ } ++ ++ bool validateTarget(DiagnosticsEngine &Diags) const override; ++}; ++} // namespace targets ++} // namespace clang ++ ++#endif // LLVM_CLANG_LIB_BASIC_TARGETS_LOONGARCH_H +diff --git a/lib/CodeGen/CodeGenModule.cpp b/lib/CodeGen/CodeGenModule.cpp +index d87692fac..197915e15 100644 +--- a/lib/CodeGen/CodeGenModule.cpp ++++ b/lib/CodeGen/CodeGenModule.cpp +@@ -84,6 +84,7 @@ static CGCXXABI *createCXXABI(CodeGenModule &CGM) { + case TargetCXXABI::GenericARM: + case TargetCXXABI::iOS: + case TargetCXXABI::WatchOS: ++ case TargetCXXABI::GenericLoongArch: + case TargetCXXABI::GenericMIPS: + case TargetCXXABI::GenericItanium: + case TargetCXXABI::WebAssembly: +diff --git a/lib/CodeGen/ItaniumCXXABI.cpp b/lib/CodeGen/ItaniumCXXABI.cpp +index fc2ff15a6..eebd6fb65 100644 +--- a/lib/CodeGen/ItaniumCXXABI.cpp ++++ b/lib/CodeGen/ItaniumCXXABI.cpp +@@ -533,6 +533,9 @@ CodeGen::CGCXXABI *CodeGen::CreateItaniumCXXABI(CodeGenModule &CGM) { + return new ItaniumCXXABI(CGM, /*UseARMMethodPtrABI=*/true, + /*UseARMGuardVarABI=*/true); + ++ case TargetCXXABI::GenericLoongArch: ++ return new ItaniumCXXABI(CGM, /*UseARMMethodPtrABI=*/true); ++ + case TargetCXXABI::GenericMIPS: + return new ItaniumCXXABI(CGM, /*UseARMMethodPtrABI=*/true); + +diff --git a/lib/CodeGen/TargetInfo.cpp b/lib/CodeGen/TargetInfo.cpp +index 44743fa02..fbc6aed85 100644 +--- a/lib/CodeGen/TargetInfo.cpp ++++ b/lib/CodeGen/TargetInfo.cpp +@@ -11323,6 +11323,558 @@ public: + }; + } // namespace + ++//===----------------------------------------------------------------------===// ++// LoongArch ABI Implementation ++//===----------------------------------------------------------------------===// ++ ++namespace { 
++class LoongArchABIInfo : public DefaultABIInfo { ++private: ++ // Size of the integer ('r') registers in bits. ++ unsigned GRLen; ++ // Size of the floating point ('f') registers in bits. Note that the target ++ // ISA might have a wider FRLen than the selected ABI. ++ unsigned FRLen; ++ static const int NumArgGPRs = 8; ++ static const int NumArgFPRs = 8; ++ bool detectFPCCEligibleStructHelper(QualType Ty, CharUnits CurOff, ++ llvm::Type *&Field1Ty, ++ CharUnits &Field1Off, ++ llvm::Type *&Field2Ty, ++ CharUnits &Field2Off) const; ++ ++public: ++ LoongArchABIInfo(CodeGen::CodeGenTypes &CGT, unsigned GRLen, unsigned FRLen) ++ : DefaultABIInfo(CGT), GRLen(GRLen), FRLen(FRLen) {} ++ ++ // DefaultABIInfo's classifyReturnType and classifyArgumentType are ++ // non-virtual, but computeInfo is virtual, so we overload it. ++ void computeInfo(CGFunctionInfo &FI) const override; ++ ++ ABIArgInfo classifyArgumentType(QualType Ty, bool IsFixed, int &ArgGPRsLeft, ++ int &ArgFPRsLeft) const; ++ ABIArgInfo classifyReturnType(QualType RetTy) const; ++ ++ uint64_t MinABIStackAlignInBytes = 8; ++ uint64_t StackAlignInBytes = 16; ++ llvm::Type* HandleAggregates(QualType Ty, uint64_t TySize) const; ++ llvm::Type* getPaddingType(uint64_t Align, uint64_t Offset) const; ++ void CoerceToIntArgs(uint64_t TySize, ++ SmallVectorImpl &ArgList) const; ++ ++ Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, ++ QualType Ty) const override; ++ ++ ABIArgInfo extendType(QualType Ty) const; ++ ++ bool detectFPCCEligibleStruct(QualType Ty, llvm::Type *&Field1Ty, ++ CharUnits &Field1Off, llvm::Type *&Field2Ty, ++ CharUnits &Field2Off, int &NeededArgGPRs, ++ int &NeededArgFPRs) const; ++ ABIArgInfo coerceAndExpandFPCCEligibleStruct(llvm::Type *Field1Ty, ++ CharUnits Field1Off, ++ llvm::Type *Field2Ty, ++ CharUnits Field2Off) const; ++}; ++} // end anonymous namespace ++ ++void LoongArchABIInfo::computeInfo(CGFunctionInfo &FI) const { ++ QualType RetTy = FI.getReturnType(); ++ if (!getCXXABI().classifyReturnType(FI)) ++ FI.getReturnInfo() = classifyReturnType(RetTy); ++ ++ // IsRetIndirect is true if classifyArgumentType indicated the value should ++ // be passed indirect or if the type size is greater than 2*grlen. ++ bool IsRetIndirect = FI.getReturnInfo().getKind() == ABIArgInfo::Indirect || ++ getContext().getTypeSize(RetTy) > (2 * GRLen); ++ ++ // We must track the number of GPRs used in order to conform to the LoongArch ++ // ABI, as integer scalars passed in registers should have signext/zeroext ++ // when promoted, but are anyext if passed on the stack. As GPR usage is ++ // different for variadic arguments, we must also track whether we are ++ // examining a vararg or not. ++ int ArgGPRsLeft = IsRetIndirect ? NumArgGPRs - 1 : NumArgGPRs; ++ int ArgFPRsLeft = FRLen ? NumArgFPRs : 0; ++ int NumFixedArgs = FI.getNumRequiredArgs(); ++ ++ int ArgNum = 0; ++ for (auto &ArgInfo : FI.arguments()) { ++ bool IsFixed = ArgNum < NumFixedArgs; ++ ArgInfo.info = ++ classifyArgumentType(ArgInfo.type, IsFixed, ArgGPRsLeft, ArgFPRsLeft); ++ ArgNum++; ++ } ++} ++ ++// Returns true if the struct is a potential candidate for the floating point ++// calling convention. If this function returns true, the caller is ++// responsible for checking that if there is only a single field then that ++// field is a float. 
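Before the helper itself, a few concrete cases the flattening rule above accepts or rejects, assuming GRLen = FRLen = 64; these are illustrative, not exhaustive:

    struct A { double d; };           // single fp field  -> eligible, one FPR
    struct B { float f; double g; };  // fp + fp          -> eligible, two FPRs
    struct C { int i; float f; };     // int + fp         -> eligible, one GPR + one FPR
    struct D { int i; int j; };       // int + int        -> not eligible
    struct E { float f[3]; };         // flattens to three fp fields -> not eligible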
++bool LoongArchABIInfo::detectFPCCEligibleStructHelper(QualType Ty, CharUnits CurOff, ++ llvm::Type *&Field1Ty, ++ CharUnits &Field1Off, ++ llvm::Type *&Field2Ty, ++ CharUnits &Field2Off) const { ++ bool IsInt = Ty->isIntegralOrEnumerationType(); ++ bool IsFloat = Ty->isRealFloatingType(); ++ ++ if (IsInt || IsFloat) { ++ uint64_t Size = getContext().getTypeSize(Ty); ++ if (IsInt && Size > GRLen) ++ return false; ++ // Can't be eligible if larger than the FP registers. Half precision isn't ++ // currently supported on LoongArch and the ABI hasn't been confirmed, so ++ // default to the integer ABI in that case. ++ if (IsFloat && (Size > FRLen || Size < 32)) ++ return false; ++ // Can't be eligible if an integer type was already found (int+int pairs ++ // are not eligible). ++ if (IsInt && Field1Ty && Field1Ty->isIntegerTy()) ++ return false; ++ if (!Field1Ty) { ++ Field1Ty = CGT.ConvertType(Ty); ++ Field1Off = CurOff; ++ return true; ++ } ++ if (!Field2Ty) { ++ Field2Ty = CGT.ConvertType(Ty); ++ Field2Off = CurOff; ++ return true; ++ } ++ return false; ++ } ++ ++ if (auto CTy = Ty->getAs()) { ++ if (Field1Ty) ++ return false; ++ QualType EltTy = CTy->getElementType(); ++ if (getContext().getTypeSize(EltTy) > FRLen) ++ return false; ++ Field1Ty = CGT.ConvertType(EltTy); ++ Field1Off = CurOff; ++ assert(CurOff.isZero() && "Unexpected offset for first field"); ++ Field2Ty = Field1Ty; ++ Field2Off = Field1Off + getContext().getTypeSizeInChars(EltTy); ++ return true; ++ } ++ ++ if (const ConstantArrayType *ATy = getContext().getAsConstantArrayType(Ty)) { ++ uint64_t ArraySize = ATy->getSize().getZExtValue(); ++ QualType EltTy = ATy->getElementType(); ++ CharUnits EltSize = getContext().getTypeSizeInChars(EltTy); ++ for (uint64_t i = 0; i < ArraySize; ++i) { ++ bool Ret = detectFPCCEligibleStructHelper(EltTy, CurOff, Field1Ty, ++ Field1Off, Field2Ty, Field2Off); ++ if (!Ret) ++ return false; ++ CurOff += EltSize; ++ } ++ return true; ++ } ++ ++ if (const auto *RTy = Ty->getAs()) { ++ // Structures with either a non-trivial destructor or a non-trivial ++ // copy constructor are not eligible for the FP calling convention. ++ if (getRecordArgABI(Ty, CGT.getCXXABI())) ++ return false; ++ if (isEmptyRecord(getContext(), Ty, true)) ++ return true; ++ const RecordDecl *RD = RTy->getDecl(); ++ // Unions aren't eligible unless they're empty (which is caught above). ++ if (RD->isUnion()) ++ return false; ++ const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD); ++ // If this is a C++ record, check the bases first. ++ if (const CXXRecordDecl *CXXRD = dyn_cast(RD)) { ++ for (const CXXBaseSpecifier &B : CXXRD->bases()) { ++ const auto *BDecl = ++ cast(B.getType()->castAs()->getDecl()); ++ CharUnits BaseOff = Layout.getBaseClassOffset(BDecl); ++ bool Ret = detectFPCCEligibleStructHelper(B.getType(), CurOff + BaseOff, ++ Field1Ty, Field1Off, Field2Ty, ++ Field2Off); ++ if (!Ret) ++ return false; ++ } ++ } ++ int ZeroWidthBitFieldCount = 0; ++ for (const FieldDecl *FD : RD->fields()) { ++ uint64_t FieldOffInBits = Layout.getFieldOffset(FD->getFieldIndex()); ++ QualType QTy = FD->getType(); ++ if (FD->isBitField()) { ++ unsigned BitWidth = FD->getBitWidthValue(getContext()); ++ // Allow a bitfield with a type greater than GRLen as long as the ++ // bitwidth is GRLen or less. 
++ if (getContext().getTypeSize(QTy) > GRLen && BitWidth <= GRLen) ++ QTy = getContext().getIntTypeForBitwidth(GRLen, false); ++ if (BitWidth == 0) { ++ ZeroWidthBitFieldCount++; ++ continue; ++ } ++ } ++ ++ bool Ret = detectFPCCEligibleStructHelper( ++ QTy, CurOff + getContext().toCharUnitsFromBits(FieldOffInBits), ++ Field1Ty, Field1Off, Field2Ty, Field2Off); ++ if (!Ret) ++ return false; ++ ++ // As a quirk of the ABI, zero-width bitfields aren't ignored for fp+fp ++ // or int+fp structs, but are ignored for a struct with an fp field and ++ // any number of zero-width bitfields. ++ if (Field2Ty && ZeroWidthBitFieldCount > 0) ++ return false; ++ } ++ return Field1Ty != nullptr; ++ } ++ ++ return false; ++} ++ ++// Determine if a struct is eligible for passing according to the floating ++// point calling convention (i.e., when flattened it contains a single fp ++// value, fp+fp, or int+fp of appropriate size). If so, NeededArgFPRs and ++// NeededArgGPRs are incremented appropriately. ++bool LoongArchABIInfo::detectFPCCEligibleStruct(QualType Ty, llvm::Type *&Field1Ty, ++ CharUnits &Field1Off, ++ llvm::Type *&Field2Ty, ++ CharUnits &Field2Off, ++ int &NeededArgGPRs, ++ int &NeededArgFPRs) const { ++ Field1Ty = nullptr; ++ Field2Ty = nullptr; ++ NeededArgGPRs = 0; ++ NeededArgFPRs = 0; ++ bool IsCandidate = detectFPCCEligibleStructHelper( ++ Ty, CharUnits::Zero(), Field1Ty, Field1Off, Field2Ty, Field2Off); ++ // Not really a candidate if we have a single int but no float. ++ if (Field1Ty && !Field2Ty && !Field1Ty->isFloatingPointTy()) ++ return IsCandidate = false; ++ if (!IsCandidate) ++ return false; ++ if (Field1Ty && Field1Ty->isFloatingPointTy()) ++ NeededArgFPRs++; ++ else if (Field1Ty) ++ NeededArgGPRs++; ++ if (Field2Ty && Field2Ty->isFloatingPointTy()) ++ NeededArgFPRs++; ++ else if (Field2Ty) ++ NeededArgGPRs++; ++ return IsCandidate; ++} ++ ++// Call getCoerceAndExpand for the two-element flattened struct described by ++// Field1Ty, Field1Off, Field2Ty, Field2Off. This method will create an ++// appropriate coerceToType and unpaddedCoerceToType. 
++ABIArgInfo LoongArchABIInfo::coerceAndExpandFPCCEligibleStruct( ++ llvm::Type *Field1Ty, CharUnits Field1Off, llvm::Type *Field2Ty, ++ CharUnits Field2Off) const { ++ SmallVector CoerceElts; ++ SmallVector UnpaddedCoerceElts; ++ if (!Field1Off.isZero()) ++ CoerceElts.push_back(llvm::ArrayType::get( ++ llvm::Type::getInt8Ty(getVMContext()), Field1Off.getQuantity())); ++ ++ CoerceElts.push_back(Field1Ty); ++ UnpaddedCoerceElts.push_back(Field1Ty); ++ ++ if (!Field2Ty) { ++ return ABIArgInfo::getCoerceAndExpand( ++ llvm::StructType::get(getVMContext(), CoerceElts, !Field1Off.isZero()), ++ UnpaddedCoerceElts[0]); ++ } ++ ++ CharUnits Field2Align = ++ CharUnits::fromQuantity(getDataLayout().getABITypeAlignment(Field2Ty)); ++ CharUnits Field1Size = ++ CharUnits::fromQuantity(getDataLayout().getTypeStoreSize(Field1Ty)); ++ CharUnits Field2OffNoPadNoPack = Field1Size.alignTo(Field2Align); ++ ++ CharUnits Padding = CharUnits::Zero(); ++ if (Field2Off > Field2OffNoPadNoPack) ++ Padding = Field2Off - Field2OffNoPadNoPack; ++ else if (Field2Off != Field2Align && Field2Off > Field1Size) ++ Padding = Field2Off - Field1Size; ++ ++ bool IsPacked = !Field2Off.isMultipleOf(Field2Align); ++ ++ if (!Padding.isZero()) ++ CoerceElts.push_back(llvm::ArrayType::get( ++ llvm::Type::getInt8Ty(getVMContext()), Padding.getQuantity())); ++ ++ CoerceElts.push_back(Field2Ty); ++ UnpaddedCoerceElts.push_back(Field2Ty); ++ ++ auto CoerceToType = ++ llvm::StructType::get(getVMContext(), CoerceElts, IsPacked); ++ auto UnpaddedCoerceToType = ++ llvm::StructType::get(getVMContext(), UnpaddedCoerceElts, IsPacked); ++ ++ return ABIArgInfo::getCoerceAndExpand(CoerceToType, UnpaddedCoerceToType); ++} ++ ++void LoongArchABIInfo::CoerceToIntArgs( ++ uint64_t TySize, SmallVectorImpl &ArgList) const { ++ llvm::IntegerType *IntTy = ++ llvm::IntegerType::get(getVMContext(), MinABIStackAlignInBytes * 8); ++ ++ // Add (TySize / MinABIStackAlignInBytes) args of IntTy. ++ for (unsigned N = TySize / (MinABIStackAlignInBytes * 8); N; --N) ++ ArgList.push_back(IntTy); ++ ++ // If necessary, add one more integer type to ArgList. ++ unsigned R = TySize % (MinABIStackAlignInBytes * 8); ++ ++ if (R) ++ ArgList.push_back(llvm::IntegerType::get(getVMContext(), R)); ++} ++ ++llvm::Type* LoongArchABIInfo::HandleAggregates(QualType Ty, uint64_t TySize) const { ++ SmallVector ArgList, IntArgList; ++ ++ if (Ty->isComplexType()) ++ return CGT.ConvertType(Ty); ++ ++ const RecordType *RT = Ty->getAs(); ++ ++ // Unions/vectors are passed in integer registers. ++ if (!RT || !RT->isStructureOrClassType()) { ++ CoerceToIntArgs(TySize, ArgList); ++ return llvm::StructType::get(getVMContext(), ArgList); ++ } ++ ++ const RecordDecl *RD = RT->getDecl(); ++ const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD); ++ assert(!(TySize % 8) && "Size of structure must be multiple of 8."); ++ ++ uint64_t LastOffset = 0; ++ unsigned idx = 0; ++ llvm::IntegerType *I64 = llvm::IntegerType::get(getVMContext(), 64); ++ ++ // Iterate over fields in the struct/class and check if there are any aligned ++ // double fields. ++ for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end(); ++ i != e; ++i, ++idx) { ++ const QualType Ty = i->getType(); ++ const BuiltinType *BT = Ty->getAs(); ++ ++ if (!BT || BT->getKind() != BuiltinType::Double) ++ continue; ++ ++ uint64_t Offset = Layout.getFieldOffset(idx); ++ if (Offset % 64) // Ignore doubles that are not aligned. ++ continue; ++ ++ // Add ((Offset - LastOffset) / 64) args of type i64. 
++ for (unsigned j = (Offset - LastOffset) / 64; j > 0; --j) ++ ArgList.push_back(I64); ++ ++ // Add double type. ++ ArgList.push_back(llvm::Type::getDoubleTy(getVMContext())); ++ LastOffset = Offset + 64; ++ } ++ ++ CoerceToIntArgs(TySize - LastOffset, IntArgList); ++ ArgList.append(IntArgList.begin(), IntArgList.end()); ++ ++ return llvm::StructType::get(getVMContext(), ArgList); ++} ++ ++llvm::Type * LoongArchABIInfo::getPaddingType(uint64_t OrigOffset, ++ uint64_t Offset) const { ++ if (OrigOffset + MinABIStackAlignInBytes > Offset) ++ return nullptr; ++ ++ return llvm::IntegerType::get(getVMContext(), (Offset - OrigOffset) * 8); ++} ++ ++ABIArgInfo LoongArchABIInfo::classifyArgumentType(QualType Ty, bool IsFixed, ++ int &ArgGPRsLeft, ++ int &ArgFPRsLeft) const { ++ assert(ArgGPRsLeft <= NumArgGPRs && "Arg GPR tracking underflow"); ++ Ty = useFirstFieldIfTransparentUnion(Ty); ++ ++ // Structures with either a non-trivial destructor or a non-trivial ++ // copy constructor are always passed indirectly. ++ if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) { ++ if (ArgGPRsLeft) ++ ArgGPRsLeft -= 1; ++ return getNaturalAlignIndirect(Ty, /*ByVal=*/RAA == ++ CGCXXABI::RAA_DirectInMemory); ++ } ++ ++ // Ignore empty structs/unions. ++ if (isEmptyRecord(getContext(), Ty, true)) ++ return ABIArgInfo::getIgnore(); ++ ++ uint64_t Size = getContext().getTypeSize(Ty); ++ ++ // Pass floating point values via FPRs if possible. ++ if (IsFixed && Ty->isFloatingType() && FRLen >= Size && ArgFPRsLeft) { ++ ArgFPRsLeft--; ++ return ABIArgInfo::getDirect(); ++ } ++ ++ // Complex types for the hard float ABI must be passed direct rather than ++ // using CoerceAndExpand. ++ if (IsFixed && Ty->isComplexType() && FRLen && ArgFPRsLeft >= 2) { ++ QualType EltTy = Ty->getAs()->getElementType(); ++ if (getContext().getTypeSize(EltTy) <= FRLen) { ++ ArgFPRsLeft -= 2; ++ return ABIArgInfo::getDirect(); ++ } ++ } ++ ++ if (Ty->isVectorType() && (((getContext().getTypeSize(Ty) == 128) && ++ (getTarget().hasFeature("lsx"))) || ++ ((getContext().getTypeSize(Ty) == 256) && ++ getTarget().hasFeature("lasx")))) ++ return ABIArgInfo::getDirect(); ++ ++ if (IsFixed && FRLen && Ty->isStructureOrClassType()) { ++ llvm::Type *Field1Ty = nullptr; ++ llvm::Type *Field2Ty = nullptr; ++ CharUnits Field1Off = CharUnits::Zero(); ++ CharUnits Field2Off = CharUnits::Zero(); ++ int NeededArgGPRs; ++ int NeededArgFPRs; ++ bool IsCandidate = ++ detectFPCCEligibleStruct(Ty, Field1Ty, Field1Off, Field2Ty, Field2Off, ++ NeededArgGPRs, NeededArgFPRs); ++ if (IsCandidate && NeededArgGPRs <= ArgGPRsLeft && ++ NeededArgFPRs <= ArgFPRsLeft) { ++ ArgGPRsLeft -= NeededArgGPRs; ++ ArgFPRsLeft -= NeededArgFPRs; ++ return coerceAndExpandFPCCEligibleStruct(Field1Ty, Field1Off, Field2Ty, ++ Field2Off); ++ } ++ } else if (Ty->isStructureOrClassType() && Size == 128 && ++ isAggregateTypeForABI(Ty)) { ++ uint64_t Offset = 8; ++ uint64_t OrigOffset = Offset; ++ uint64_t TySize = getContext().getTypeSize(Ty); ++ uint64_t Align = getContext().getTypeAlign(Ty) / 8; ++ ++ Align = std::min(std::max(Align, (uint64_t)MinABIStackAlignInBytes), ++ (uint64_t)StackAlignInBytes); ++ unsigned CurrOffset = llvm::alignTo(Offset, Align); ++ Offset = CurrOffset + llvm::alignTo(TySize, Align * 8) / 8; ++ ++ ABIArgInfo ArgInfo = ++ ABIArgInfo::getDirect(HandleAggregates(Ty, TySize), 0, ++ getPaddingType(OrigOffset, CurrOffset)); ++ ArgInfo.setInReg(true); ++ return ArgInfo; ++ } ++ ++ uint64_t NeededAlign = getContext().getTypeAlign(Ty); ++ // Determine the 
number of GPRs needed to pass the current argument ++ // according to the ABI. 2*GRLen-aligned varargs are passed in "aligned" ++ // register pairs, so may consume 3 registers. ++ int NeededArgGPRs = 1; ++ if (!IsFixed && NeededAlign == 2 * GRLen) ++ NeededArgGPRs = 2 + (ArgGPRsLeft % 2); ++ else if (Size > GRLen && Size <= 2 * GRLen) ++ NeededArgGPRs = 2; ++ ++ if (NeededArgGPRs > ArgGPRsLeft) { ++ NeededArgGPRs = ArgGPRsLeft; ++ } ++ ++ ArgGPRsLeft -= NeededArgGPRs; ++ ++ if (!isAggregateTypeForABI(Ty) && !Ty->isVectorType()) { ++ // Treat an enum type as its underlying type. ++ if (const EnumType *EnumTy = Ty->getAs()) ++ Ty = EnumTy->getDecl()->getIntegerType(); ++ ++ // All integral types are promoted to GRLen width, unless passed on the ++ // stack. ++ if (Size < GRLen && Ty->isIntegralOrEnumerationType()) { ++ return extendType(Ty); ++ } ++ ++ return ABIArgInfo::getDirect(); ++ } ++ ++ // Aggregates which are <= 2*GRLen will be passed in registers if possible, ++ // so coerce to integers. ++ if (Size <= 2 * GRLen) { ++ unsigned Alignment = getContext().getTypeAlign(Ty); ++ ++ // Use a single GRLen int if possible, 2*GRLen if 2*GRLen alignment is ++ // required, and a 2-element GRLen array if only GRLen alignment is required. ++ if (Size <= GRLen) { ++ return ABIArgInfo::getDirect( ++ llvm::IntegerType::get(getVMContext(), GRLen)); ++ } else if (Alignment == 2 * GRLen) { ++ return ABIArgInfo::getDirect( ++ llvm::IntegerType::get(getVMContext(), 2 * GRLen)); ++ } else { ++ return ABIArgInfo::getDirect(llvm::ArrayType::get( ++ llvm::IntegerType::get(getVMContext(), GRLen), 2)); ++ } ++ } ++ return getNaturalAlignIndirect(Ty, /*ByVal=*/false); ++} ++ ++ABIArgInfo LoongArchABIInfo::classifyReturnType(QualType RetTy) const { ++ if (RetTy->isVoidType()) ++ return ABIArgInfo::getIgnore(); ++ ++ int ArgGPRsLeft = 2; ++ int ArgFPRsLeft = FRLen ? 2 : 0; ++ ++ // The rules for return and argument types are the same, so defer to ++ // classifyArgumentType. ++ return classifyArgumentType(RetTy, /*IsFixed=*/true, ArgGPRsLeft, ++ ArgFPRsLeft); ++} ++ ++Address LoongArchABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, ++ QualType Ty) const { ++ CharUnits SlotSize = CharUnits::fromQuantity(GRLen / 8); ++ ++ // Empty records are ignored for parameter passing purposes. ++ if (isEmptyRecord(getContext(), Ty, true)) { ++ Address Addr = Address(CGF.Builder.CreateLoad(VAListAddr), ++ getVAListElementType(CGF), SlotSize); ++ Addr = CGF.Builder.CreateElementBitCast(Addr, CGF.ConvertTypeForMem(Ty)); ++ return Addr; ++ } ++ ++ auto TInfo = getContext().getTypeInfoInChars(Ty); ++ ++ // Arguments bigger than 2*GRlen bytes are passed indirectly. ++ bool IsIndirect = TInfo.Width > 2 * SlotSize; ++ ++ return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect, TInfo, ++ SlotSize, /*AllowHigherAlign=*/true); ++} ++ ++ABIArgInfo LoongArchABIInfo::extendType(QualType Ty) const { ++ int TySize = getContext().getTypeSize(Ty); ++ // LP64 ABI requires unsigned 32 bit integers to be sign extended. 
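  // (Editorial note, not part of the patch.) Example: an 'unsigned int'
  // argument with value 0x80000000u is passed in a GPR holding
  // 0xffffffff80000000, i.e. sign-extended to 64 bits, which is what the
  // getSignExtend() below requests; this matches the analogous RISC-V LP64
  // rule for 32-bit unsigned values.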
++ if (GRLen == 64 && Ty->isUnsignedIntegerOrEnumerationType() && TySize == 32) ++ return ABIArgInfo::getSignExtend(Ty); ++ return ABIArgInfo::getExtend(Ty); ++} ++ ++namespace { ++class LoongArchTargetCodeGenInfo : public TargetCodeGenInfo { ++public: ++ LoongArchTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, unsigned GRLen, ++ unsigned FRLen) ++ : TargetCodeGenInfo(std::make_unique( ++ CGT, GRLen, FRLen)) {} ++ ++ void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, ++ CodeGen::CodeGenModule &CGM) const override { ++ return; ++ } ++}; ++} // namespace ++ + //===----------------------------------------------------------------------===// + // VE ABI Implementation. + // +@@ -11560,6 +12112,7 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() { + + case llvm::Triple::le32: + return SetCGInfo(new PNaClTargetCodeGenInfo(Types)); ++ + case llvm::Triple::m68k: + return SetCGInfo(new M68kTargetCodeGenInfo(Types)); + case llvm::Triple::mips: +@@ -11677,6 +12230,9 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() { + case llvm::Triple::msp430: + return SetCGInfo(new MSP430TargetCodeGenInfo(Types)); + ++ case llvm::Triple::loongarch64: ++ return SetCGInfo(new LoongArchTargetCodeGenInfo(Types, 64, 64)); ++ + case llvm::Triple::riscv32: + case llvm::Triple::riscv64: { + StringRef ABIStr = getTarget().getABI(); +diff --git a/lib/Driver/CMakeLists.txt b/lib/Driver/CMakeLists.txt +index 18c9b2d04..14c7053e0 100644 +--- a/lib/Driver/CMakeLists.txt ++++ b/lib/Driver/CMakeLists.txt +@@ -28,6 +28,7 @@ add_clang_library(clangDriver + ToolChains/Arch/AArch64.cpp + ToolChains/Arch/ARM.cpp + ToolChains/Arch/CSKY.cpp ++ ToolChains/Arch/LoongArch.cpp + ToolChains/Arch/M68k.cpp + ToolChains/Arch/Mips.cpp + ToolChains/Arch/PPC.cpp +diff --git a/lib/Driver/Driver.cpp b/lib/Driver/Driver.cpp +index 3f29afd35..50970bd33 100644 +--- a/lib/Driver/Driver.cpp ++++ b/lib/Driver/Driver.cpp +@@ -626,6 +626,29 @@ static llvm::Triple computeTargetTriple(const Driver &D, + Target.setVendorName("intel"); + } + ++ // If target is LoongArch adjust the target triple ++ // accordingly to provided ABI name. ++ A = Args.getLastArg(options::OPT_mabi_EQ); ++ if (A && Target.isLoongArch()) { ++ StringRef ABIName = A->getValue(); ++ if (ABIName == "lp32") { ++ Target = Target.get32BitArchVariant(); ++ if (Target.getEnvironment() == llvm::Triple::GNUABI64 || ++ Target.getEnvironment() == llvm::Triple::GNUABILPX32) ++ Target.setEnvironment(llvm::Triple::GNU); ++ } else if (ABIName == "lpx32") { ++ Target = Target.get64BitArchVariant(); ++ if (Target.getEnvironment() == llvm::Triple::GNU || ++ Target.getEnvironment() == llvm::Triple::GNUABI64) ++ Target.setEnvironment(llvm::Triple::GNUABILPX32); ++ } else if (ABIName == "lp64") { ++ Target = Target.get64BitArchVariant(); ++ if (Target.getEnvironment() == llvm::Triple::GNU || ++ Target.getEnvironment() == llvm::Triple::GNUABILPX32) ++ Target.setEnvironment(llvm::Triple::GNUABI64); ++ } ++ } ++ + // If target is MIPS adjust the target triple + // accordingly to provided ABI name. + A = Args.getLastArg(options::OPT_mabi_EQ); +diff --git a/lib/Driver/ToolChains/Arch/LoongArch.cpp b/lib/Driver/ToolChains/Arch/LoongArch.cpp +new file mode 100644 +index 000000000..2c42db690 +--- /dev/null ++++ b/lib/Driver/ToolChains/Arch/LoongArch.cpp +@@ -0,0 +1,211 @@ ++//===--- LoongArch.cpp - Tools Implementations -----------------------*- C++ -*-===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
++// See https://llvm.org/LICENSE.txt for license information.
++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
++//
++//===----------------------------------------------------------------------===//
++
++#include "LoongArch.h"
++#include "ToolChains/CommonArgs.h"
++#include "clang/Driver/Driver.h"
++#include "clang/Driver/DriverDiagnostic.h"
++#include "clang/Driver/Options.h"
++#include "llvm/ADT/StringSwitch.h"
++#include "llvm/Option/ArgList.h"
++
++using namespace clang::driver;
++using namespace clang::driver::tools;
++using namespace clang;
++using namespace llvm::opt;
++
++// Get CPU and ABI names. They are not independent
++// so we have to calculate them together.
++void loongarch::getLoongArchCPUAndABI(const ArgList &Args, const llvm::Triple &Triple,
++                                      StringRef &CPUName, StringRef &ABIName) {
++  const char *DefLoongArch32CPU = "loongarch32";
++  const char *DefLoongArch64CPU = "la464";
++
++  if (Arg *A = Args.getLastArg(clang::driver::options::OPT_march_EQ,
++                               options::OPT_mcpu_EQ))
++    CPUName = A->getValue();
++
++  if (Arg *A = Args.getLastArg(options::OPT_mabi_EQ)) {
++    ABIName = A->getValue();
++    // Convert a GNU style LoongArch ABI name to the name
++    // accepted by LLVM LoongArch backend.
++    ABIName = llvm::StringSwitch<StringRef>(ABIName)
++                  .Case("32", "lp32")
++                  .Case("64", "lp64")
++                  .Default(ABIName);
++  }
++
++  // Setup default CPU and ABI names.
++  if (CPUName.empty() && ABIName.empty()) {
++    switch (Triple.getArch()) {
++    default:
++      llvm_unreachable("Unexpected triple arch name");
++    case llvm::Triple::loongarch32:
++      CPUName = DefLoongArch32CPU;
++      break;
++    case llvm::Triple::loongarch64:
++      CPUName = DefLoongArch64CPU;
++      break;
++    }
++  }
++
++  if (ABIName.empty() && (Triple.getEnvironment() == llvm::Triple::GNUABILPX32))
++    ABIName = "lpx32";
++
++  if (ABIName.empty()) {
++    ABIName = llvm::StringSwitch<StringRef>(CPUName)
++                  .Case("loongarch32", "lp32")
++                  .Cases("la264", "la364", "la464", "lp64")
++                  .Default("");
++  }
++
++  if (ABIName.empty()) {
++    // Deduce ABI name from the target triple.
++    ABIName = Triple.isLoongArch32() ? "lp32" : "lp64";
++  }
++
++  if (CPUName.empty()) {
++    // Deduce CPU name from ABI name.
++    CPUName = llvm::StringSwitch<StringRef>(ABIName)
++                  .Case("lp32", DefLoongArch32CPU)
++                  .Cases("lpx32", "lp64", DefLoongArch64CPU)
++                  .Default("");
++  }
++
++  // FIXME: Warn on inconsistent use of -march and -mabi.
++}
++
++std::string loongarch::getLoongArchABILibSuffix(const ArgList &Args,
++                                                const llvm::Triple &Triple) {
++  StringRef CPUName, ABIName;
++  tools::loongarch::getLoongArchCPUAndABI(Args, Triple, CPUName, ABIName);
++  return llvm::StringSwitch<std::string>(ABIName)
++      .Case("lp32", "")
++      .Case("lpx32", "32")
++      .Case("lp64", "64");
++}
++
++// Convert ABI name to the GNU tools acceptable variant.
++StringRef loongarch::getGnuCompatibleLoongArchABIName(StringRef ABI) {
++  return llvm::StringSwitch<StringRef>(ABI)
++      .Case("lp32", "32")
++      .Case("lp64", "64")
++      .Default(ABI);
++}
++
++// Select the LoongArch float ABI as determined by -msoft-float, -mhard-float,
++// and -mfloat-abi=.
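// (Editorial examples, not part of the patch.) The last of these flags on the
// command line wins, and the default is hard float; for instance:
//   -msoft-float -mfloat-abi=hard  -> FloatABI::Hard (the later flag wins)
//   -mfloat-abi=soft               -> FloatABI::Soft
//   (no flag given)                -> FloatABI::Hard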
++loongarch::FloatABI loongarch::getLoongArchFloatABI(const Driver &D, const ArgList &Args) {
++  loongarch::FloatABI ABI = loongarch::FloatABI::Invalid;
++  if (Arg *A =
++          Args.getLastArg(options::OPT_msoft_float, options::OPT_mhard_float,
++                          options::OPT_mfloat_abi_EQ)) {
++    if (A->getOption().matches(options::OPT_msoft_float))
++      ABI = loongarch::FloatABI::Soft;
++    else if (A->getOption().matches(options::OPT_mhard_float))
++      ABI = loongarch::FloatABI::Hard;
++    else {
++      ABI = llvm::StringSwitch<loongarch::FloatABI>(A->getValue())
++                .Case("soft", loongarch::FloatABI::Soft)
++                .Case("hard", loongarch::FloatABI::Hard)
++                .Default(loongarch::FloatABI::Invalid);
++      if (ABI == loongarch::FloatABI::Invalid && !StringRef(A->getValue()).empty()) {
++        D.Diag(clang::diag::err_drv_invalid_mfloat_abi) << A->getAsString(Args);
++        ABI = loongarch::FloatABI::Hard;
++      }
++    }
++  }
++
++  // If unspecified, choose the default based on the platform.
++  if (ABI == loongarch::FloatABI::Invalid) {
++    // Assume "hard", because it's a default value used by gcc.
++    // When we start to recognize specific target LoongArch processors,
++    // we will be able to select the default more correctly.
++    ABI = loongarch::FloatABI::Hard;
++  }
++
++  assert(ABI != loongarch::FloatABI::Invalid && "must select an ABI");
++  return ABI;
++}
++
++void loongarch::getLoongArchTargetFeatures(const Driver &D, const llvm::Triple &Triple,
++                                           const ArgList &Args,
++                                           std::vector<StringRef> &Features) {
++  StringRef CPUName;
++  StringRef ABIName;
++  getLoongArchCPUAndABI(Args, Triple, CPUName, ABIName);
++  ABIName = getGnuCompatibleLoongArchABIName(ABIName);
++
++  // At final link time, LP32 and LPX32 with CPIC will have another section
++  // added to the binary which contains the stub functions to perform
++  // any fixups required for PIC code.
++
++  bool IsLP64 = ABIName == "64";
++  bool NonPIC = false;
++
++  Arg *LastPICArg = Args.getLastArg(options::OPT_fPIC, options::OPT_fno_PIC,
++                                    options::OPT_fpic, options::OPT_fno_pic,
++                                    options::OPT_fPIE, options::OPT_fno_PIE,
++                                    options::OPT_fpie, options::OPT_fno_pie);
++  if (LastPICArg) {
++    Option O = LastPICArg->getOption();
++    NonPIC =
++        (O.matches(options::OPT_fno_PIC) || O.matches(options::OPT_fno_pic) ||
++         O.matches(options::OPT_fno_PIE) || O.matches(options::OPT_fno_pie));
++  }
++
++  if (IsLP64 && NonPIC) {
++    NonPIC = false;
++  }
++
++  loongarch::FloatABI FloatABI = loongarch::getLoongArchFloatABI(D, Args);
++  if (FloatABI == loongarch::FloatABI::Soft) {
++    // FIXME: Note, this is a hack. We need to pass the selected float
++    // mode to the LoongArchTargetInfoBase to define appropriate macros there.
++    // Now it is the only method.
++    Features.push_back("+soft-float");
++  }
++
++  AddTargetFeature(Args, Features, options::OPT_msingle_float,
++                   options::OPT_mdouble_float, "single-float");
++
++  AddTargetFeature(Args, Features, options::OPT_mlsx, options::OPT_mno_lsx,
++                   "lsx");
++  AddTargetFeature(Args, Features, options::OPT_mlasx, options::OPT_mno_lasx,
++                   "lasx");
++
++  AddTargetFeature(Args, Features, options::OPT_munaligned_access,
++                   options::OPT_mno_unaligned_access, "unaligned-access");
++
++  // Add the last -mfp32/-mfp64, if none are given and fp64 is default,
++  // pass fp64.
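  // (Editorial example, not part of the patch.) For an invocation such as
  //   clang --target=loongarch64-unknown-linux-gnu -msoft-float -mlasx -c t.c
  // the Features vector built by this function is expected to end up roughly
  // as {"+soft-float", "+lasx", "+fp64"}, since neither -mfp32 nor -mfp64 is
  // given and fp64 is the default.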
++ if (Arg *A = Args.getLastArg(options::OPT_mfp32, ++ options::OPT_mfp64)) { ++ if (A->getOption().matches(options::OPT_mfp32)) ++ Features.push_back("-fp64"); ++ else ++ Features.push_back("+fp64"); ++ } else if (loongarch::isFP64Default(Args)) { ++ Features.push_back("+fp64"); ++ } ++ ++} ++ ++bool loongarch::hasLoongArchAbiArg(const ArgList &Args, const char *Value) { ++ Arg *A = Args.getLastArg(options::OPT_mabi_EQ); ++ return A && (A->getValue() == StringRef(Value)); ++} ++ ++bool loongarch::isUCLibc(const ArgList &Args) { ++ Arg *A = Args.getLastArg(options::OPT_m_libc_Group); ++ return A && A->getOption().matches(options::OPT_muclibc); ++} ++ ++bool loongarch::isFP64Default(const ArgList &Args) { ++ return Args.getLastArg(options::OPT_msingle_float) ? false : true; ++} +diff --git a/lib/Driver/ToolChains/Arch/LoongArch.h b/lib/Driver/ToolChains/Arch/LoongArch.h +new file mode 100644 +index 000000000..53664346f +--- /dev/null ++++ b/lib/Driver/ToolChains/Arch/LoongArch.h +@@ -0,0 +1,49 @@ ++//===--- LoongArch.h - LoongArch-specific Tool Helpers ----------------------*- C++ -*-===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_ARCH_LOONGARCH_H ++#define LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_ARCH_LOONGARCH_H ++ ++#include "clang/Driver/Driver.h" ++#include "llvm/ADT/StringRef.h" ++#include "llvm/ADT/Triple.h" ++#include "llvm/Option/Option.h" ++#include ++#include ++ ++namespace clang { ++namespace driver { ++namespace tools { ++ ++namespace loongarch { ++enum class FloatABI { ++ Invalid, ++ Soft, ++ Hard, ++}; ++ ++void getLoongArchCPUAndABI(const llvm::opt::ArgList &Args, ++ const llvm::Triple &Triple, StringRef &CPUName, ++ StringRef &ABIName); ++void getLoongArchTargetFeatures(const Driver &D, const llvm::Triple &Triple, ++ const llvm::opt::ArgList &Args, ++ std::vector &Features); ++StringRef getGnuCompatibleLoongArchABIName(StringRef ABI); ++loongarch::FloatABI getLoongArchFloatABI(const Driver &D, const llvm::opt::ArgList &Args); ++std::string getLoongArchABILibSuffix(const llvm::opt::ArgList &Args, ++ const llvm::Triple &Triple); ++bool hasLoongArchAbiArg(const llvm::opt::ArgList &Args, const char *Value); ++bool isUCLibc(const llvm::opt::ArgList &Args); ++bool isFP64Default(const llvm::opt::ArgList &Args); ++ ++} // end namespace loongarch ++} // end namespace target ++} // end namespace driver ++} // end namespace clang ++ ++#endif // LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_ARCH_LOONGARCH_H +diff --git a/lib/Driver/ToolChains/Clang.cpp b/lib/Driver/ToolChains/Clang.cpp +index 3704ed858..8381fb9bd 100644 +--- a/lib/Driver/ToolChains/Clang.cpp ++++ b/lib/Driver/ToolChains/Clang.cpp +@@ -11,6 +11,7 @@ + #include "Arch/AArch64.h" + #include "Arch/ARM.h" + #include "Arch/CSKY.h" ++#include "Arch/LoongArch.h" + #include "Arch/M68k.h" + #include "Arch/Mips.h" + #include "Arch/PPC.h" +@@ -317,6 +318,11 @@ static void getTargetFeatures(const Driver &D, const llvm::Triple &Triple, + arm::getARMTargetFeatures(D, Triple, Args, Features, ForAS); + break; + ++ case llvm::Triple::loongarch32: ++ case llvm::Triple::loongarch64: ++ loongarch::getLoongArchTargetFeatures(D, Triple, Args, Features); ++ break; ++ + case llvm::Triple::ppc: + case llvm::Triple::ppcle: + case llvm::Triple::ppc64: +@@ -527,6 +533,8 
@@ static bool useFramePointerForTargetByDefault(const ArgList &Args, + // XCore never wants frame pointers, regardless of OS. + // WebAssembly never wants frame pointers. + return false; ++ case llvm::Triple::loongarch64: ++ case llvm::Triple::loongarch32: + case llvm::Triple::ppc: + case llvm::Triple::ppcle: + case llvm::Triple::ppc64: +@@ -1794,6 +1802,11 @@ void Clang::RenderTargetOptions(const llvm::Triple &EffectiveTriple, + CmdArgs.push_back("-fallow-half-arguments-and-returns"); + break; + ++ case llvm::Triple::loongarch32: ++ case llvm::Triple::loongarch64: ++ AddLoongArchTargetArgs(Args, CmdArgs); ++ break; ++ + case llvm::Triple::mips: + case llvm::Triple::mipsel: + case llvm::Triple::mips64: +@@ -1933,6 +1946,45 @@ void Clang::AddAArch64TargetArgs(const ArgList &Args, + AddUnalignedAccessWarning(CmdArgs); + } + ++void Clang::AddLoongArchTargetArgs(const ArgList &Args, ++ ArgStringList &CmdArgs) const { ++ const Driver &D = getToolChain().getDriver(); ++ StringRef CPUName; ++ StringRef ABIName; ++ const llvm::Triple &Triple = getToolChain().getTriple(); ++ loongarch::getLoongArchCPUAndABI(Args, Triple, CPUName, ABIName); ++ ++ CmdArgs.push_back("-target-abi"); ++ CmdArgs.push_back(ABIName.data()); ++ ++ loongarch::FloatABI ABI = loongarch::getLoongArchFloatABI(D, Args); ++ if (ABI == loongarch::FloatABI::Soft) { ++ // Floating point operations and argument passing are soft. ++ CmdArgs.push_back("-msoft-float"); ++ CmdArgs.push_back("-mfloat-abi"); ++ CmdArgs.push_back("soft"); ++ } else { ++ // Floating point operations and argument passing are hard. ++ assert(ABI == loongarch::FloatABI::Hard && "Invalid float abi!"); ++ CmdArgs.push_back("-mfloat-abi"); ++ CmdArgs.push_back("hard"); ++ } ++ ++ if (Arg *A = Args.getLastArg(options::OPT_mcheck_zero_division, ++ options::OPT_mno_check_zero_division)) { ++ if (A->getOption().matches(options::OPT_mno_check_zero_division)) { ++ CmdArgs.push_back("-mllvm"); ++ CmdArgs.push_back("-mnocheck-zero-division"); ++ } ++ } ++ ++ llvm::Reloc::Model RelocationModel; ++ unsigned PICLevel; ++ bool IsPIE; ++ std::tie(RelocationModel, PICLevel, IsPIE) = ++ ParsePICArgs(getToolChain(), Args); ++} ++ + void Clang::AddMIPSTargetArgs(const ArgList &Args, + ArgStringList &CmdArgs) const { + const Driver &D = getToolChain().getDriver(); +@@ -7812,6 +7864,17 @@ const char *Clang::getDependencyFileName(const ArgList &Args, + + // Begin ClangAs + ++void ClangAs::AddLoongArchTargetArgs(const ArgList &Args, ++ ArgStringList &CmdArgs) const { ++ StringRef CPUName; ++ StringRef ABIName; ++ const llvm::Triple &Triple = getToolChain().getTriple(); ++ loongarch::getLoongArchCPUAndABI(Args, Triple, CPUName, ABIName); ++ ++ CmdArgs.push_back("-target-abi"); ++ CmdArgs.push_back(ABIName.data()); ++} ++ + void ClangAs::AddMIPSTargetArgs(const ArgList &Args, + ArgStringList &CmdArgs) const { + StringRef CPUName; +@@ -8007,6 +8070,11 @@ void ClangAs::ConstructJob(Compilation &C, const JobAction &JA, + default: + break; + ++ case llvm::Triple::loongarch32: ++ case llvm::Triple::loongarch64: ++ AddLoongArchTargetArgs(Args, CmdArgs); ++ break; ++ + case llvm::Triple::mips: + case llvm::Triple::mipsel: + case llvm::Triple::mips64: +diff --git a/lib/Driver/ToolChains/Clang.h b/lib/Driver/ToolChains/Clang.h +index 5209c6687..e28012af1 100644 +--- a/lib/Driver/ToolChains/Clang.h ++++ b/lib/Driver/ToolChains/Clang.h +@@ -57,6 +57,8 @@ private: + bool KernelOrKext) const; + void AddARM64TargetArgs(const llvm::opt::ArgList &Args, + llvm::opt::ArgStringList &CmdArgs) const; ++ 
void AddLoongArchTargetArgs(const llvm::opt::ArgList &Args, ++ llvm::opt::ArgStringList &CmdArgs) const; + void AddMIPSTargetArgs(const llvm::opt::ArgList &Args, + llvm::opt::ArgStringList &CmdArgs) const; + void AddPPCTargetArgs(const llvm::opt::ArgList &Args, +@@ -123,6 +125,8 @@ class LLVM_LIBRARY_VISIBILITY ClangAs : public Tool { + public: + ClangAs(const ToolChain &TC) + : Tool("clang::as", "clang integrated assembler", TC) {} ++ void AddLoongArchTargetArgs(const llvm::opt::ArgList &Args, ++ llvm::opt::ArgStringList &CmdArgs) const; + void AddMIPSTargetArgs(const llvm::opt::ArgList &Args, + llvm::opt::ArgStringList &CmdArgs) const; + void AddX86TargetArgs(const llvm::opt::ArgList &Args, +diff --git a/lib/Driver/ToolChains/CommonArgs.cpp b/lib/Driver/ToolChains/CommonArgs.cpp +index 443725f7d..a3778db38 100644 +--- a/lib/Driver/ToolChains/CommonArgs.cpp ++++ b/lib/Driver/ToolChains/CommonArgs.cpp +@@ -9,6 +9,7 @@ + #include "CommonArgs.h" + #include "Arch/AArch64.h" + #include "Arch/ARM.h" ++#include "Arch/LoongArch.h" + #include "Arch/M68k.h" + #include "Arch/Mips.h" + #include "Arch/PPC.h" +@@ -376,6 +377,14 @@ std::string tools::getCPUName(const Driver &D, const ArgList &Args, + return A->getValue(); + return ""; + ++ case llvm::Triple::loongarch32: ++ case llvm::Triple::loongarch64: { ++ StringRef CPUName; ++ StringRef ABIName; ++ loongarch::getLoongArchCPUAndABI(Args, T, CPUName, ABIName); ++ return std::string(CPUName); ++ } ++ + case llvm::Triple::m68k: + return m68k::getM68kTargetCPU(Args); + +@@ -1378,6 +1387,18 @@ tools::ParsePICArgs(const ToolChain &ToolChain, const ArgList &Args) { + if ((ROPI || RWPI) && (PIC || PIE)) + ToolChain.getDriver().Diag(diag::err_drv_ropi_rwpi_incompatible_with_pic); + ++ if (Triple.isLoongArch()) { ++ StringRef CPUName; ++ StringRef ABIName; ++ loongarch::getLoongArchCPUAndABI(Args, Triple, CPUName, ABIName); ++ // When targeting the LP64 ABI, PIC is the default. ++ if (ABIName == "lp64") ++ PIC = true; ++ // Unlike other architectures, LoongArch, even with -fPIC/-mxgot/multigot, ++ // does not use PIC level 2 for historical reasons. ++ IsPICLevelTwo = false; ++ } ++ + if (Triple.isMIPS()) { + StringRef CPUName; + StringRef ABIName; +diff --git a/lib/Driver/ToolChains/Gnu.cpp b/lib/Driver/ToolChains/Gnu.cpp +index 665cdc313..aad574bbe 100644 +--- a/lib/Driver/ToolChains/Gnu.cpp ++++ b/lib/Driver/ToolChains/Gnu.cpp +@@ -9,6 +9,7 @@ + #include "Gnu.h" + #include "Arch/ARM.h" + #include "Arch/CSKY.h" ++#include "Arch/LoongArch.h" + #include "Arch/Mips.h" + #include "Arch/PPC.h" + #include "Arch/RISCV.h" +@@ -255,6 +256,10 @@ static const char *getLDMOption(const llvm::Triple &T, const ArgList &Args) { + case llvm::Triple::armeb: + case llvm::Triple::thumbeb: + return isArmBigEndian(T, Args) ? 
"armelfb_linux_eabi" : "armelf_linux_eabi"; ++ case llvm::Triple::loongarch32: ++ return "elf32loongarch"; ++ case llvm::Triple::loongarch64: ++ return "elf64loongarch"; + case llvm::Triple::m68k: + return "m68kelf"; + case llvm::Triple::ppc: +@@ -856,6 +861,63 @@ void tools::gnutools::Assembler::ConstructJob(Compilation &C, + + break; + } ++ case llvm::Triple::loongarch32: ++ case llvm::Triple::loongarch64: { ++ StringRef CPUName; ++ StringRef ABIName; ++ loongarch::getLoongArchCPUAndABI(Args, getToolChain().getTriple(), CPUName, ABIName); ++ ABIName = loongarch::getGnuCompatibleLoongArchABIName(ABIName); ++ ++ //FIXME: Currently gnu as doesn't support -march ++ //CmdArgs.push_back("-march=loongarch"); ++ //CmdArgs.push_back(CPUName.data()); ++ ++ //FIXME: modify loongarch::getGnuCompatibleLoongArchABIName() ++ CmdArgs.push_back("-mabi=lp64"); ++ //CmdArgs.push_back(ABIName.data()); ++ ++ // -mno-shared should be emitted unless -fpic, -fpie, -fPIC, -fPIE, ++ // or -mshared (not implemented) is in effect. ++ if (RelocationModel == llvm::Reloc::Static) ++ CmdArgs.push_back("-mno-shared"); ++ ++ // LLVM doesn't support -mplt yet and acts as if it is always given. ++ // However, -mplt has no effect with the LP64 ABI. ++ if (ABIName != "64") ++ CmdArgs.push_back("-call_nonpic"); ++ ++ break; ++ ++ // Add the last -mfp32/-mfp64. ++ if (Arg *A = Args.getLastArg(options::OPT_mfp32, ++ options::OPT_mfp64)) { ++ A->claim(); ++ A->render(Args, CmdArgs); ++ } ++ ++ if (Arg *A = Args.getLastArg(options::OPT_mlsx, options::OPT_mno_lsx)) { ++ // Do not use AddLastArg because not all versions of LoongArch assembler ++ // support -mlsx / -mno-lsx options. ++ if (A->getOption().matches(options::OPT_mlsx)) ++ CmdArgs.push_back(Args.MakeArgString("-mlsx")); ++ } ++ ++ if (Arg *A = Args.getLastArg(options::OPT_mlasx, options::OPT_mno_lasx)) { ++ // Do not use AddLastArg because not all versions of LoongArch assembler ++ // support -mlasx / -mno-lasx options. 
++ if (A->getOption().matches(options::OPT_mlasx)) ++ CmdArgs.push_back(Args.MakeArgString("-mlasx")); ++ } ++ ++ Args.AddLastArg(CmdArgs, options::OPT_mhard_float, ++ options::OPT_msoft_float); ++ ++ Args.AddLastArg(CmdArgs, options::OPT_mdouble_float, ++ options::OPT_msingle_float); ++ ++ AddAssemblerKPIC(getToolChain(), Args, CmdArgs); ++ break; ++ } + case llvm::Triple::mips: + case llvm::Triple::mipsel: + case llvm::Triple::mips64: +@@ -2294,6 +2356,10 @@ void Generic_GCC::GCCInstallationDetector::AddDefaultGCCPrefixes( + "s390x-linux-gnu", "s390x-unknown-linux-gnu", "s390x-ibm-linux-gnu", + "s390x-suse-linux", "s390x-redhat-linux"}; + ++ static const char *const LoongArch64LibDirs[] = {"/lib64", "/lib"}; ++ static const char *const LoongArch64Triples[] = { ++ "loongarch64-linux-gnu", "loongarch64-unknown-linux-gnu", ++ "loongarch64-loongson-linux-gnu", "loongarch64-redhat-linux"}; + + using std::begin; + using std::end; +@@ -2466,6 +2532,10 @@ void Generic_GCC::GCCInstallationDetector::AddDefaultGCCPrefixes( + BiarchTripleAliases.append(begin(X32Triples), end(X32Triples)); + } + break; ++ case llvm::Triple::loongarch64: ++ LibDirs.append(begin(LoongArch64LibDirs), end(LoongArch64LibDirs)); ++ TripleAliases.append(begin(LoongArch64Triples), end(LoongArch64Triples)); ++ break; + case llvm::Triple::m68k: + LibDirs.append(begin(M68kLibDirs), end(M68kLibDirs)); + TripleAliases.append(begin(M68kTriples), end(M68kTriples)); +@@ -2823,6 +2893,7 @@ bool Generic_GCC::isPICDefault() const { + switch (getArch()) { + case llvm::Triple::x86_64: + return getTriple().isOSWindows(); ++ case llvm::Triple::loongarch64: + case llvm::Triple::mips64: + case llvm::Triple::mips64el: + return true; +@@ -2863,6 +2934,8 @@ bool Generic_GCC::IsIntegratedAssemblerDefault() const { + case llvm::Triple::ppc64le: + case llvm::Triple::riscv32: + case llvm::Triple::riscv64: ++ case llvm::Triple::loongarch32: ++ case llvm::Triple::loongarch64: + case llvm::Triple::sparc: + case llvm::Triple::sparcel: + case llvm::Triple::sparcv9: +diff --git a/lib/Driver/ToolChains/Linux.cpp b/lib/Driver/ToolChains/Linux.cpp +index ceb1a982c..45adea6dd 100644 +--- a/lib/Driver/ToolChains/Linux.cpp ++++ b/lib/Driver/ToolChains/Linux.cpp +@@ -8,6 +8,7 @@ + + #include "Linux.h" + #include "Arch/ARM.h" ++#include "Arch/LoongArch.h" + #include "Arch/Mips.h" + #include "Arch/PPC.h" + #include "Arch/RISCV.h" +@@ -85,6 +86,11 @@ std::string Linux::getMultiarchTriple(const Driver &D, + case llvm::Triple::aarch64_be: + return "aarch64_be-linux-gnu"; + ++ case llvm::Triple::loongarch32: ++ return "loongarch32-linux-gnu"; ++ case llvm::Triple::loongarch64: ++ return "loongarch64-linux-gnu"; ++ + case llvm::Triple::m68k: + return "m68k-linux-gnu"; + +@@ -473,6 +479,11 @@ std::string Linux::getDynamicLinker(const ArgList &Args) const { + Loader = HF ? 
"ld-linux-armhf.so.3" : "ld-linux.so.3"; + break; + } ++ case llvm::Triple::loongarch32: ++ case llvm::Triple::loongarch64: ++ LibDir = "lib" + tools::loongarch::getLoongArchABILibSuffix(Args, Triple); ++ Loader = "ld.so.1"; ++ break; + case llvm::Triple::m68k: + LibDir = "lib"; + Loader = "ld.so.1"; +@@ -741,6 +752,7 @@ SanitizerMask Linux::getSupportedSanitizers() const { + const bool IsRISCV64 = getTriple().getArch() == llvm::Triple::riscv64; + const bool IsSystemZ = getTriple().getArch() == llvm::Triple::systemz; + const bool IsHexagon = getTriple().getArch() == llvm::Triple::hexagon; ++ const bool IsLoongArch64 = getTriple().getArch() == llvm::Triple::loongarch64; + SanitizerMask Res = ToolChain::getSupportedSanitizers(); + Res |= SanitizerKind::Address; + Res |= SanitizerKind::PointerCompare; +@@ -751,19 +763,20 @@ SanitizerMask Linux::getSupportedSanitizers() const { + Res |= SanitizerKind::Memory; + Res |= SanitizerKind::Vptr; + Res |= SanitizerKind::SafeStack; +- if (IsX86_64 || IsMIPS64 || IsAArch64) ++ if (IsX86_64 || IsMIPS64 || IsAArch64 || IsLoongArch64) + Res |= SanitizerKind::DataFlow; + if (IsX86_64 || IsMIPS64 || IsAArch64 || IsX86 || IsArmArch || IsPowerPC64 || +- IsRISCV64 || IsSystemZ || IsHexagon) ++ IsRISCV64 || IsSystemZ || IsHexagon || IsLoongArch64) + Res |= SanitizerKind::Leak; +- if (IsX86_64 || IsMIPS64 || IsAArch64 || IsPowerPC64 || IsSystemZ) ++ if (IsX86_64 || IsMIPS64 || IsAArch64 || IsPowerPC64 || IsSystemZ || ++ IsLoongArch64) + Res |= SanitizerKind::Thread; + if (IsX86_64) + Res |= SanitizerKind::KernelMemory; + if (IsX86 || IsX86_64) + Res |= SanitizerKind::Function; + if (IsX86_64 || IsMIPS64 || IsAArch64 || IsX86 || IsMIPS || IsArmArch || +- IsPowerPC64 || IsHexagon) ++ IsPowerPC64 || IsHexagon || IsLoongArch64) + Res |= SanitizerKind::Scudo; + if (IsX86_64 || IsAArch64) { + Res |= SanitizerKind::HWAddress; +diff --git a/lib/Driver/XRayArgs.cpp b/lib/Driver/XRayArgs.cpp +index 63b575178..4e3ae3f25 100644 +--- a/lib/Driver/XRayArgs.cpp ++++ b/lib/Driver/XRayArgs.cpp +@@ -42,6 +42,8 @@ XRayArgs::XRayArgs(const ToolChain &TC, const ArgList &Args) { + case llvm::Triple::aarch64: + case llvm::Triple::hexagon: + case llvm::Triple::ppc64le: ++ case llvm::Triple::loongarch32: ++ case llvm::Triple::loongarch64: + case llvm::Triple::mips: + case llvm::Triple::mipsel: + case llvm::Triple::mips64: +diff --git a/lib/Headers/CMakeLists.txt b/lib/Headers/CMakeLists.txt +index 6e2060991..2ec170cc2 100644 +--- a/lib/Headers/CMakeLists.txt ++++ b/lib/Headers/CMakeLists.txt +@@ -68,6 +68,12 @@ set(hlsl_files + hlsl/hlsl_intrinsics.h + ) + ++set(loongarch_files ++ lasxintrin.h ++ larchintrin.h ++ lsxintrin.h ++ ) ++ + set(mips_msa_files + msa.h + ) +@@ -220,6 +226,7 @@ set(files + ${hexagon_files} + ${hip_files} + ${hlsl_files} ++ ${loongarch_files} + ${mips_msa_files} + ${opencl_files} + ${ppc_files} +@@ -381,6 +388,7 @@ add_dependencies("clang-resource-headers" + "hexagon-resource-headers" + "hip-resource-headers" + "hlsl-resource-headers" ++ "loongarch-resource-headers" + "mips-resource-headers" + "ppc-resource-headers" + "ppc-htm-resource-headers" +@@ -404,6 +412,7 @@ add_header_target("aarch64-resource-headers" "${aarch64_only_files};${aarch64_on + add_header_target("cuda-resource-headers" "${cuda_files};${cuda_wrapper_files}") + add_header_target("hexagon-resource-headers" "${hexagon_files}") + add_header_target("hip-resource-headers" "${hip_files}") ++add_header_target("loongarch-resource-headers" "${loongarch_files}") + add_header_target("mips-resource-headers" 
"${mips_msa_files}") + add_header_target("ppc-resource-headers" "${ppc_files};${ppc_wrapper_files}") + add_header_target("ppc-htm-resource-headers" "${ppc_htm_files}") +@@ -494,6 +503,12 @@ install( + EXCLUDE_FROM_ALL + COMPONENT hip-resource-headers) + ++install( ++ FILES ${loongarch_files} ++ DESTINATION ${header_install_dir} ++ EXCLUDE_FROM_ALL ++ COMPONENT loongarch-resource-headers) ++ + install( + FILES ${mips_msa_files} + DESTINATION ${header_install_dir} +diff --git a/lib/Headers/larchintrin.h b/lib/Headers/larchintrin.h +new file mode 100644 +index 000000000..b5acf218b +--- /dev/null ++++ b/lib/Headers/larchintrin.h +@@ -0,0 +1,338 @@ ++//===----------- larchintrin.h - LoongArch BASE intrinsics ------------------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file contains the LoongArch Base intrinsics ++// ++//===----------------------------------------------------------------------===// ++#ifndef __LOONGARCH_BASE_H ++#define __LOONGARCH_BASE_H ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++typedef struct drdtime{ ++ unsigned long dvalue; ++ unsigned long dtimeid; ++} __drdtime_t; ++ ++typedef struct rdtime{ ++ unsigned int value; ++ unsigned int timeid; ++} __rdtime_t; ++ ++/* Assembly instruction format: rd, csr_num */ ++/* Data types in instruction templates: unsigned int, uimm14_32 */ ++#define __csrrd_w(/*uimm14_32*/ _1) \ ++ ((unsigned int)__builtin_loongarch_csrrd_w(_1)) ++ ++/* Assembly instruction format: rd, csr_num */ ++/* Data types in instruction templates: unsigned int, uimm14_32 */ ++#define __csrwr_w(/*unsigned int*/ _1, /*uimm14_32*/ _2) \ ++ ((unsigned int)__builtin_loongarch_csrwr_w((unsigned int)(_1), (_2))) ++ ++/* Assembly instruction format: rd, rj, csr_num */ ++/* Data types in instruction templates: unsigned int, unsigned int, uimm14_32 */ ++#define __csrxchg_w(/*unsigned int*/ _1, /*unsigned int*/ _2, \ ++ /*uimm14_32*/ _3) \ ++ ((unsigned int)__builtin_loongarch_csrxchg_w((unsigned int)(_1), \ ++ (unsigned int)(_2), (_3))) ++ ++/* Assembly instruction format: rd, csr_num */ ++/* Data types in instruction templates: unsigned long int, uimm14 */ ++#define __csrrd_d(/*uimm14*/ _1) \ ++ ((unsigned long int)__builtin_loongarch_csrrd_d(_1)) ++ ++/* Assembly instruction format: rd, csr_num */ ++/* Data types in instruction templates: unsigned long int, uimm14 */ ++#define __csrwr_d(/*unsigned long int*/ _1, /*uimm14*/ _2) \ ++ ((unsigned long int)__builtin_loongarch_csrwr_d((unsigned long int)(_1), \ ++ (_2))) ++ ++/* Assembly instruction format: rd, rj, csr_num */ ++/* Data types in instruction templates: unsigned long int, unsigned long int, uimm14 */ ++#define __csrxchg_d(/*unsigned long int*/ _1, /*unsigned long int*/ _2, \ ++ /*uimm14*/ _3) \ ++ ((unsigned long int)__builtin_loongarch_csrxchg_d( \ ++ (unsigned long int)(_1), (unsigned long int)(_2), (_3))) ++ ++/* Assembly instruction format: rd, rj */ ++/* Data types in instruction templates: unsigned char, unsigned int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++unsigned char __iocsrrd_b(unsigned int _1) ++{ ++ return (unsigned char)__builtin_loongarch_iocsrrd_b((unsigned int)_1); ++} ++ ++/* Assembly instruction format: rd, rj */ ++/* Data types in instruction templates: unsigned 
short, unsigned int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++unsigned short __iocsrrd_h(unsigned int _1) ++{ ++ return (unsigned short)__builtin_loongarch_iocsrrd_h((unsigned int)_1); ++} ++ ++/* Assembly instruction format: rd, rj */ ++/* Data types in instruction templates: unsigned int, unsigned int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++unsigned int __iocsrrd_w(unsigned int _1) ++{ ++ return (unsigned int)__builtin_loongarch_iocsrrd_w((unsigned int)_1); ++} ++ ++/* Assembly instruction format: rd, rj */ ++/* Data types in instruction templates: unsigned long int, unsigned int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++unsigned long int __iocsrrd_d(unsigned int _1) ++{ ++ return (unsigned long int)__builtin_loongarch_iocsrrd_d((unsigned int)_1); ++} ++ ++/* Assembly instruction format: rd, rj */ ++/* Data types in instruction templates: unsigned char, unsigned int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++void __iocsrwr_b(unsigned char _1, unsigned int _2) ++{ ++ return (void)__builtin_loongarch_iocsrwr_b((unsigned char)_1, (unsigned int)_2); ++} ++ ++/* Assembly instruction format: rd, rj */ ++/* Data types in instruction templates: unsigned short, unsigned int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++void __iocsrwr_h(unsigned short _1, unsigned int _2) ++{ ++ return (void)__builtin_loongarch_iocsrwr_h((unsigned short)_1, (unsigned int)_2); ++} ++ ++/* Assembly instruction format: rd, rj */ ++/* Data types in instruction templates: unsigned int, unsigned int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++void __iocsrwr_w(unsigned int _1, unsigned int _2) ++{ ++ return (void)__builtin_loongarch_iocsrwr_w((unsigned int)_1, (unsigned int)_2); ++} ++ ++extern __inline unsigned int ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++ __cpucfg(unsigned int _1) { ++ return (unsigned int)__builtin_loongarch_cpucfg((unsigned int)_1); ++} ++ ++/* Assembly instruction format: rd, rj */ ++/* Data types in instruction templates: unsigned long int, unsigned int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++void __iocsrwr_d(unsigned long int _1, unsigned int _2) ++{ ++ return (void)__builtin_loongarch_iocsrwr_d((unsigned long int)_1, (unsigned int)_2); ++} ++ ++/* Assembly instruction format: op, rj, si12 */ ++/* Data types in instruction templates: uimm5, unsigned int, simm12 */ ++#define __cacop_w(/*uimm5*/ _1, /*unsigned int*/ _2, /*simm12*/ _3) \ ++ ((void)__builtin_loongarch_cacop_w((_1), (unsigned int)(_2), (_3))) ++ ++/* Assembly instruction format: op, rj, si12 */ ++/* Data types in instruction templates: uimm5, unsigned long int, simm12 */ ++#define __cacop_d(/*uimm5*/ _1, /*unsigned long int*/ _2, /*simm12*/ _3) \ ++ ((void)__builtin_loongarch_cacop_d((_1), (unsigned long int)(_2), (_3))) ++ ++#define __rdtime_d __builtin_loongarch_rdtime_d ++#define __rdtimel_w __builtin_loongarch_rdtimel_w ++#define __rdtimeh_w __builtin_loongarch_rdtimeh_w ++ ++extern __inline __drdtime_t __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++__builtin_loongarch_rdtime_d (void) ++{ ++ __drdtime_t drdtime; ++ __asm__ volatile ( ++ "rdtime.d\t%[val],%[tid]\n\t" ++ : [val]"=&r"(drdtime.dvalue),[tid]"=&r"(drdtime.dtimeid) ++ : ++ ); ++ return 
drdtime; ++} ++ ++extern __inline __rdtime_t __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++__builtin_loongarch_rdtimeh_w (void) ++{ ++ __rdtime_t rdtime; ++ __asm__ volatile ( ++ "rdtimeh.w\t%[val],%[tid]\n\t" ++ : [val]"=&r"(rdtime.value),[tid]"=&r"(rdtime.timeid) ++ : ++ ); ++ return rdtime; ++} ++ ++extern __inline __rdtime_t __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++__builtin_loongarch_rdtimel_w (void) ++{ ++ __rdtime_t rdtime; ++ __asm__ volatile ( ++ "rdtimel.w\t%[val],%[tid]\n\t" ++ : [val]"=&r"(rdtime.value),[tid]"=&r"(rdtime.timeid) ++ : ++ ); ++ return rdtime; ++} ++ ++/* Assembly instruction format: rd, rj, rk */ ++/* Data types in instruction templates: int, char, int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++int __crc_w_b_w(char _1, int _2) ++{ ++ return (int)__builtin_loongarch_crc_w_b_w((char)_1, (int)_2); ++} ++ ++/* Assembly instruction format: rd, rj, rk */ ++/* Data types in instruction templates: int, short, int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++int __crc_w_h_w(short _1, int _2) ++{ ++ return (int)__builtin_loongarch_crc_w_h_w((short)_1, (int)_2); ++} ++ ++/* Assembly instruction format: rd, rj, rk */ ++/* Data types in instruction templates: int, int, int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++int __crc_w_w_w(int _1, int _2) ++{ ++ return (int)__builtin_loongarch_crc_w_w_w((int)_1, (int)_2); ++} ++ ++/* Assembly instruction format: rd, rj, rk */ ++/* Data types in instruction templates: int, long int, int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++int __crc_w_d_w(long int _1, int _2) ++{ ++ return (int)__builtin_loongarch_crc_w_d_w((long int)_1, (int)_2); ++} ++ ++/* Assembly instruction format: rd, rj, rk */ ++/* Data types in instruction templates: int, char, int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++int __crcc_w_b_w(char _1, int _2) ++{ ++ return (int)__builtin_loongarch_crcc_w_b_w((char)_1, (int)_2); ++} ++ ++/* Assembly instruction format: rd, rj, rk */ ++/* Data types in instruction templates: int, short, int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++int __crcc_w_h_w(short _1, int _2) ++{ ++ return (int)__builtin_loongarch_crcc_w_h_w((short)_1, (int)_2); ++} ++ ++/* Assembly instruction format: rd, rj, rk */ ++/* Data types in instruction templates: int, int, int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++int __crcc_w_w_w(int _1, int _2) ++{ ++ return (int)__builtin_loongarch_crcc_w_w_w((int)_1, (int)_2); ++} ++ ++/* Assembly instruction format: rd, rj, rk */ ++/* Data types in instruction templates: int, long int, int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++int __crcc_w_d_w(long int _1, int _2) ++{ ++ return (int)__builtin_loongarch_crcc_w_d_w((long int)_1, (int)_2); ++} ++ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++void __tlbclr() ++{ ++ return (void)__builtin_loongarch_tlbclr(); ++} ++ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++void __tlbflush() ++{ ++ return (void)__builtin_loongarch_tlbflush(); ++} ++ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++void __tlbfill() ++{ ++ return 
(void)__builtin_loongarch_tlbfill(); ++} ++ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++void __tlbrd() ++{ ++ return (void)__builtin_loongarch_tlbrd(); ++} ++ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++void __tlbwr() ++{ ++ return (void)__builtin_loongarch_tlbwr(); ++} ++ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++void __tlbsrch() ++{ ++ return (void)__builtin_loongarch_tlbsrch(); ++} ++ ++/* Assembly instruction format: code */ ++/* Data types in instruction templates: uimm15 */ ++#define __syscall(/*uimm15*/ _1) ((void)__builtin_loongarch_syscall(_1)) ++ ++/* Assembly instruction format: code */ ++/* Data types in instruction templates: uimm15 */ ++#define __break(/*uimm15*/ _1) ((void)__builtin_loongarch_break(_1)) ++ ++/* Assembly instruction format: hint */ ++/* Data types in instruction templates: uimm15 */ ++#define __dbar(/*uimm15*/ _1) ((void)__builtin_loongarch_dbar(_1)) ++ ++/* Assembly instruction format: hint */ ++/* Data types in instruction templates: uimm15 */ ++#define __ibar(/*uimm15*/ _1) ((void)__builtin_loongarch_ibar(_1)) ++ ++/* Assembly instruction format: rj, rk */ ++/* Data types in instruction templates: long int, long int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++void __asrtle_d(long int _1, long int _2) ++{ ++ return (void)__builtin_loongarch_asrtle_d((long int)_1, (long int)_2); ++} ++ ++/* Assembly instruction format: rj, rk */ ++/* Data types in instruction templates: long int, long int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++void __asrtgt_d(long int _1, long int _2) ++{ ++ return (void)__builtin_loongarch_asrtgt_d((long int)_1, (long int)_2); ++} ++ ++#define __movfcsr2gr(uimm5) \ ++({ \ ++ unsigned int rd; \ ++ __asm__ volatile ( \ ++ "movfcsr2gr %0, $fcsr" #uimm5 \ ++ : "=&r"(rd) \ ++ : \ ++ ); rd; \ ++}) ++ ++#define __movgr2fcsr(uimm5, rj) \ ++{ \ ++ __asm__ volatile ( \ ++ "movgr2fcsr $fcsr" #uimm5 ", %0" \ ++ : \ ++ : "r" (rj) \ ++ ); \ ++} ++ ++#ifdef __cplusplus ++} ++#endif ++#endif /* __LOONGARCH_BASE_H */ +diff --git a/lib/Headers/lasxintrin.h b/lib/Headers/lasxintrin.h +new file mode 100644 +index 000000000..c454b0c9e +--- /dev/null ++++ b/lib/Headers/lasxintrin.h +@@ -0,0 +1,5337 @@ ++//===----------- lasxintrin.h - LoongArch LASX intrinsics ++//------------------===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++// ++// This file contains the LoongArch LASX intrinsics. 
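// (Editorial usage sketch, not part of the patch.) The wrappers below are
// guarded by __loongarch_asx, which is expected to be defined when compiling
// with -mlasx; a typical use looks like:
//
//   #include <lasxintrin.h>
//   __m256i shift_bytes(__m256i v, __m256i amt) {
//     return __lasx_xvsll_b(v, amt); // per-byte logical shift left
//   }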
++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef _GCC_LOONGSON_ASXINTRIN_H ++#define _GCC_LOONGSON_ASXINTRIN_H 1 ++ ++#if defined(__loongarch_asx) ++ ++typedef signed char v32i8 __attribute__((vector_size(32), aligned(32))); ++typedef signed char v32i8_b __attribute__((vector_size(32), aligned(1))); ++typedef unsigned char v32u8 __attribute__((vector_size(32), aligned(32))); ++typedef unsigned char v32u8_b __attribute__((vector_size(32), aligned(1))); ++typedef short v16i16 __attribute__((vector_size(32), aligned(32))); ++typedef short v16i16_h __attribute__((vector_size(32), aligned(2))); ++typedef unsigned short v16u16 __attribute__((vector_size(32), aligned(32))); ++typedef unsigned short v16u16_h __attribute__((vector_size(32), aligned(2))); ++typedef int v8i32 __attribute__((vector_size(32), aligned(32))); ++typedef int v8i32_w __attribute__((vector_size(32), aligned(4))); ++typedef unsigned int v8u32 __attribute__((vector_size(32), aligned(32))); ++typedef unsigned int v8u32_w __attribute__((vector_size(32), aligned(4))); ++typedef long long v4i64 __attribute__((vector_size(32), aligned(32))); ++typedef long long v4i64_d __attribute__((vector_size(32), aligned(8))); ++typedef unsigned long long v4u64 __attribute__((vector_size(32), aligned(32))); ++typedef unsigned long long v4u64_d __attribute__((vector_size(32), aligned(8))); ++typedef float v8f32 __attribute__((vector_size(32), aligned(32))); ++typedef float v8f32_w __attribute__((vector_size(32), aligned(4))); ++typedef double v4f64 __attribute__((vector_size(32), aligned(32))); ++typedef double v4f64_d __attribute__((vector_size(32), aligned(8))); ++ ++typedef double v4f64 __attribute__((vector_size(32), aligned(32))); ++typedef double v4f64_d __attribute__((vector_size(32), aligned(8))); ++ ++typedef float __m256 __attribute__((__vector_size__(32), __may_alias__)); ++typedef long long __m256i __attribute__((__vector_size__(32), __may_alias__)); ++typedef double __m256d __attribute__((__vector_size__(32), __may_alias__)); ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsll_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsll_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsll_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsll_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsll_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsll_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsll_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsll_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui3. */ ++/* Data types in instruction templates: V32QI, V32QI, UQI. 
*/ ++#define __lasx_xvslli_b(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslli_b((v32i8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: V16HI, V16HI, UQI. */ ++#define __lasx_xvslli_h(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslli_h((v16i16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V8SI, V8SI, UQI. */ ++#define __lasx_xvslli_w(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslli_w((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui6. */ ++/* Data types in instruction templates: V4DI, V4DI, UQI. */ ++#define __lasx_xvslli_d(/*__m256i*/ _1, /*ui6*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslli_d((v4i64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsra_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsra_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsra_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsra_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsra_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsra_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsra_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsra_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui3. */ ++/* Data types in instruction templates: V32QI, V32QI, UQI. */ ++#define __lasx_xvsrai_b(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrai_b((v32i8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: V16HI, V16HI, UQI. */ ++#define __lasx_xvsrai_h(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrai_h((v16i16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V8SI, V8SI, UQI. */ ++#define __lasx_xvsrai_w(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrai_w((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui6. */ ++/* Data types in instruction templates: V4DI, V4DI, UQI. */ ++#define __lasx_xvsrai_d(/*__m256i*/ _1, /*ui6*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrai_d((v4i64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrar_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrar_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrar_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrar_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrar_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrar_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrar_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrar_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui3. */ ++/* Data types in instruction templates: V32QI, V32QI, UQI. */ ++#define __lasx_xvsrari_b(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrari_b((v32i8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: V16HI, V16HI, UQI. */ ++#define __lasx_xvsrari_h(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrari_h((v16i16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V8SI, V8SI, UQI. */ ++#define __lasx_xvsrari_w(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrari_w((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui6. */ ++/* Data types in instruction templates: V4DI, V4DI, UQI. */ ++#define __lasx_xvsrari_d(/*__m256i*/ _1, /*ui6*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrari_d((v4i64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrl_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrl_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrl_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrl_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrl_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrl_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrl_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrl_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui3. */ ++/* Data types in instruction templates: V32QI, V32QI, UQI. */ ++#define __lasx_xvsrli_b(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrli_b((v32i8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: V16HI, V16HI, UQI. 
*/ ++#define __lasx_xvsrli_h(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrli_h((v16i16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V8SI, V8SI, UQI. */ ++#define __lasx_xvsrli_w(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrli_w((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui6. */ ++/* Data types in instruction templates: V4DI, V4DI, UQI. */ ++#define __lasx_xvsrli_d(/*__m256i*/ _1, /*ui6*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrli_d((v4i64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrlr_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrlr_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrlr_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrlr_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrlr_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrlr_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrlr_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrlr_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui3. */ ++/* Data types in instruction templates: V32QI, V32QI, UQI. */ ++#define __lasx_xvsrlri_b(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrlri_b((v32i8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: V16HI, V16HI, UQI. */ ++#define __lasx_xvsrlri_h(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrlri_h((v16i16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V8SI, V8SI, UQI. */ ++#define __lasx_xvsrlri_w(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrlri_w((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui6. */ ++/* Data types in instruction templates: V4DI, V4DI, UQI. */ ++#define __lasx_xvsrlri_d(/*__m256i*/ _1, /*ui6*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrlri_d((v4i64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitclr_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvbitclr_b((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV16HI, UV16HI, UV16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitclr_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvbitclr_h((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. 
*/ ++/* Data types in instruction templates: UV8SI, UV8SI, UV8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitclr_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvbitclr_w((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitclr_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvbitclr_d((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui3. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UQI. */ ++#define __lasx_xvbitclri_b(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbitclri_b((v32u8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: UV16HI, UV16HI, UQI. */ ++#define __lasx_xvbitclri_h(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbitclri_h((v16u16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: UV8SI, UV8SI, UQI. */ ++#define __lasx_xvbitclri_w(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbitclri_w((v8u32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui6. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UQI. */ ++#define __lasx_xvbitclri_d(/*__m256i*/ _1, /*ui6*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbitclri_d((v4u64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitset_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvbitset_b((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV16HI, UV16HI, UV16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitset_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvbitset_h((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV8SI, UV8SI, UV8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitset_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvbitset_w((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitset_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvbitset_d((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui3. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UQI. */ ++#define __lasx_xvbitseti_b(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbitseti_b((v32u8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: UV16HI, UV16HI, UQI. */ ++#define __lasx_xvbitseti_h(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbitseti_h((v16u16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: UV8SI, UV8SI, UQI. 
*/ ++#define __lasx_xvbitseti_w(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbitseti_w((v8u32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui6. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UQI. */ ++#define __lasx_xvbitseti_d(/*__m256i*/ _1, /*ui6*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbitseti_d((v4u64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitrev_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvbitrev_b((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV16HI, UV16HI, UV16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitrev_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvbitrev_h((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV8SI, UV8SI, UV8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitrev_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvbitrev_w((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitrev_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvbitrev_d((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui3. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UQI. */ ++#define __lasx_xvbitrevi_b(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbitrevi_b((v32u8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: UV16HI, UV16HI, UQI. */ ++#define __lasx_xvbitrevi_h(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbitrevi_h((v16u16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: UV8SI, UV8SI, UQI. */ ++#define __lasx_xvbitrevi_w(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbitrevi_w((v8u32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui6. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UQI. */ ++#define __lasx_xvbitrevi_d(/*__m256i*/ _1, /*ui6*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbitrevi_d((v4u64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvadd_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvadd_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvadd_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvadd_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvadd_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvadd_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvadd_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvadd_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V32QI, V32QI, UQI. */ ++#define __lasx_xvaddi_bu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvaddi_bu((v32i8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V16HI, V16HI, UQI. */ ++#define __lasx_xvaddi_hu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvaddi_hu((v16i16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V8SI, V8SI, UQI. */ ++#define __lasx_xvaddi_wu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvaddi_wu((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V4DI, V4DI, UQI. */ ++#define __lasx_xvaddi_du(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvaddi_du((v4i64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsub_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsub_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsub_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsub_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsub_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsub_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsub_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsub_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V32QI, V32QI, UQI. */ ++#define __lasx_xvsubi_bu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsubi_bu((v32i8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V16HI, V16HI, UQI. */ ++#define __lasx_xvsubi_hu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsubi_hu((v16i16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V8SI, V8SI, UQI. */ ++#define __lasx_xvsubi_wu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsubi_wu((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5. 
*/ ++/* Data types in instruction templates: V4DI, V4DI, UQI. */ ++#define __lasx_xvsubi_du(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsubi_du((v4i64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmax_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmax_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmax_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmax_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmax_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmax_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmax_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmax_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, si5. */ ++/* Data types in instruction templates: V32QI, V32QI, QI. */ ++#define __lasx_xvmaxi_b(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmaxi_b((v32i8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, si5. */ ++/* Data types in instruction templates: V16HI, V16HI, QI. */ ++#define __lasx_xvmaxi_h(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmaxi_h((v16i16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, si5. */ ++/* Data types in instruction templates: V8SI, V8SI, QI. */ ++#define __lasx_xvmaxi_w(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmaxi_w((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, si5. */ ++/* Data types in instruction templates: V4DI, V4DI, QI. */ ++#define __lasx_xvmaxi_d(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmaxi_d((v4i64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmax_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmax_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV16HI, UV16HI, UV16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmax_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmax_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV8SI, UV8SI, UV8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmax_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmax_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmax_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmax_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UQI. */ ++#define __lasx_xvmaxi_bu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmaxi_bu((v32u8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: UV16HI, UV16HI, UQI. */ ++#define __lasx_xvmaxi_hu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmaxi_hu((v16u16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: UV8SI, UV8SI, UQI. */ ++#define __lasx_xvmaxi_wu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmaxi_wu((v8u32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UQI. */ ++#define __lasx_xvmaxi_du(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmaxi_du((v4u64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmin_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmin_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmin_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmin_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmin_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmin_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmin_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmin_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, si5. */ ++/* Data types in instruction templates: V32QI, V32QI, QI. */ ++#define __lasx_xvmini_b(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmini_b((v32i8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, si5. */ ++/* Data types in instruction templates: V16HI, V16HI, QI. */ ++#define __lasx_xvmini_h(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmini_h((v16i16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, si5. */ ++/* Data types in instruction templates: V8SI, V8SI, QI. */ ++#define __lasx_xvmini_w(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmini_w((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, si5. */ ++/* Data types in instruction templates: V4DI, V4DI, QI. */ ++#define __lasx_xvmini_d(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmini_d((v4i64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmin_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmin_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV16HI, UV16HI, UV16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmin_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmin_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV8SI, UV8SI, UV8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmin_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmin_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmin_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmin_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UQI. */ ++#define __lasx_xvmini_bu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmini_bu((v32u8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: UV16HI, UV16HI, UQI. */ ++#define __lasx_xvmini_hu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmini_hu((v16u16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: UV8SI, UV8SI, UQI. */ ++#define __lasx_xvmini_wu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmini_wu((v8u32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UQI. */ ++#define __lasx_xvmini_du(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmini_du((v4u64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvseq_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvseq_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvseq_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvseq_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvseq_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvseq_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvseq_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvseq_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, si5. 
*/ ++/* Data types in instruction templates: V32QI, V32QI, QI. */ ++#define __lasx_xvseqi_b(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvseqi_b((v32i8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, si5. */ ++/* Data types in instruction templates: V16HI, V16HI, QI. */ ++#define __lasx_xvseqi_h(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvseqi_h((v16i16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, si5. */ ++/* Data types in instruction templates: V8SI, V8SI, QI. */ ++#define __lasx_xvseqi_w(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvseqi_w((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, si5. */ ++/* Data types in instruction templates: V4DI, V4DI, QI. */ ++#define __lasx_xvseqi_d(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvseqi_d((v4i64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvslt_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvslt_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvslt_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvslt_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvslt_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvslt_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvslt_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvslt_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, si5. */ ++/* Data types in instruction templates: V32QI, V32QI, QI. */ ++#define __lasx_xvslti_b(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslti_b((v32i8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, si5. */ ++/* Data types in instruction templates: V16HI, V16HI, QI. */ ++#define __lasx_xvslti_h(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslti_h((v16i16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, si5. */ ++/* Data types in instruction templates: V8SI, V8SI, QI. */ ++#define __lasx_xvslti_w(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslti_w((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, si5. */ ++/* Data types in instruction templates: V4DI, V4DI, QI. */ ++#define __lasx_xvslti_d(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslti_d((v4i64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, UV32QI, UV32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvslt_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvslt_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, UV16HI, UV16HI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvslt_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvslt_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, UV8SI, UV8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvslt_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvslt_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, UV4DI, UV4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvslt_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvslt_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V32QI, UV32QI, UQI. */ ++#define __lasx_xvslti_bu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslti_bu((v32u8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V16HI, UV16HI, UQI. */ ++#define __lasx_xvslti_hu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslti_hu((v16u16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V8SI, UV8SI, UQI. */ ++#define __lasx_xvslti_wu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslti_wu((v8u32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V4DI, UV4DI, UQI. */ ++#define __lasx_xvslti_du(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslti_du((v4u64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsle_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsle_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsle_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsle_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsle_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsle_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsle_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsle_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, si5. */ ++/* Data types in instruction templates: V32QI, V32QI, QI. */ ++#define __lasx_xvslei_b(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslei_b((v32i8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, si5. */ ++/* Data types in instruction templates: V16HI, V16HI, QI. 
*/ ++#define __lasx_xvslei_h(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslei_h((v16i16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, si5. */ ++/* Data types in instruction templates: V8SI, V8SI, QI. */ ++#define __lasx_xvslei_w(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslei_w((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, si5. */ ++/* Data types in instruction templates: V4DI, V4DI, QI. */ ++#define __lasx_xvslei_d(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslei_d((v4i64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, UV32QI, UV32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsle_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsle_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, UV16HI, UV16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsle_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsle_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, UV8SI, UV8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsle_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsle_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, UV4DI, UV4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsle_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsle_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V32QI, UV32QI, UQI. */ ++#define __lasx_xvslei_bu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslei_bu((v32u8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V16HI, UV16HI, UQI. */ ++#define __lasx_xvslei_hu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslei_hu((v16u16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V8SI, UV8SI, UQI. */ ++#define __lasx_xvslei_wu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslei_wu((v8u32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V4DI, UV4DI, UQI. */ ++#define __lasx_xvslei_du(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslei_du((v4u64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui3. */ ++/* Data types in instruction templates: V32QI, V32QI, UQI. */ ++#define __lasx_xvsat_b(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsat_b((v32i8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: V16HI, V16HI, UQI. */ ++#define __lasx_xvsat_h(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsat_h((v16i16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V8SI, V8SI, UQI. */ ++#define __lasx_xvsat_w(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsat_w((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui6. 
*/ ++/* Data types in instruction templates: V4DI, V4DI, UQI. */ ++#define __lasx_xvsat_d(/*__m256i*/ _1, /*ui6*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsat_d((v4i64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui3. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UQI. */ ++#define __lasx_xvsat_bu(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsat_bu((v32u8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: UV16HI, UV16HI, UQI. */ ++#define __lasx_xvsat_hu(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsat_hu((v16u16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: UV8SI, UV8SI, UQI. */ ++#define __lasx_xvsat_wu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsat_wu((v8u32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui6. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UQI. */ ++#define __lasx_xvsat_du(/*__m256i*/ _1, /*ui6*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsat_du((v4u64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvadda_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvadda_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvadda_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvadda_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvadda_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvadda_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvadda_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvadda_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsadd_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsadd_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsadd_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsadd_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsadd_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsadd_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsadd_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsadd_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsadd_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsadd_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV16HI, UV16HI, UV16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsadd_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsadd_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV8SI, UV8SI, UV8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsadd_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsadd_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsadd_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsadd_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavg_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavg_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavg_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavg_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavg_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavg_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavg_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavg_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavg_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavg_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV16HI, UV16HI, UV16HI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavg_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavg_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV8SI, UV8SI, UV8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavg_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavg_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavg_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavg_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavgr_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavgr_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavgr_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavgr_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavgr_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavgr_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavgr_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavgr_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavgr_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavgr_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV16HI, UV16HI, UV16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavgr_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavgr_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV8SI, UV8SI, UV8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavgr_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavgr_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavgr_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavgr_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssub_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssub_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssub_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssub_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssub_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssub_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssub_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssub_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssub_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssub_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV16HI, UV16HI, UV16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssub_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssub_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV8SI, UV8SI, UV8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssub_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssub_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssub_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssub_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvabsd_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvabsd_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvabsd_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvabsd_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvabsd_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvabsd_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvabsd_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvabsd_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvabsd_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvabsd_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV16HI, UV16HI, UV16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvabsd_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvabsd_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV8SI, UV8SI, UV8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvabsd_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvabsd_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvabsd_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvabsd_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmul_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmul_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmul_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmul_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmul_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmul_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmul_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmul_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. 
*/ ++/* Data types in instruction templates: V32QI, V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmadd_b(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmadd_b((v32i8)_1, (v32i8)_2, (v32i8)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmadd_h(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmadd_h((v16i16)_1, (v16i16)_2, (v16i16)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmadd_w(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmadd_w((v8i32)_1, (v8i32)_2, (v8i32)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmadd_d(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmadd_d((v4i64)_1, (v4i64)_2, (v4i64)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmsub_b(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmsub_b((v32i8)_1, (v32i8)_2, (v32i8)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmsub_h(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmsub_h((v16i16)_1, (v16i16)_2, (v16i16)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmsub_w(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmsub_w((v8i32)_1, (v8i32)_2, (v8i32)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmsub_d(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmsub_d((v4i64)_1, (v4i64)_2, (v4i64)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvdiv_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvdiv_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvdiv_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvdiv_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. 
*/ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvdiv_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvdiv_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvdiv_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvdiv_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvdiv_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvdiv_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV16HI, UV16HI, UV16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvdiv_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvdiv_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV8SI, UV8SI, UV8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvdiv_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvdiv_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvdiv_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvdiv_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhaddw_h_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhaddw_h_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhaddw_w_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhaddw_w_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhaddw_d_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhaddw_d_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV16HI, UV32QI, UV32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhaddw_hu_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhaddw_hu_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV8SI, UV16HI, UV16HI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhaddw_wu_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhaddw_wu_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV4DI, UV8SI, UV8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhaddw_du_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhaddw_du_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhsubw_h_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhsubw_h_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhsubw_w_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhsubw_w_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhsubw_d_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhsubw_d_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, UV32QI, UV32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhsubw_hu_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhsubw_hu_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, UV16HI, UV16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhsubw_wu_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhsubw_wu_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, UV8SI, UV8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhsubw_du_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhsubw_du_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmod_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmod_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmod_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmod_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmod_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmod_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmod_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmod_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmod_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmod_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV16HI, UV16HI, UV16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmod_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmod_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV8SI, UV8SI, UV8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmod_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmod_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmod_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmod_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: V32QI, V32QI, UQI. */ ++#define __lasx_xvrepl128vei_b(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvrepl128vei_b((v32i8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui3. */ ++/* Data types in instruction templates: V16HI, V16HI, UQI. */ ++#define __lasx_xvrepl128vei_h(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvrepl128vei_h((v16i16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui2. */ ++/* Data types in instruction templates: V8SI, V8SI, UQI. */ ++#define __lasx_xvrepl128vei_w(/*__m256i*/ _1, /*ui2*/ _2) \ ++ ((__m256i)__builtin_lasx_xvrepl128vei_w((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui1. */ ++/* Data types in instruction templates: V4DI, V4DI, UQI. */ ++#define __lasx_xvrepl128vei_d(/*__m256i*/ _1, /*ui1*/ _2) \ ++ ((__m256i)__builtin_lasx_xvrepl128vei_d((v4i64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpickev_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpickev_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpickev_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpickev_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. 
*/ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpickev_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpickev_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpickev_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpickev_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpickod_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpickod_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpickod_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpickod_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpickod_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpickod_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpickod_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpickod_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvilvh_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvilvh_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvilvh_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvilvh_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvilvh_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvilvh_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvilvh_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvilvh_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvilvl_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvilvl_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvilvl_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvilvl_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvilvl_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvilvl_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvilvl_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvilvl_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpackev_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpackev_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpackev_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpackev_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpackev_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpackev_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpackev_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpackev_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpackod_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpackod_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpackod_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpackod_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpackod_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpackod_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpackod_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpackod_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk, xa. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvshuf_b(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvshuf_b((v32i8)_1, (v32i8)_2, (v32i8)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvshuf_h(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvshuf_h((v16i16)_1, (v16i16)_2, (v16i16)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvshuf_w(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvshuf_w((v8i32)_1, (v8i32)_2, (v8i32)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvshuf_d(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvshuf_d((v4i64)_1, (v4i64)_2, (v4i64)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvand_v(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvand_v((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui8. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UQI. */ ++#define __lasx_xvandi_b(/*__m256i*/ _1, /*ui8*/ _2) \ ++ ((__m256i)__builtin_lasx_xvandi_b((v32u8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvor_v(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvor_v((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui8. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UQI. */ ++#define __lasx_xvori_b(/*__m256i*/ _1, /*ui8*/ _2) \ ++ ((__m256i)__builtin_lasx_xvori_b((v32u8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvnor_v(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvnor_v((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui8. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UQI. 
*/ ++#define __lasx_xvnori_b(/*__m256i*/ _1, /*ui8*/ _2) \ ++ ((__m256i)__builtin_lasx_xvnori_b((v32u8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvxor_v(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvxor_v((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui8. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UQI. */ ++#define __lasx_xvxori_b(/*__m256i*/ _1, /*ui8*/ _2) \ ++ ((__m256i)__builtin_lasx_xvxori_b((v32u8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk, xa. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI, UV32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitsel_v(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvbitsel_v((v32u8)_1, (v32u8)_2, (v32u8)_3); ++} ++ ++/* Assembly instruction format: xd, xj, ui8. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI, USI. */ ++#define __lasx_xvbitseli_b(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ ++ ((__m256i)__builtin_lasx_xvbitseli_b((v32u8)(_1), (v32u8)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui8. */ ++/* Data types in instruction templates: V32QI, V32QI, USI. */ ++#define __lasx_xvshuf4i_b(/*__m256i*/ _1, /*ui8*/ _2) \ ++ ((__m256i)__builtin_lasx_xvshuf4i_b((v32i8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui8. */ ++/* Data types in instruction templates: V16HI, V16HI, USI. */ ++#define __lasx_xvshuf4i_h(/*__m256i*/ _1, /*ui8*/ _2) \ ++ ((__m256i)__builtin_lasx_xvshuf4i_h((v16i16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui8. */ ++/* Data types in instruction templates: V8SI, V8SI, USI. */ ++#define __lasx_xvshuf4i_w(/*__m256i*/ _1, /*ui8*/ _2) \ ++ ((__m256i)__builtin_lasx_xvshuf4i_w((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, rj. */ ++/* Data types in instruction templates: V32QI, SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplgr2vr_b(int _1) { ++ return (__m256i)__builtin_lasx_xvreplgr2vr_b((int)_1); ++} ++ ++/* Assembly instruction format: xd, rj. */ ++/* Data types in instruction templates: V16HI, SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplgr2vr_h(int _1) { ++ return (__m256i)__builtin_lasx_xvreplgr2vr_h((int)_1); ++} ++ ++/* Assembly instruction format: xd, rj. */ ++/* Data types in instruction templates: V8SI, SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplgr2vr_w(int _1) { ++ return (__m256i)__builtin_lasx_xvreplgr2vr_w((int)_1); ++} ++ ++/* Assembly instruction format: xd, rj. */ ++/* Data types in instruction templates: V4DI, DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplgr2vr_d(long int _1) { ++ return (__m256i)__builtin_lasx_xvreplgr2vr_d((long int)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V32QI, V32QI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpcnt_b(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvpcnt_b((v32i8)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpcnt_h(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvpcnt_h((v16i16)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpcnt_w(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvpcnt_w((v8i32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpcnt_d(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvpcnt_d((v4i64)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvclo_b(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvclo_b((v32i8)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvclo_h(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvclo_h((v16i16)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvclo_w(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvclo_w((v8i32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvclo_d(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvclo_d((v4i64)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvclz_b(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvclz_b((v32i8)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvclz_h(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvclz_h((v16i16)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvclz_w(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvclz_w((v8i32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvclz_d(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvclz_d((v4i64)_1); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SF, V8SF, V8SF. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfadd_s(__m256 _1, __m256 _2) { ++ return (__m256)__builtin_lasx_xvfadd_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DF, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfadd_d(__m256d _1, __m256d _2) { ++ return (__m256d)__builtin_lasx_xvfadd_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SF, V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfsub_s(__m256 _1, __m256 _2) { ++ return (__m256)__builtin_lasx_xvfsub_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DF, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfsub_d(__m256d _1, __m256d _2) { ++ return (__m256d)__builtin_lasx_xvfsub_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SF, V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfmul_s(__m256 _1, __m256 _2) { ++ return (__m256)__builtin_lasx_xvfmul_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DF, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfmul_d(__m256d _1, __m256d _2) { ++ return (__m256d)__builtin_lasx_xvfmul_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SF, V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfdiv_s(__m256 _1, __m256 _2) { ++ return (__m256)__builtin_lasx_xvfdiv_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DF, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfdiv_d(__m256d _1, __m256d _2) { ++ return (__m256d)__builtin_lasx_xvfdiv_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcvt_h_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcvt_h_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SF, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfcvt_s_d(__m256d _1, __m256d _2) { ++ return (__m256)__builtin_lasx_xvfcvt_s_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SF, V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfmin_s(__m256 _1, __m256 _2) { ++ return (__m256)__builtin_lasx_xvfmin_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. 
*/ ++/* Data types in instruction templates: V4DF, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfmin_d(__m256d _1, __m256d _2) { ++ return (__m256d)__builtin_lasx_xvfmin_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SF, V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfmina_s(__m256 _1, __m256 _2) { ++ return (__m256)__builtin_lasx_xvfmina_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DF, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfmina_d(__m256d _1, __m256d _2) { ++ return (__m256d)__builtin_lasx_xvfmina_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SF, V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfmax_s(__m256 _1, __m256 _2) { ++ return (__m256)__builtin_lasx_xvfmax_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DF, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfmax_d(__m256d _1, __m256d _2) { ++ return (__m256d)__builtin_lasx_xvfmax_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SF, V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfmaxa_s(__m256 _1, __m256 _2) { ++ return (__m256)__builtin_lasx_xvfmaxa_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DF, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfmaxa_d(__m256d _1, __m256d _2) { ++ return (__m256d)__builtin_lasx_xvfmaxa_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SI, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfclass_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvfclass_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfclass_d(__m256d _1) { ++ return (__m256i)__builtin_lasx_xvfclass_d((v4f64)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfsqrt_s(__m256 _1) { ++ return (__m256)__builtin_lasx_xvfsqrt_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfsqrt_d(__m256d _1) { ++ return (__m256d)__builtin_lasx_xvfsqrt_d((v4f64)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SF, V8SF. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfrecip_s(__m256 _1) { ++ return (__m256)__builtin_lasx_xvfrecip_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfrecip_d(__m256d _1) { ++ return (__m256d)__builtin_lasx_xvfrecip_d((v4f64)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfrint_s(__m256 _1) { ++ return (__m256)__builtin_lasx_xvfrint_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfrint_d(__m256d _1) { ++ return (__m256d)__builtin_lasx_xvfrint_d((v4f64)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfrsqrt_s(__m256 _1) { ++ return (__m256)__builtin_lasx_xvfrsqrt_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfrsqrt_d(__m256d _1) { ++ return (__m256d)__builtin_lasx_xvfrsqrt_d((v4f64)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvflogb_s(__m256 _1) { ++ return (__m256)__builtin_lasx_xvflogb_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvflogb_d(__m256d _1) { ++ return (__m256d)__builtin_lasx_xvflogb_d((v4f64)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SF, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfcvth_s_h(__m256i _1) { ++ return (__m256)__builtin_lasx_xvfcvth_s_h((v16i16)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfcvth_d_s(__m256 _1) { ++ return (__m256d)__builtin_lasx_xvfcvth_d_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SF, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfcvtl_s_h(__m256i _1) { ++ return (__m256)__builtin_lasx_xvfcvtl_s_h((v16i16)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfcvtl_d_s(__m256 _1) { ++ return (__m256d)__builtin_lasx_xvfcvtl_d_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SI, V8SF. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftint_w_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftint_w_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftint_l_d(__m256d _1) { ++ return (__m256i)__builtin_lasx_xvftint_l_d((v4f64)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: UV8SI, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftint_wu_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftint_wu_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: UV4DI, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftint_lu_d(__m256d _1) { ++ return (__m256i)__builtin_lasx_xvftint_lu_d((v4f64)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SI, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrz_w_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrz_w_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrz_l_d(__m256d _1) { ++ return (__m256i)__builtin_lasx_xvftintrz_l_d((v4f64)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: UV8SI, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrz_wu_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrz_wu_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: UV4DI, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrz_lu_d(__m256d _1) { ++ return (__m256i)__builtin_lasx_xvftintrz_lu_d((v4f64)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SF, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvffint_s_w(__m256i _1) { ++ return (__m256)__builtin_lasx_xvffint_s_w((v8i32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DF, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvffint_d_l(__m256i _1) { ++ return (__m256d)__builtin_lasx_xvffint_d_l((v4i64)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SF, UV8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvffint_s_wu(__m256i _1) { ++ return (__m256)__builtin_lasx_xvffint_s_wu((v8u32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DF, UV4DI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvffint_d_lu(__m256i _1) { ++ return (__m256d)__builtin_lasx_xvffint_d_lu((v4u64)_1); ++} ++ ++/* Assembly instruction format: xd, xj, rk. */ ++/* Data types in instruction templates: V32QI, V32QI, SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplve_b(__m256i _1, int _2) { ++ return (__m256i)__builtin_lasx_xvreplve_b((v32i8)_1, (int)_2); ++} ++ ++/* Assembly instruction format: xd, xj, rk. */ ++/* Data types in instruction templates: V16HI, V16HI, SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplve_h(__m256i _1, int _2) { ++ return (__m256i)__builtin_lasx_xvreplve_h((v16i16)_1, (int)_2); ++} ++ ++/* Assembly instruction format: xd, xj, rk. */ ++/* Data types in instruction templates: V8SI, V8SI, SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplve_w(__m256i _1, int _2) { ++ return (__m256i)__builtin_lasx_xvreplve_w((v8i32)_1, (int)_2); ++} ++ ++/* Assembly instruction format: xd, xj, rk. */ ++/* Data types in instruction templates: V4DI, V4DI, SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplve_d(__m256i _1, int _2) { ++ return (__m256i)__builtin_lasx_xvreplve_d((v4i64)_1, (int)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui8. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI, USI. */ ++#define __lasx_xvpermi_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ ++ ((__m256i)__builtin_lasx_xvpermi_w((v8i32)(_1), (v8i32)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvandn_v(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvandn_v((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvneg_b(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvneg_b((v32i8)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvneg_h(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvneg_h((v16i16)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvneg_w(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvneg_w((v8i32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvneg_d(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvneg_d((v4i64)_1); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmuh_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmuh_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmuh_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmuh_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmuh_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmuh_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmuh_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmuh_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmuh_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmuh_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV16HI, UV16HI, UV16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmuh_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmuh_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV8SI, UV8SI, UV8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmuh_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmuh_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmuh_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmuh_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui3. */ ++/* Data types in instruction templates: V16HI, V32QI, UQI. */ ++#define __lasx_xvsllwil_h_b(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsllwil_h_b((v32i8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: V8SI, V16HI, UQI. */ ++#define __lasx_xvsllwil_w_h(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsllwil_w_h((v16i16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V4DI, V8SI, UQI. */ ++#define __lasx_xvsllwil_d_w(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsllwil_d_w((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui3. */ ++/* Data types in instruction templates: UV16HI, UV32QI, UQI. */ ++#define __lasx_xvsllwil_hu_bu(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsllwil_hu_bu((v32u8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui4. 
*/ ++/* Data types in instruction templates: UV8SI, UV16HI, UQI. */ ++#define __lasx_xvsllwil_wu_hu(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsllwil_wu_hu((v16u16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: UV4DI, UV8SI, UQI. */ ++#define __lasx_xvsllwil_du_wu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsllwil_du_wu((v8u32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsran_b_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsran_b_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsran_h_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsran_h_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsran_w_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsran_w_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssran_b_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssran_b_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssran_h_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssran_h_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssran_w_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssran_w_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV32QI, UV16HI, UV16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssran_bu_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssran_bu_h((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV16HI, UV8SI, UV8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssran_hu_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssran_hu_w((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV8SI, UV4DI, UV4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssran_wu_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssran_wu_d((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. 
*/ ++/* Data types in instruction templates: V32QI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrarn_b_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrarn_b_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrarn_h_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrarn_h_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrarn_w_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrarn_w_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrarn_b_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrarn_b_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrarn_h_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrarn_h_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrarn_w_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrarn_w_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV32QI, UV16HI, UV16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrarn_bu_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrarn_bu_h((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV16HI, UV8SI, UV8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrarn_hu_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrarn_hu_w((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV8SI, UV4DI, UV4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrarn_wu_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrarn_wu_d((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrln_b_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrln_b_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V8SI, V8SI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrln_h_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrln_h_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrln_w_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrln_w_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV32QI, UV16HI, UV16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrln_bu_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrln_bu_h((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV16HI, UV8SI, UV8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrln_hu_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrln_hu_w((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV8SI, UV4DI, UV4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrln_wu_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrln_wu_d((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrlrn_b_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrlrn_b_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrlrn_h_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrlrn_h_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrlrn_w_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrlrn_w_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV32QI, UV16HI, UV16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrlrn_bu_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrlrn_bu_h((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV16HI, UV8SI, UV8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrlrn_hu_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrlrn_hu_w((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV8SI, UV4DI, UV4DI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrlrn_wu_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrlrn_wu_d((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI, UQI. */ ++#define __lasx_xvfrstpi_b(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvfrstpi_b((v32i8)(_1), (v32i8)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI, UQI. */ ++#define __lasx_xvfrstpi_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvfrstpi_h((v16i16)(_1), (v16i16)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfrstp_b(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvfrstp_b((v32i8)_1, (v32i8)_2, (v32i8)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfrstp_h(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvfrstp_h((v16i16)_1, (v16i16)_2, (v16i16)_3); ++} ++ ++/* Assembly instruction format: xd, xj, ui8. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI, USI. */ ++#define __lasx_xvshuf4i_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ ++ ((__m256i)__builtin_lasx_xvshuf4i_d((v4i64)(_1), (v4i64)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V32QI, V32QI, UQI. */ ++#define __lasx_xvbsrl_v(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbsrl_v((v32i8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V32QI, V32QI, UQI. */ ++#define __lasx_xvbsll_v(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbsll_v((v32i8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui8. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI, USI. */ ++#define __lasx_xvextrins_b(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ ++ ((__m256i)__builtin_lasx_xvextrins_b((v32i8)(_1), (v32i8)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui8. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI, USI. */ ++#define __lasx_xvextrins_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ ++ ((__m256i)__builtin_lasx_xvextrins_h((v16i16)(_1), (v16i16)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui8. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI, USI. */ ++#define __lasx_xvextrins_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ ++ ((__m256i)__builtin_lasx_xvextrins_w((v8i32)(_1), (v8i32)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui8. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI, USI. */ ++#define __lasx_xvextrins_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ ++ ((__m256i)__builtin_lasx_xvextrins_d((v4i64)(_1), (v4i64)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V32QI, V32QI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmskltz_b(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvmskltz_b((v32i8)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmskltz_h(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvmskltz_h((v16i16)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmskltz_w(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvmskltz_w((v8i32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmskltz_d(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvmskltz_d((v4i64)_1); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsigncov_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsigncov_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsigncov_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsigncov_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsigncov_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsigncov_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsigncov_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsigncov_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk, xa. */ ++/* Data types in instruction templates: V8SF, V8SF, V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfmadd_s(__m256 _1, __m256 _2, __m256 _3) { ++ return (__m256)__builtin_lasx_xvfmadd_s((v8f32)_1, (v8f32)_2, (v8f32)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk, xa. */ ++/* Data types in instruction templates: V4DF, V4DF, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfmadd_d(__m256d _1, __m256d _2, __m256d _3) { ++ return (__m256d)__builtin_lasx_xvfmadd_d((v4f64)_1, (v4f64)_2, (v4f64)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk, xa. */ ++/* Data types in instruction templates: V8SF, V8SF, V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfmsub_s(__m256 _1, __m256 _2, __m256 _3) { ++ return (__m256)__builtin_lasx_xvfmsub_s((v8f32)_1, (v8f32)_2, (v8f32)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk, xa. 
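A minimal usage sketch for the fused multiply-add wrappers above, assuming a LoongArch64 compiler built from this patch with -mlasx enabled; horner_step is an illustrative helper name, not part of the header:

#include <lasxintrin.h>

/* Illustrative helper: one Horner step on 8 floats at once, acc*x + coeff
   computed by a single fused multiply-add (xvfmadd.s), so the product is
   not rounded separately. */
static __m256 horner_step(__m256 acc, __m256 x, __m256 coeff) {
  return __lasx_xvfmadd_s(acc, x, coeff);
}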
*/ ++/* Data types in instruction templates: V4DF, V4DF, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfmsub_d(__m256d _1, __m256d _2, __m256d _3) { ++ return (__m256d)__builtin_lasx_xvfmsub_d((v4f64)_1, (v4f64)_2, (v4f64)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk, xa. */ ++/* Data types in instruction templates: V8SF, V8SF, V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfnmadd_s(__m256 _1, __m256 _2, __m256 _3) { ++ return (__m256)__builtin_lasx_xvfnmadd_s((v8f32)_1, (v8f32)_2, (v8f32)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk, xa. */ ++/* Data types in instruction templates: V4DF, V4DF, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfnmadd_d(__m256d _1, __m256d _2, __m256d _3) { ++ return (__m256d)__builtin_lasx_xvfnmadd_d((v4f64)_1, (v4f64)_2, (v4f64)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk, xa. */ ++/* Data types in instruction templates: V8SF, V8SF, V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfnmsub_s(__m256 _1, __m256 _2, __m256 _3) { ++ return (__m256)__builtin_lasx_xvfnmsub_s((v8f32)_1, (v8f32)_2, (v8f32)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk, xa. */ ++/* Data types in instruction templates: V4DF, V4DF, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfnmsub_d(__m256d _1, __m256d _2, __m256d _3) { ++ return (__m256d)__builtin_lasx_xvfnmsub_d((v4f64)_1, (v4f64)_2, (v4f64)_3); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SI, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrne_w_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrne_w_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrne_l_d(__m256d _1) { ++ return (__m256i)__builtin_lasx_xvftintrne_l_d((v4f64)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SI, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrp_w_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrp_w_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrp_l_d(__m256d _1) { ++ return (__m256i)__builtin_lasx_xvftintrp_l_d((v4f64)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SI, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrm_w_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrm_w_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V4DF. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrm_l_d(__m256d _1) { ++ return (__m256i)__builtin_lasx_xvftintrm_l_d((v4f64)_1); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftint_w_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvftint_w_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SF, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvffint_s_l(__m256i _1, __m256i _2) { ++ return (__m256)__builtin_lasx_xvffint_s_l((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrz_w_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvftintrz_w_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrp_w_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvftintrp_w_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrm_w_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvftintrm_w_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrne_w_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvftintrne_w_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftinth_l_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftinth_l_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintl_l_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintl_l_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DF, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvffinth_d_w(__m256i _1) { ++ return (__m256d)__builtin_lasx_xvffinth_d_w((v8i32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DF, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvffintl_d_w(__m256i _1) { ++ return (__m256d)__builtin_lasx_xvffintl_d_w((v8i32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V8SF. 
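The xvftint* wrappers differ only in rounding mode (rz truncates toward zero, rne rounds to nearest-even, rp rounds up, rm rounds down). A small sketch contrasting two of them, assuming -mlasx and that __lasx_xvftintrz_w_s from earlier in this header is available; f32_to_i32 is an illustrative name:

#include <lasxintrin.h>

/* Illustrative helper: convert 8 floats to 8 int32 two ways, to make the
   rounding-mode suffixes concrete. */
static void f32_to_i32(__m256 v, __m256i *trunc, __m256i *nearest) {
  *trunc = __lasx_xvftintrz_w_s(v);    /* toward zero */
  *nearest = __lasx_xvftintrne_w_s(v); /* nearest, ties to even */
}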
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrzh_l_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrzh_l_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrzl_l_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrzl_l_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrph_l_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrph_l_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrpl_l_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrpl_l_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrmh_l_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrmh_l_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrml_l_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrml_l_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrneh_l_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrneh_l_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrnel_l_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrnel_l_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfrintrne_s(__m256 _1) { ++ return (__m256)__builtin_lasx_xvfrintrne_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfrintrne_d(__m256d _1) { ++ return (__m256d)__builtin_lasx_xvfrintrne_d((v4f64)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfrintrz_s(__m256 _1) { ++ return (__m256)__builtin_lasx_xvfrintrz_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DF, V4DF. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfrintrz_d(__m256d _1) { ++ return (__m256d)__builtin_lasx_xvfrintrz_d((v4f64)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfrintrp_s(__m256 _1) { ++ return (__m256)__builtin_lasx_xvfrintrp_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfrintrp_d(__m256d _1) { ++ return (__m256d)__builtin_lasx_xvfrintrp_d((v4f64)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfrintrm_s(__m256 _1) { ++ return (__m256)__builtin_lasx_xvfrintrm_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfrintrm_d(__m256d _1) { ++ return (__m256d)__builtin_lasx_xvfrintrm_d((v4f64)_1); ++} ++ ++/* Assembly instruction format: xd, rj, si12. */ ++/* Data types in instruction templates: V32QI, CVPOINTER, SI. */ ++#define __lasx_xvld(/*void **/ _1, /*si12*/ _2) \ ++ ((__m256i)__builtin_lasx_xvld((void const *)(_1), (_2))) ++ ++/* Assembly instruction format: xd, rj, si12. */ ++/* Data types in instruction templates: VOID, V32QI, CVPOINTER, SI. */ ++#define __lasx_xvst(/*__m256i*/ _1, /*void **/ _2, /*si12*/ _3) \ ++ ((void)__builtin_lasx_xvst((v32i8)(_1), (void *)(_2), (_3))) ++ ++/* Assembly instruction format: xd, rj, si8, idx. */ ++/* Data types in instruction templates: VOID, V32QI, CVPOINTER, SI, UQI. */ ++#define __lasx_xvstelm_b(/*__m256i*/ _1, /*void **/ _2, /*si8*/ _3, \ ++ /*idx*/ _4) \ ++ ((void)__builtin_lasx_xvstelm_b((v32i8)(_1), (void *)(_2), (_3), (_4))) ++ ++/* Assembly instruction format: xd, rj, si8, idx. */ ++/* Data types in instruction templates: VOID, V16HI, CVPOINTER, SI, UQI. */ ++#define __lasx_xvstelm_h(/*__m256i*/ _1, /*void **/ _2, /*si8*/ _3, \ ++ /*idx*/ _4) \ ++ ((void)__builtin_lasx_xvstelm_h((v16i16)(_1), (void *)(_2), (_3), (_4))) ++ ++/* Assembly instruction format: xd, rj, si8, idx. */ ++/* Data types in instruction templates: VOID, V8SI, CVPOINTER, SI, UQI. */ ++#define __lasx_xvstelm_w(/*__m256i*/ _1, /*void **/ _2, /*si8*/ _3, \ ++ /*idx*/ _4) \ ++ ((void)__builtin_lasx_xvstelm_w((v8i32)(_1), (void *)(_2), (_3), (_4))) ++ ++/* Assembly instruction format: xd, rj, si8, idx. */ ++/* Data types in instruction templates: VOID, V4DI, CVPOINTER, SI, UQI. */ ++#define __lasx_xvstelm_d(/*__m256i*/ _1, /*void **/ _2, /*si8*/ _3, \ ++ /*idx*/ _4) \ ++ ((void)__builtin_lasx_xvstelm_d((v4i64)(_1), (void *)(_2), (_3), (_4))) ++ ++/* Assembly instruction format: xd, xj, ui3. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI, UQI. */ ++#define __lasx_xvinsve0_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui3*/ _3) \ ++ ((__m256i)__builtin_lasx_xvinsve0_w((v8i32)(_1), (v8i32)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui2. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI, UQI. 
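A usage sketch for the __lasx_xvld/__lasx_xvst macros above, assuming -mlasx and a byte count that is a multiple of 32; copy256 is an illustrative helper, not part of the header:

#include <lasxintrin.h>

/* Illustrative helper: copy n bytes (n a multiple of 32) one 256-bit
   register at a time. The si12 offset argument must be a constant, so 0 is
   passed and the varying offset is folded into the pointer instead. */
static void copy256(void *dst, const void *src, int n) {
  for (int i = 0; i < n; i += 32) {
    __m256i v = __lasx_xvld((const char *)src + i, 0);
    __lasx_xvst(v, (char *)dst + i, 0);
  }
}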
*/ ++#define __lasx_xvinsve0_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui2*/ _3) \ ++ ((__m256i)__builtin_lasx_xvinsve0_d((v4i64)(_1), (v4i64)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui3. */ ++/* Data types in instruction templates: V8SI, V8SI, UQI. */ ++#define __lasx_xvpickve_w(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvpickve_w((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui2. */ ++/* Data types in instruction templates: V4DI, V4DI, UQI. */ ++#define __lasx_xvpickve_d(/*__m256i*/ _1, /*ui2*/ _2) \ ++ ((__m256i)__builtin_lasx_xvpickve_d((v4i64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrlrn_b_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrlrn_b_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrlrn_h_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrlrn_h_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrlrn_w_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrlrn_w_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrln_b_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrln_b_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrln_h_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrln_h_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrln_w_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrln_w_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvorn_v(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvorn_v((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, i13. */ ++/* Data types in instruction templates: V4DI, HI. */ ++#define __lasx_xvldi(/*i13*/ _1) ((__m256i)__builtin_lasx_xvldi((_1))) ++ ++/* Assembly instruction format: xd, rj, rk. */ ++/* Data types in instruction templates: V32QI, CVPOINTER, DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvldx(void const *_1, long int _2) { ++ return (__m256i)__builtin_lasx_xvldx((void const *)_1, (long int)_2); ++} ++ ++/* Assembly instruction format: xd, rj, rk. 
*/ ++/* Data types in instruction templates: VOID, V32QI, CVPOINTER, DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) void ++ __lasx_xvstx(__m256i _1, void *_2, long int _3) { ++ return (void)__builtin_lasx_xvstx((v32i8)_1, (void *)_2, (long int)_3); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: UV4DI, UV4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvextl_qu_du(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvextl_qu_du((v4u64)_1); ++} ++ ++/* Assembly instruction format: xd, rj, ui3. */ ++/* Data types in instruction templates: V8SI, V8SI, SI, UQI. */ ++#define __lasx_xvinsgr2vr_w(/*__m256i*/ _1, /*int*/ _2, /*ui3*/ _3) \ ++ ((__m256i)__builtin_lasx_xvinsgr2vr_w((v8i32)(_1), (int)(_2), (_3))) ++ ++/* Assembly instruction format: xd, rj, ui2. */ ++/* Data types in instruction templates: V4DI, V4DI, DI, UQI. */ ++#define __lasx_xvinsgr2vr_d(/*__m256i*/ _1, /*long int*/ _2, /*ui2*/ _3) \ ++ ((__m256i)__builtin_lasx_xvinsgr2vr_d((v4i64)(_1), (long int)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplve0_b(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvreplve0_b((v32i8)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplve0_h(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvreplve0_h((v16i16)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplve0_w(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvreplve0_w((v8i32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplve0_d(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvreplve0_d((v4i64)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplve0_q(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvreplve0_q((v32i8)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V16HI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_vext2xv_h_b(__m256i _1) { ++ return (__m256i)__builtin_lasx_vext2xv_h_b((v32i8)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_vext2xv_w_h(__m256i _1) { ++ return (__m256i)__builtin_lasx_vext2xv_w_h((v16i16)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V8SI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_vext2xv_d_w(__m256i _1) { ++ return (__m256i)__builtin_lasx_vext2xv_d_w((v8i32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_vext2xv_w_b(__m256i _1) { ++ return (__m256i)__builtin_lasx_vext2xv_w_b((v32i8)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_vext2xv_d_h(__m256i _1) { ++ return (__m256i)__builtin_lasx_vext2xv_d_h((v16i16)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_vext2xv_d_b(__m256i _1) { ++ return (__m256i)__builtin_lasx_vext2xv_d_b((v32i8)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V16HI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_vext2xv_hu_bu(__m256i _1) { ++ return (__m256i)__builtin_lasx_vext2xv_hu_bu((v32i8)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_vext2xv_wu_hu(__m256i _1) { ++ return (__m256i)__builtin_lasx_vext2xv_wu_hu((v16i16)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_vext2xv_du_wu(__m256i _1) { ++ return (__m256i)__builtin_lasx_vext2xv_du_wu((v8i32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_vext2xv_wu_bu(__m256i _1) { ++ return (__m256i)__builtin_lasx_vext2xv_wu_bu((v32i8)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_vext2xv_du_hu(__m256i _1) { ++ return (__m256i)__builtin_lasx_vext2xv_du_hu((v16i16)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_vext2xv_du_bu(__m256i _1) { ++ return (__m256i)__builtin_lasx_vext2xv_du_bu((v32i8)_1); ++} ++ ++/* Assembly instruction format: xd, xj, ui8. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI, USI. */ ++#define __lasx_xvpermi_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ ++ ((__m256i)__builtin_lasx_xvpermi_q((v32i8)(_1), (v32i8)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui8. */ ++/* Data types in instruction templates: V4DI, V4DI, USI. */ ++#define __lasx_xvpermi_d(/*__m256i*/ _1, /*ui8*/ _2) \ ++ ((__m256i)__builtin_lasx_xvpermi_d((v4i64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvperm_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvperm_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, rj, si12. */ ++/* Data types in instruction templates: V32QI, CVPOINTER, SI. */ ++#define __lasx_xvldrepl_b(/*void **/ _1, /*si12*/ _2) \ ++ ((__m256i)__builtin_lasx_xvldrepl_b((void const *)(_1), (_2))) ++ ++/* Assembly instruction format: xd, rj, si11. */ ++/* Data types in instruction templates: V16HI, CVPOINTER, SI. */ ++#define __lasx_xvldrepl_h(/*void **/ _1, /*si11*/ _2) \ ++ ((__m256i)__builtin_lasx_xvldrepl_h((void const *)(_1), (_2))) ++ ++/* Assembly instruction format: xd, rj, si10. */ ++/* Data types in instruction templates: V8SI, CVPOINTER, SI. */ ++#define __lasx_xvldrepl_w(/*void **/ _1, /*si10*/ _2) \ ++ ((__m256i)__builtin_lasx_xvldrepl_w((void const *)(_1), (_2))) ++ ++/* Assembly instruction format: xd, rj, si9. */ ++/* Data types in instruction templates: V4DI, CVPOINTER, SI. */ ++#define __lasx_xvldrepl_d(/*void **/ _1, /*si9*/ _2) \ ++ ((__m256i)__builtin_lasx_xvldrepl_d((void const *)(_1), (_2))) ++ ++/* Assembly instruction format: rd, xj, ui3. */ ++/* Data types in instruction templates: SI, V8SI, UQI. */ ++#define __lasx_xvpickve2gr_w(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((int)__builtin_lasx_xvpickve2gr_w((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: rd, xj, ui3. */ ++/* Data types in instruction templates: USI, V8SI, UQI. */ ++#define __lasx_xvpickve2gr_wu(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((unsigned int)__builtin_lasx_xvpickve2gr_wu((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: rd, xj, ui2. */ ++/* Data types in instruction templates: DI, V4DI, UQI. */ ++#define __lasx_xvpickve2gr_d(/*__m256i*/ _1, /*ui2*/ _2) \ ++ ((long int)__builtin_lasx_xvpickve2gr_d((v4i64)(_1), (_2))) ++ ++/* Assembly instruction format: rd, xj, ui2. */ ++/* Data types in instruction templates: UDI, V4DI, UQI. */ ++#define __lasx_xvpickve2gr_du(/*__m256i*/ _1, /*ui2*/ _2) \ ++ ((unsigned long int)__builtin_lasx_xvpickve2gr_du((v4i64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwev_q_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwev_q_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwev_d_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwev_d_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwev_w_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwev_w_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwev_h_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwev_h_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. 
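The __lasx_xvpickve2gr_* macros above move one vector lane into a general-purpose register, and the lane index must be a compile-time constant (ui2/ui3). A small sketch, assuming -mlasx; hsum_i64 is an illustrative name:

#include <lasxintrin.h>

/* Illustrative helper: horizontal sum of the four 64-bit lanes of a vector. */
static long int hsum_i64(__m256i v) {
  return __lasx_xvpickve2gr_d(v, 0) + __lasx_xvpickve2gr_d(v, 1) +
         __lasx_xvpickve2gr_d(v, 2) + __lasx_xvpickve2gr_d(v, 3);
}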
*/ ++/* Data types in instruction templates: V4DI, UV4DI, UV4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwev_q_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwev_q_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, UV8SI, UV8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwev_d_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwev_d_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, UV16HI, UV16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwev_w_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwev_w_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, UV32QI, UV32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwev_h_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwev_h_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwev_q_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwev_q_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwev_d_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwev_d_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwev_w_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwev_w_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwev_h_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwev_h_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, UV4DI, UV4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwev_q_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwev_q_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, UV8SI, UV8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwev_d_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwev_d_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, UV16HI, UV16HI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwev_w_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwev_w_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, UV32QI, UV32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwev_h_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwev_h_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwev_q_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwev_q_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwev_d_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwev_d_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwev_w_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwev_w_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwev_h_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwev_h_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, UV4DI, UV4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwev_q_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwev_q_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, UV8SI, UV8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwev_d_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwev_d_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, UV16HI, UV16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwev_w_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwev_w_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, UV32QI, UV32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwev_h_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwev_h_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwod_q_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwod_q_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwod_d_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwod_d_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwod_w_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwod_w_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwod_h_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwod_h_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, UV4DI, UV4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwod_q_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwod_q_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, UV8SI, UV8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwod_d_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwod_d_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, UV16HI, UV16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwod_w_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwod_w_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, UV32QI, UV32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwod_h_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwod_h_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwod_q_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwod_q_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwod_d_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwod_d_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V16HI, V16HI. 
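The *wev/*wod pairs widen and operate on the even-numbered and odd-numbered source lanes respectively, so covering every lane takes both calls. A sketch of that pattern, assuming -mlasx; addw_i32_to_i64 is an illustrative name:

#include <lasxintrin.h>

/* Illustrative helper: add all eight i32 lanes of a and b with 64-bit
   results; the even lanes (0,2,4,6) land in *even, the odd lanes
   (1,3,5,7) in *odd. */
static void addw_i32_to_i64(__m256i a, __m256i b, __m256i *even, __m256i *odd) {
  *even = __lasx_xvaddwev_d_w(a, b);
  *odd = __lasx_xvaddwod_d_w(a, b);
}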
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwod_w_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwod_w_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwod_h_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwod_h_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, UV4DI, UV4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwod_q_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwod_q_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, UV8SI, UV8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwod_d_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwod_d_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, UV16HI, UV16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwod_w_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwod_w_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, UV32QI, UV32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwod_h_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwod_h_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwod_q_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwod_q_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwod_d_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwod_d_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwod_w_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwod_w_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwod_h_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwod_h_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, UV4DI, UV4DI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwod_q_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwod_q_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, UV8SI, UV8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwod_d_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwod_d_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, UV16HI, UV16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwod_w_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwod_w_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, UV32QI, UV32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwod_h_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwod_h_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, UV8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwev_d_wu_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwev_d_wu_w((v8u32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, UV16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwev_w_hu_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwev_w_hu_h((v16u16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, UV32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwev_h_bu_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwev_h_bu_b((v32u8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, UV8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwev_d_wu_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwev_d_wu_w((v8u32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, UV16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwev_w_hu_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwev_w_hu_h((v16u16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, UV32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwev_h_bu_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwev_h_bu_b((v32u8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, UV8SI, V8SI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwod_d_wu_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwod_d_wu_w((v8u32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, UV16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwod_w_hu_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwod_w_hu_h((v16u16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, UV32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwod_h_bu_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwod_h_bu_b((v32u8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, UV8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwod_d_wu_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwod_d_wu_w((v8u32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, UV16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwod_w_hu_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwod_w_hu_h((v16u16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, UV32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwod_h_bu_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwod_h_bu_b((v32u8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhaddw_q_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhaddw_q_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhaddw_qu_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhaddw_qu_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhsubw_q_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhsubw_q_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhsubw_qu_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhsubw_qu_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI, V4DI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwev_q_d(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwev_q_d((v4i64)_1, (v4i64)_2, (v4i64)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwev_d_w(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwev_d_w((v4i64)_1, (v8i32)_2, (v8i32)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwev_w_h(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwev_w_h((v8i32)_1, (v16i16)_2, ++ (v16i16)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwev_h_b(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwev_h_b((v16i16)_1, (v32i8)_2, ++ (v32i8)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI, UV4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwev_q_du(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwev_q_du((v4u64)_1, (v4u64)_2, ++ (v4u64)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV8SI, UV8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwev_d_wu(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwev_d_wu((v4u64)_1, (v8u32)_2, ++ (v8u32)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV8SI, UV8SI, UV16HI, UV16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwev_w_hu(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwev_w_hu((v8u32)_1, (v16u16)_2, ++ (v16u16)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV16HI, UV16HI, UV32QI, UV32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwev_h_bu(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwev_h_bu((v16u16)_1, (v32u8)_2, ++ (v32u8)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwod_q_d(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwod_q_d((v4i64)_1, (v4i64)_2, (v4i64)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V8SI, V8SI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwod_d_w(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwod_d_w((v4i64)_1, (v8i32)_2, (v8i32)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwod_w_h(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwod_w_h((v8i32)_1, (v16i16)_2, ++ (v16i16)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwod_h_b(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwod_h_b((v16i16)_1, (v32i8)_2, ++ (v32i8)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI, UV4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwod_q_du(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwod_q_du((v4u64)_1, (v4u64)_2, ++ (v4u64)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV8SI, UV8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwod_d_wu(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwod_d_wu((v4u64)_1, (v8u32)_2, ++ (v8u32)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV8SI, UV8SI, UV16HI, UV16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwod_w_hu(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwod_w_hu((v8u32)_1, (v16u16)_2, ++ (v16u16)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV16HI, UV16HI, UV32QI, UV32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwod_h_bu(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwod_h_bu((v16u16)_1, (v32u8)_2, ++ (v32u8)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, UV4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwev_q_du_d(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwev_q_du_d((v4i64)_1, (v4u64)_2, ++ (v4i64)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, UV8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwev_d_wu_w(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwev_d_wu_w((v4i64)_1, (v8u32)_2, ++ (v8i32)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, UV16HI, V16HI. 
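The widening multiply-accumulate wrappers take the accumulator as the first argument and the two narrower sources after it. A dot-product-style sketch that uses the even/odd pair together, assuming -mlasx; dot_i16_acc is an illustrative name:

#include <lasxintrin.h>

/* Illustrative helper: acc[i] += a[2i]*b[2i] + a[2i+1]*b[2i+1] for signed
   16-bit inputs and 32-bit accumulators, built from the even- and
   odd-lane madd forms. */
static __m256i dot_i16_acc(__m256i acc, __m256i a, __m256i b) {
  acc = __lasx_xvmaddwev_w_h(acc, a, b); /* even 16-bit lanes */
  acc = __lasx_xvmaddwod_w_h(acc, a, b); /* odd 16-bit lanes */
  return acc;
}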
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwev_w_hu_h(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwev_w_hu_h((v8i32)_1, (v16u16)_2, ++ (v16i16)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, UV32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwev_h_bu_b(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwev_h_bu_b((v16i16)_1, (v32u8)_2, ++ (v32i8)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, UV4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwod_q_du_d(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwod_q_du_d((v4i64)_1, (v4u64)_2, ++ (v4i64)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, UV8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwod_d_wu_w(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwod_d_wu_w((v4i64)_1, (v8u32)_2, ++ (v8i32)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, UV16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwod_w_hu_h(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwod_w_hu_h((v8i32)_1, (v16u16)_2, ++ (v16i16)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, UV32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwod_h_bu_b(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwod_h_bu_b((v16i16)_1, (v32u8)_2, ++ (v32i8)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvrotr_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvrotr_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvrotr_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvrotr_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvrotr_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvrotr_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvrotr_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvrotr_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. 
*/ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvadd_q(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvadd_q((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsub_q(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsub_q((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, UV4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwev_q_du_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwev_q_du_d((v4u64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, UV4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwod_q_du_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwod_q_du_d((v4u64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, UV4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwev_q_du_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwev_q_du_d((v4u64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, UV4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwod_q_du_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwod_q_du_d((v4u64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmskgez_b(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvmskgez_b((v32i8)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V32QI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmsknz_b(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvmsknz_b((v32i8)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V16HI, V32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvexth_h_b(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvexth_h_b((v32i8)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SI, V16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvexth_w_h(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvexth_w_h((v16i16)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvexth_d_w(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvexth_d_w((v8i32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. 
*/ ++/* Data types in instruction templates: V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvexth_q_d(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvexth_q_d((v4i64)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: UV16HI, UV32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvexth_hu_bu(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvexth_hu_bu((v32u8)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: UV8SI, UV16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvexth_wu_hu(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvexth_wu_hu((v16u16)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: UV4DI, UV8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvexth_du_wu(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvexth_du_wu((v8u32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: UV4DI, UV4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvexth_qu_du(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvexth_qu_du((v4u64)_1); ++} ++ ++/* Assembly instruction format: xd, xj, ui3. */ ++/* Data types in instruction templates: V32QI, V32QI, UQI. */ ++#define __lasx_xvrotri_b(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvrotri_b((v32i8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: V16HI, V16HI, UQI. */ ++#define __lasx_xvrotri_h(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvrotri_h((v16i16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V8SI, V8SI, UQI. */ ++#define __lasx_xvrotri_w(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvrotri_w((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui6. */ ++/* Data types in instruction templates: V4DI, V4DI, UQI. */ ++#define __lasx_xvrotri_d(/*__m256i*/ _1, /*ui6*/ _2) \ ++ ((__m256i)__builtin_lasx_xvrotri_d((v4i64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvextl_q_d(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvextl_q_d((v4i64)_1); ++} ++ ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI, USI. */ ++#define __lasx_xvsrlni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrlni_b_h((v32i8)(_1), (v32i8)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI, USI. */ ++#define __lasx_xvsrlni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrlni_h_w((v16i16)(_1), (v16i16)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui6. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI, USI. */ ++#define __lasx_xvsrlni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrlni_w_d((v8i32)(_1), (v8i32)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui7. 
*/ ++/* Data types in instruction templates: V4DI, V4DI, V4DI, USI. */ ++#define __lasx_xvsrlni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrlni_d_q((v4i64)(_1), (v4i64)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI, USI. */ ++#define __lasx_xvsrlrni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrlrni_b_h((v32i8)(_1), (v32i8)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI, USI. */ ++#define __lasx_xvsrlrni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrlrni_h_w((v16i16)(_1), (v16i16)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui6. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI, USI. */ ++#define __lasx_xvsrlrni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrlrni_w_d((v8i32)(_1), (v8i32)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui7. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI, USI. */ ++#define __lasx_xvsrlrni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrlrni_d_q((v4i64)(_1), (v4i64)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI, USI. */ ++#define __lasx_xvssrlni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlni_b_h((v32i8)(_1), (v32i8)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI, USI. */ ++#define __lasx_xvssrlni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlni_h_w((v16i16)(_1), (v16i16)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui6. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI, USI. */ ++#define __lasx_xvssrlni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlni_w_d((v8i32)(_1), (v8i32)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui7. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI, USI. */ ++#define __lasx_xvssrlni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlni_d_q((v4i64)(_1), (v4i64)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: UV32QI, UV32QI, V32QI, USI. */ ++#define __lasx_xvssrlni_bu_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlni_bu_h((v32u8)(_1), (v32i8)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: UV16HI, UV16HI, V16HI, USI. */ ++#define __lasx_xvssrlni_hu_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlni_hu_w((v16u16)(_1), (v16i16)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui6. */ ++/* Data types in instruction templates: UV8SI, UV8SI, V8SI, USI. */ ++#define __lasx_xvssrlni_wu_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlni_wu_d((v8u32)(_1), (v8i32)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui7. */ ++/* Data types in instruction templates: UV4DI, UV4DI, V4DI, USI. 
*/ ++#define __lasx_xvssrlni_du_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlni_du_q((v4u64)(_1), (v4i64)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI, USI. */ ++#define __lasx_xvssrlrni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlrni_b_h((v32i8)(_1), (v32i8)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI, USI. */ ++#define __lasx_xvssrlrni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlrni_h_w((v16i16)(_1), (v16i16)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui6. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI, USI. */ ++#define __lasx_xvssrlrni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlrni_w_d((v8i32)(_1), (v8i32)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui7. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI, USI. */ ++#define __lasx_xvssrlrni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlrni_d_q((v4i64)(_1), (v4i64)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: UV32QI, UV32QI, V32QI, USI. */ ++#define __lasx_xvssrlrni_bu_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlrni_bu_h((v32u8)(_1), (v32i8)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: UV16HI, UV16HI, V16HI, USI. */ ++#define __lasx_xvssrlrni_hu_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlrni_hu_w((v16u16)(_1), (v16i16)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui6. */ ++/* Data types in instruction templates: UV8SI, UV8SI, V8SI, USI. */ ++#define __lasx_xvssrlrni_wu_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlrni_wu_d((v8u32)(_1), (v8i32)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui7. */ ++/* Data types in instruction templates: UV4DI, UV4DI, V4DI, USI. */ ++#define __lasx_xvssrlrni_du_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlrni_du_q((v4u64)(_1), (v4i64)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI, USI. */ ++#define __lasx_xvsrani_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrani_b_h((v32i8)(_1), (v32i8)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI, USI. */ ++#define __lasx_xvsrani_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrani_h_w((v16i16)(_1), (v16i16)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui6. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI, USI. */ ++#define __lasx_xvsrani_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrani_w_d((v8i32)(_1), (v8i32)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui7. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI, USI. */ ++#define __lasx_xvsrani_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrani_d_q((v4i64)(_1), (v4i64)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui4. 
*/ ++/* Data types in instruction templates: V32QI, V32QI, V32QI, USI. */ ++#define __lasx_xvsrarni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrarni_b_h((v32i8)(_1), (v32i8)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI, USI. */ ++#define __lasx_xvsrarni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrarni_h_w((v16i16)(_1), (v16i16)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui6. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI, USI. */ ++#define __lasx_xvsrarni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrarni_w_d((v8i32)(_1), (v8i32)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui7. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI, USI. */ ++#define __lasx_xvsrarni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrarni_d_q((v4i64)(_1), (v4i64)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI, USI. */ ++#define __lasx_xvssrani_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrani_b_h((v32i8)(_1), (v32i8)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI, USI. */ ++#define __lasx_xvssrani_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrani_h_w((v16i16)(_1), (v16i16)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui6. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI, USI. */ ++#define __lasx_xvssrani_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrani_w_d((v8i32)(_1), (v8i32)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui7. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI, USI. */ ++#define __lasx_xvssrani_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrani_d_q((v4i64)(_1), (v4i64)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: UV32QI, UV32QI, V32QI, USI. */ ++#define __lasx_xvssrani_bu_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrani_bu_h((v32u8)(_1), (v32i8)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: UV16HI, UV16HI, V16HI, USI. */ ++#define __lasx_xvssrani_hu_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrani_hu_w((v16u16)(_1), (v16i16)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui6. */ ++/* Data types in instruction templates: UV8SI, UV8SI, V8SI, USI. */ ++#define __lasx_xvssrani_wu_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrani_wu_d((v8u32)(_1), (v8i32)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui7. */ ++/* Data types in instruction templates: UV4DI, UV4DI, V4DI, USI. */ ++#define __lasx_xvssrani_du_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrani_du_q((v4u64)(_1), (v4i64)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI, USI. 
*/ ++#define __lasx_xvssrarni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrarni_b_h((v32i8)(_1), (v32i8)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI, USI. */ ++#define __lasx_xvssrarni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrarni_h_w((v16i16)(_1), (v16i16)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui6. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI, USI. */ ++#define __lasx_xvssrarni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrarni_w_d((v8i32)(_1), (v8i32)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui7. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI, USI. */ ++#define __lasx_xvssrarni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrarni_d_q((v4i64)(_1), (v4i64)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: UV32QI, UV32QI, V32QI, USI. */ ++#define __lasx_xvssrarni_bu_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrarni_bu_h((v32u8)(_1), (v32i8)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: UV16HI, UV16HI, V16HI, USI. */ ++#define __lasx_xvssrarni_hu_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrarni_hu_w((v16u16)(_1), (v16i16)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui6. */ ++/* Data types in instruction templates: UV8SI, UV8SI, V8SI, USI. */ ++#define __lasx_xvssrarni_wu_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrarni_wu_d((v8u32)(_1), (v8i32)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui7. */ ++/* Data types in instruction templates: UV4DI, UV4DI, V4DI, USI. */ ++#define __lasx_xvssrarni_du_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrarni_du_q((v4u64)(_1), (v4i64)(_2), (_3))) ++ ++/* Assembly instruction format: cd, xj. */ ++/* Data types in instruction templates: SI, UV32QI. */ ++#define __lasx_xbnz_b(/*__m256i*/ _1) ((int)__builtin_lasx_xbnz_b((v32u8)(_1))) ++ ++/* Assembly instruction format: cd, xj. */ ++/* Data types in instruction templates: SI, UV4DI. */ ++#define __lasx_xbnz_d(/*__m256i*/ _1) ((int)__builtin_lasx_xbnz_d((v4u64)(_1))) ++ ++/* Assembly instruction format: cd, xj. */ ++/* Data types in instruction templates: SI, UV16HI. */ ++#define __lasx_xbnz_h(/*__m256i*/ _1) ((int)__builtin_lasx_xbnz_h((v16u16)(_1))) ++ ++/* Assembly instruction format: cd, xj. */ ++/* Data types in instruction templates: SI, UV32QI. */ ++#define __lasx_xbnz_v(/*__m256i*/ _1) ((int)__builtin_lasx_xbnz_v((v32u8)(_1))) ++ ++/* Assembly instruction format: cd, xj. */ ++/* Data types in instruction templates: SI, UV8SI. */ ++#define __lasx_xbnz_w(/*__m256i*/ _1) ((int)__builtin_lasx_xbnz_w((v8u32)(_1))) ++ ++/* Assembly instruction format: cd, xj. */ ++/* Data types in instruction templates: SI, UV32QI. */ ++#define __lasx_xbz_b(/*__m256i*/ _1) ((int)__builtin_lasx_xbz_b((v32u8)(_1))) ++ ++/* Assembly instruction format: cd, xj. */ ++/* Data types in instruction templates: SI, UV4DI. */ ++#define __lasx_xbz_d(/*__m256i*/ _1) ((int)__builtin_lasx_xbz_d((v4u64)(_1))) ++ ++/* Assembly instruction format: cd, xj. */ ++/* Data types in instruction templates: SI, UV16HI. 
*/ ++#define __lasx_xbz_h(/*__m256i*/ _1) ((int)__builtin_lasx_xbz_h((v16u16)(_1))) ++ ++/* Assembly instruction format: cd, xj. */ ++/* Data types in instruction templates: SI, UV32QI. */ ++#define __lasx_xbz_v(/*__m256i*/ _1) ((int)__builtin_lasx_xbz_v((v32u8)(_1))) ++ ++/* Assembly instruction format: cd, xj. */ ++/* Data types in instruction templates: SI, UV8SI. */ ++#define __lasx_xbz_w(/*__m256i*/ _1) ((int)__builtin_lasx_xbz_w((v8u32)(_1))) ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_caf_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_caf_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_caf_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_caf_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_ceq_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_ceq_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_ceq_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_ceq_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cle_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cle_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cle_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cle_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_clt_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_clt_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_clt_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_clt_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cne_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cne_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. 
*/ ++/* Data types in instruction templates: V8SI, V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cne_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cne_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cor_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cor_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cor_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cor_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cueq_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cueq_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cueq_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cueq_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cule_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cule_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cule_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cule_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cult_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cult_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cult_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cult_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cun_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cun_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cune_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cune_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cune_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cune_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cun_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cun_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_saf_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_saf_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_saf_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_saf_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_seq_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_seq_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_seq_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_seq_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sle_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sle_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sle_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sle_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_slt_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_slt_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_slt_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_slt_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sne_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sne_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sne_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sne_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sor_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sor_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sor_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sor_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sueq_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sueq_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sueq_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sueq_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sule_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sule_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sule_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sule_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sult_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sult_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sult_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sult_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sun_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sun_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sune_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sune_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sune_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sune_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sun_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sun_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui2. */ ++/* Data types in instruction templates: V4DF, V4DF, UQI. */ ++#define __lasx_xvpickve_d_f(/*__m256d*/ _1, /*ui2*/ _2) \ ++ ((__m256d)__builtin_lasx_xvpickve_d_f((v4f64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui3. */ ++/* Data types in instruction templates: V8SF, V8SF, UQI. */ ++#define __lasx_xvpickve_w_f(/*__m256*/ _1, /*ui3*/ _2) \ ++ ((__m256)__builtin_lasx_xvpickve_w_f((v8f32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, si10. */ ++/* Data types in instruction templates: V32QI, HI. */ ++#define __lasx_xvrepli_b(/*si10*/ _1) ((__m256i)__builtin_lasx_xvrepli_b((_1))) ++ ++/* Assembly instruction format: xd, si10. */ ++/* Data types in instruction templates: V4DI, HI. */ ++#define __lasx_xvrepli_d(/*si10*/ _1) ((__m256i)__builtin_lasx_xvrepli_d((_1))) ++ ++/* Assembly instruction format: xd, si10. */ ++/* Data types in instruction templates: V16HI, HI. */ ++#define __lasx_xvrepli_h(/*si10*/ _1) ((__m256i)__builtin_lasx_xvrepli_h((_1))) ++ ++/* Assembly instruction format: xd, si10. */ ++/* Data types in instruction templates: V8SI, HI. */ ++#define __lasx_xvrepli_w(/*si10*/ _1) ((__m256i)__builtin_lasx_xvrepli_w((_1))) ++ ++#endif /* defined(__loongarch_asx). */ ++#endif /* _GCC_LOONGSON_ASXINTRIN_H. */ +diff --git a/lib/Headers/lsxintrin.h b/lib/Headers/lsxintrin.h +new file mode 100644 +index 000000000..48344c209 +--- /dev/null ++++ b/lib/Headers/lsxintrin.h +@@ -0,0 +1,5162 @@ ++//===----------- lsxintrin.h - LoongArch LSX intrinsics ------------------===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++// ++// This file contains the LoongArch LSX intrinsics. 
++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef _GCC_LOONGSON_SXINTRIN_H ++#define _GCC_LOONGSON_SXINTRIN_H 1 ++ ++#if defined(__loongarch_sx) ++typedef signed char v16i8 __attribute__((vector_size(16), aligned(16))); ++typedef signed char v16i8_b __attribute__((vector_size(16), aligned(1))); ++typedef unsigned char v16u8 __attribute__((vector_size(16), aligned(16))); ++typedef unsigned char v16u8_b __attribute__((vector_size(16), aligned(1))); ++typedef short v8i16 __attribute__((vector_size(16), aligned(16))); ++typedef short v8i16_h __attribute__((vector_size(16), aligned(2))); ++typedef unsigned short v8u16 __attribute__((vector_size(16), aligned(16))); ++typedef unsigned short v8u16_h __attribute__((vector_size(16), aligned(2))); ++typedef int v4i32 __attribute__((vector_size(16), aligned(16))); ++typedef int v4i32_w __attribute__((vector_size(16), aligned(4))); ++typedef unsigned int v4u32 __attribute__((vector_size(16), aligned(16))); ++typedef unsigned int v4u32_w __attribute__((vector_size(16), aligned(4))); ++typedef long long v2i64 __attribute__((vector_size(16), aligned(16))); ++typedef long long v2i64_d __attribute__((vector_size(16), aligned(8))); ++typedef unsigned long long v2u64 __attribute__((vector_size(16), aligned(16))); ++typedef unsigned long long v2u64_d __attribute__((vector_size(16), aligned(8))); ++typedef float v4f32 __attribute__((vector_size(16), aligned(16))); ++typedef float v4f32_w __attribute__((vector_size(16), aligned(4))); ++typedef double v2f64 __attribute__((vector_size(16), aligned(16))); ++typedef double v2f64_d __attribute__((vector_size(16), aligned(8))); ++ ++typedef long long __m128i __attribute__((__vector_size__(16), __may_alias__)); ++typedef float __m128 __attribute__((__vector_size__(16), __may_alias__)); ++typedef double __m128d __attribute__((__vector_size__(16), __may_alias__)); ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsll_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsll_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsll_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsll_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsll_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsll_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsll_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsll_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui3. */ ++/* Data types in instruction templates: V16QI, V16QI, UQI. */ ++#define __lsx_vslli_b(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vslli_b((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: V8HI, V8HI, UQI. 
*/ ++#define __lsx_vslli_h(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vslli_h((v8i16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V4SI, V4SI, UQI. */ ++#define __lsx_vslli_w(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslli_w((v4i32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui6. */ ++/* Data types in instruction templates: V2DI, V2DI, UQI. */ ++#define __lsx_vslli_d(/*__m128i*/ _1, /*ui6*/ _2) \ ++ ((__m128i)__builtin_lsx_vslli_d((v2i64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsra_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsra_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsra_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsra_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsra_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsra_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsra_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsra_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui3. */ ++/* Data types in instruction templates: V16QI, V16QI, UQI. */ ++#define __lsx_vsrai_b(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrai_b((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: V8HI, V8HI, UQI. */ ++#define __lsx_vsrai_h(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrai_h((v8i16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V4SI, V4SI, UQI. */ ++#define __lsx_vsrai_w(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrai_w((v4i32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui6. */ ++/* Data types in instruction templates: V2DI, V2DI, UQI. */ ++#define __lsx_vsrai_d(/*__m128i*/ _1, /*ui6*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrai_d((v2i64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrar_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrar_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrar_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrar_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrar_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrar_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrar_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrar_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui3. */ ++/* Data types in instruction templates: V16QI, V16QI, UQI. */ ++#define __lsx_vsrari_b(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrari_b((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: V8HI, V8HI, UQI. */ ++#define __lsx_vsrari_h(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrari_h((v8i16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V4SI, V4SI, UQI. */ ++#define __lsx_vsrari_w(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrari_w((v4i32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui6. */ ++/* Data types in instruction templates: V2DI, V2DI, UQI. */ ++#define __lsx_vsrari_d(/*__m128i*/ _1, /*ui6*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrari_d((v2i64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrl_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrl_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrl_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrl_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrl_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrl_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrl_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrl_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui3. */ ++/* Data types in instruction templates: V16QI, V16QI, UQI. */ ++#define __lsx_vsrli_b(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrli_b((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: V8HI, V8HI, UQI. */ ++#define __lsx_vsrli_h(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrli_h((v8i16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V4SI, V4SI, UQI. */ ++#define __lsx_vsrli_w(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrli_w((v4i32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui6. */ ++/* Data types in instruction templates: V2DI, V2DI, UQI. 
*/ ++#define __lsx_vsrli_d(/*__m128i*/ _1, /*ui6*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrli_d((v2i64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrlr_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrlr_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrlr_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrlr_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrlr_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrlr_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrlr_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrlr_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui3. */ ++/* Data types in instruction templates: V16QI, V16QI, UQI. */ ++#define __lsx_vsrlri_b(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrlri_b((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: V8HI, V8HI, UQI. */ ++#define __lsx_vsrlri_h(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrlri_h((v8i16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V4SI, V4SI, UQI. */ ++#define __lsx_vsrlri_w(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrlri_w((v4i32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui6. */ ++/* Data types in instruction templates: V2DI, V2DI, UQI. */ ++#define __lsx_vsrlri_d(/*__m128i*/ _1, /*ui6*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrlri_d((v2i64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitclr_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vbitclr_b((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV8HI, UV8HI, UV8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitclr_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vbitclr_h((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV4SI, UV4SI, UV4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitclr_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vbitclr_w((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitclr_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vbitclr_d((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui3. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UQI. */ ++#define __lsx_vbitclri_b(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vbitclri_b((v16u8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: UV8HI, UV8HI, UQI. */ ++#define __lsx_vbitclri_h(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vbitclri_h((v8u16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: UV4SI, UV4SI, UQI. */ ++#define __lsx_vbitclri_w(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vbitclri_w((v4u32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui6. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UQI. */ ++#define __lsx_vbitclri_d(/*__m128i*/ _1, /*ui6*/ _2) \ ++ ((__m128i)__builtin_lsx_vbitclri_d((v2u64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitset_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vbitset_b((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV8HI, UV8HI, UV8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitset_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vbitset_h((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV4SI, UV4SI, UV4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitset_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vbitset_w((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitset_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vbitset_d((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui3. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UQI. */ ++#define __lsx_vbitseti_b(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vbitseti_b((v16u8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: UV8HI, UV8HI, UQI. */ ++#define __lsx_vbitseti_h(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vbitseti_h((v8u16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: UV4SI, UV4SI, UQI. */ ++#define __lsx_vbitseti_w(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vbitseti_w((v4u32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui6. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UQI. */ ++#define __lsx_vbitseti_d(/*__m128i*/ _1, /*ui6*/ _2) \ ++ ((__m128i)__builtin_lsx_vbitseti_d((v2u64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitrev_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vbitrev_b((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV8HI, UV8HI, UV8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitrev_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vbitrev_h((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV4SI, UV4SI, UV4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitrev_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vbitrev_w((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitrev_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vbitrev_d((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui3. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UQI. */ ++#define __lsx_vbitrevi_b(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vbitrevi_b((v16u8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: UV8HI, UV8HI, UQI. */ ++#define __lsx_vbitrevi_h(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vbitrevi_h((v8u16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: UV4SI, UV4SI, UQI. */ ++#define __lsx_vbitrevi_w(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vbitrevi_w((v4u32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui6. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UQI. */ ++#define __lsx_vbitrevi_d(/*__m128i*/ _1, /*ui6*/ _2) \ ++ ((__m128i)__builtin_lsx_vbitrevi_d((v2u64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vadd_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vadd_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vadd_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vadd_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vadd_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vadd_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vadd_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vadd_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V16QI, V16QI, UQI. 
*/ ++#define __lsx_vaddi_bu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vaddi_bu((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V8HI, V8HI, UQI. */ ++#define __lsx_vaddi_hu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vaddi_hu((v8i16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V4SI, V4SI, UQI. */ ++#define __lsx_vaddi_wu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vaddi_wu((v4i32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V2DI, V2DI, UQI. */ ++#define __lsx_vaddi_du(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vaddi_du((v2i64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsub_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsub_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsub_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsub_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsub_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsub_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsub_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsub_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V16QI, V16QI, UQI. */ ++#define __lsx_vsubi_bu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vsubi_bu((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V8HI, V8HI, UQI. */ ++#define __lsx_vsubi_hu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vsubi_hu((v8i16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V4SI, V4SI, UQI. */ ++#define __lsx_vsubi_wu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vsubi_wu((v4i32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V2DI, V2DI, UQI. */ ++#define __lsx_vsubi_du(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vsubi_du((v2i64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmax_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmax_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmax_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmax_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmax_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmax_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmax_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmax_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, si5. */ ++/* Data types in instruction templates: V16QI, V16QI, QI. */ ++#define __lsx_vmaxi_b(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmaxi_b((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, si5. */ ++/* Data types in instruction templates: V8HI, V8HI, QI. */ ++#define __lsx_vmaxi_h(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmaxi_h((v8i16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, si5. */ ++/* Data types in instruction templates: V4SI, V4SI, QI. */ ++#define __lsx_vmaxi_w(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmaxi_w((v4i32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, si5. */ ++/* Data types in instruction templates: V2DI, V2DI, QI. */ ++#define __lsx_vmaxi_d(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmaxi_d((v2i64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmax_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmax_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV8HI, UV8HI, UV8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmax_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmax_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV4SI, UV4SI, UV4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmax_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmax_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmax_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmax_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UQI. */ ++#define __lsx_vmaxi_bu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmaxi_bu((v16u8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: UV8HI, UV8HI, UQI. */ ++#define __lsx_vmaxi_hu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmaxi_hu((v8u16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5. 
*/ ++/* Data types in instruction templates: UV4SI, UV4SI, UQI. */ ++#define __lsx_vmaxi_wu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmaxi_wu((v4u32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UQI. */ ++#define __lsx_vmaxi_du(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmaxi_du((v2u64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmin_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmin_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmin_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmin_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmin_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmin_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmin_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmin_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, si5. */ ++/* Data types in instruction templates: V16QI, V16QI, QI. */ ++#define __lsx_vmini_b(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmini_b((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, si5. */ ++/* Data types in instruction templates: V8HI, V8HI, QI. */ ++#define __lsx_vmini_h(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmini_h((v8i16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, si5. */ ++/* Data types in instruction templates: V4SI, V4SI, QI. */ ++#define __lsx_vmini_w(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmini_w((v4i32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, si5. */ ++/* Data types in instruction templates: V2DI, V2DI, QI. */ ++#define __lsx_vmini_d(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmini_d((v2i64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmin_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmin_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV8HI, UV8HI, UV8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmin_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmin_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV4SI, UV4SI, UV4SI. 
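
   The signed max/min wrappers above compose into the usual lane-wise clamp; a
   brief sketch (not from the upstream patch), with an illustrative name:

     static __inline __m128i clamp_s16(__m128i v, __m128i lo, __m128i hi) {
       v = __lsx_vmax_h(v, lo);      // raise lanes below the lower bound
       return __lsx_vmin_h(v, hi);   // then cap lanes above the upper bound
     }
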
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmin_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmin_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmin_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmin_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UQI. */ ++#define __lsx_vmini_bu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmini_bu((v16u8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: UV8HI, UV8HI, UQI. */ ++#define __lsx_vmini_hu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmini_hu((v8u16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: UV4SI, UV4SI, UQI. */ ++#define __lsx_vmini_wu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmini_wu((v4u32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UQI. */ ++#define __lsx_vmini_du(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmini_du((v2u64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vseq_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vseq_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vseq_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vseq_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vseq_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vseq_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vseq_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vseq_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, si5. */ ++/* Data types in instruction templates: V16QI, V16QI, QI. */ ++#define __lsx_vseqi_b(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vseqi_b((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, si5. */ ++/* Data types in instruction templates: V8HI, V8HI, QI. */ ++#define __lsx_vseqi_h(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vseqi_h((v8i16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, si5. */ ++/* Data types in instruction templates: V4SI, V4SI, QI. */ ++#define __lsx_vseqi_w(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vseqi_w((v4i32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, si5. */ ++/* Data types in instruction templates: V2DI, V2DI, QI. 
*/ ++#define __lsx_vseqi_d(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vseqi_d((v2i64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, si5. */ ++/* Data types in instruction templates: V16QI, V16QI, QI. */ ++#define __lsx_vslti_b(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslti_b((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vslt_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vslt_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vslt_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vslt_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vslt_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vslt_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vslt_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vslt_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, si5. */ ++/* Data types in instruction templates: V8HI, V8HI, QI. */ ++#define __lsx_vslti_h(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslti_h((v8i16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, si5. */ ++/* Data types in instruction templates: V4SI, V4SI, QI. */ ++#define __lsx_vslti_w(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslti_w((v4i32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, si5. */ ++/* Data types in instruction templates: V2DI, V2DI, QI. */ ++#define __lsx_vslti_d(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslti_d((v2i64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, UV16QI, UV16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vslt_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vslt_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, UV8HI, UV8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vslt_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vslt_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, UV4SI, UV4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vslt_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vslt_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, UV2DI, UV2DI. 
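
   The compare wrappers above produce per-lane masks (all ones where the
   relation holds, all zeros otherwise), which is the usual SIMD convention the
   instruction names suggest.  A sketch, not from the upstream patch; helper
   names are illustrative:

     static __inline __m128i mask_equal_w(__m128i a, __m128i b) {
       return __lsx_vseq_w(a, b);    // 32-bit lanes: all ones where a == b
     }

     static __inline __m128i mask_below_u8(__m128i a, __m128i b) {
       return __lsx_vslt_bu(a, b);   // byte lanes: all ones where a < b, unsigned
     }
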
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vslt_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vslt_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V16QI, UV16QI, UQI. */ ++#define __lsx_vslti_bu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslti_bu((v16u8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V8HI, UV8HI, UQI. */ ++#define __lsx_vslti_hu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslti_hu((v8u16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V4SI, UV4SI, UQI. */ ++#define __lsx_vslti_wu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslti_wu((v4u32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V2DI, UV2DI, UQI. */ ++#define __lsx_vslti_du(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslti_du((v2u64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsle_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsle_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsle_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsle_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsle_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsle_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsle_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsle_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, si5. */ ++/* Data types in instruction templates: V16QI, V16QI, QI. */ ++#define __lsx_vslei_b(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslei_b((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, si5. */ ++/* Data types in instruction templates: V8HI, V8HI, QI. */ ++#define __lsx_vslei_h(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslei_h((v8i16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, si5. */ ++/* Data types in instruction templates: V4SI, V4SI, QI. */ ++#define __lsx_vslei_w(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslei_w((v4i32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, si5. */ ++/* Data types in instruction templates: V2DI, V2DI, QI. */ ++#define __lsx_vslei_d(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslei_d((v2i64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, UV16QI, UV16QI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsle_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsle_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, UV8HI, UV8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsle_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsle_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, UV4SI, UV4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsle_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsle_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, UV2DI, UV2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsle_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsle_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V16QI, UV16QI, UQI. */ ++#define __lsx_vslei_bu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslei_bu((v16u8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V8HI, UV8HI, UQI. */ ++#define __lsx_vslei_hu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslei_hu((v8u16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V4SI, UV4SI, UQI. */ ++#define __lsx_vslei_wu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslei_wu((v4u32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V2DI, UV2DI, UQI. */ ++#define __lsx_vslei_du(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslei_du((v2u64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui3. */ ++/* Data types in instruction templates: V16QI, V16QI, UQI. */ ++#define __lsx_vsat_b(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vsat_b((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: V8HI, V8HI, UQI. */ ++#define __lsx_vsat_h(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vsat_h((v8i16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V4SI, V4SI, UQI. */ ++#define __lsx_vsat_w(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vsat_w((v4i32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui6. */ ++/* Data types in instruction templates: V2DI, V2DI, UQI. */ ++#define __lsx_vsat_d(/*__m128i*/ _1, /*ui6*/ _2) \ ++ ((__m128i)__builtin_lsx_vsat_d((v2i64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui3. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UQI. */ ++#define __lsx_vsat_bu(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vsat_bu((v16u8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: UV8HI, UV8HI, UQI. */ ++#define __lsx_vsat_hu(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vsat_hu((v8u16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: UV4SI, UV4SI, UQI. 
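
   Sketch for the unsigned compare-immediate and saturate wrappers above (not
   from the upstream patch); the immediates and names are illustrative, and the
   comment on vsat only paraphrases what the mnemonic and the UQI immediate in
   the template suggest.

     static __inline __m128i small_byte_mask(__m128i v) {
       return __lsx_vslei_bu(v, 10);  // byte lanes: all ones where lane <= 10
     }

     static __inline __m128i squeeze_s16(__m128i v) {
       // clamp every 16-bit lane into the narrower signed range selected by
       // the 4-bit immediate (7 here)
       return __lsx_vsat_h(v, 7);
     }
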
*/ ++#define __lsx_vsat_wu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vsat_wu((v4u32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui6. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UQI. */ ++#define __lsx_vsat_du(/*__m128i*/ _1, /*ui6*/ _2) \ ++ ((__m128i)__builtin_lsx_vsat_du((v2u64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vadda_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vadda_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vadda_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vadda_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vadda_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vadda_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vadda_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vadda_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsadd_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsadd_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsadd_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsadd_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsadd_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsadd_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsadd_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsadd_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsadd_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsadd_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV8HI, UV8HI, UV8HI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsadd_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsadd_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV4SI, UV4SI, UV4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsadd_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsadd_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsadd_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsadd_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavg_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavg_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavg_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavg_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavg_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavg_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavg_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavg_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavg_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavg_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV8HI, UV8HI, UV8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavg_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavg_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV4SI, UV4SI, UV4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavg_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavg_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavg_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavg_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. 
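
   The saturating-add and average wrappers above are typical fixed-point
   building blocks; a short sketch (not from the upstream patch), with
   illustrative names:

     static __inline __m128i sat_acc_s16(__m128i acc, __m128i x) {
       return __lsx_vsadd_h(acc, x);  // signed saturating 16-bit accumulate
     }

     static __inline __m128i mean_u8(__m128i a, __m128i b) {
       return __lsx_vavg_bu(a, b);    // unsigned byte-wise average
     }
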
*/ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavgr_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavgr_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavgr_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavgr_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavgr_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavgr_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavgr_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavgr_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavgr_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavgr_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV8HI, UV8HI, UV8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavgr_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavgr_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV4SI, UV4SI, UV4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavgr_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavgr_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavgr_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavgr_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssub_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssub_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssub_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssub_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssub_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssub_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssub_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssub_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssub_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssub_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV8HI, UV8HI, UV8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssub_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssub_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV4SI, UV4SI, UV4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssub_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssub_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssub_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssub_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vabsd_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vabsd_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vabsd_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vabsd_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vabsd_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vabsd_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vabsd_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vabsd_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vabsd_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vabsd_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. 
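
   Sketch for the rounded-average and absolute-difference wrappers above (not
   from the upstream patch); the "r" in vavgr suggests the rounding counterpart
   of vavg, and vssub (also above) is the saturating counterpart of vsub.
   Names are illustrative.

     static __inline __m128i mean_u8_rounded(__m128i a, __m128i b) {
       return __lsx_vavgr_bu(a, b);   // unsigned byte average, rounded form
     }

     static __inline __m128i absdiff_u8(__m128i a, __m128i b) {
       return __lsx_vabsd_bu(a, b);   // per-byte absolute difference, unsigned
     }
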
*/ ++/* Data types in instruction templates: UV8HI, UV8HI, UV8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vabsd_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vabsd_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV4SI, UV4SI, UV4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vabsd_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vabsd_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vabsd_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vabsd_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmul_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmul_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmul_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmul_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmul_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmul_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmul_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmul_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmadd_b(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmadd_b((v16i8)_1, (v16i8)_2, (v16i8)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmadd_h(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmadd_h((v8i16)_1, (v8i16)_2, (v8i16)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmadd_w(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmadd_w((v4i32)_1, (v4i32)_2, (v4i32)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI, V2DI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmadd_d(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmadd_d((v2i64)_1, (v2i64)_2, (v2i64)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmsub_b(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmsub_b((v16i8)_1, (v16i8)_2, (v16i8)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmsub_h(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmsub_h((v8i16)_1, (v8i16)_2, (v8i16)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmsub_w(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmsub_w((v4i32)_1, (v4i32)_2, (v4i32)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmsub_d(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmsub_d((v2i64)_1, (v2i64)_2, (v2i64)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vdiv_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vdiv_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vdiv_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vdiv_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vdiv_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vdiv_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vdiv_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vdiv_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vdiv_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vdiv_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV8HI, UV8HI, UV8HI. 
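
   The three-operand vmadd/vmsub wrappers above read most naturally with the
   first argument as the accumulator, which is what the sketch below assumes
   (not from the upstream patch; names are illustrative).

     static __inline __m128i mul_acc_w(__m128i acc, __m128i a, __m128i b) {
       return __lsx_vmadd_w(acc, a, b);  // assumed acc + a * b per 32-bit lane
     }

     static __inline __m128i ratio_w(__m128i a, __m128i b) {
       return __lsx_vdiv_w(a, b);        // lane-wise signed 32-bit division
     }
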
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vdiv_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vdiv_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV4SI, UV4SI, UV4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vdiv_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vdiv_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vdiv_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vdiv_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhaddw_h_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhaddw_h_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhaddw_w_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhaddw_w_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhaddw_d_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhaddw_d_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV8HI, UV16QI, UV16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhaddw_hu_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhaddw_hu_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV4SI, UV8HI, UV8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhaddw_wu_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhaddw_wu_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV2DI, UV4SI, UV4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhaddw_du_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhaddw_du_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhsubw_h_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhsubw_h_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V8HI, V8HI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhsubw_w_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhsubw_w_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhsubw_d_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhsubw_d_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, UV16QI, UV16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhsubw_hu_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhsubw_hu_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, UV8HI, UV8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhsubw_wu_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhsubw_wu_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, UV4SI, UV4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhsubw_du_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhsubw_du_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmod_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmod_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmod_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmod_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmod_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmod_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmod_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmod_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmod_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmod_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV8HI, UV8HI, UV8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmod_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmod_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. 
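
   Sketch for the widening horizontal-add and modulo wrappers above (not from
   the upstream patch).  The vhaddw templates widen the element type (V8HI
   inputs, V4SI result), so the comment below only restates that and does not
   assert which source supplies the odd or even half-lanes.  Names are
   illustrative.

     static __inline __m128i pair_sums_w(__m128i a, __m128i b) {
       return __lsx_vhaddw_w_h(a, b);  // 16-bit half-lanes combined into 32-bit sums
     }

     static __inline __m128i remainder_w(__m128i a, __m128i b) {
       return __lsx_vmod_w(a, b);      // lane-wise signed 32-bit remainder
     }
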
*/ ++/* Data types in instruction templates: UV4SI, UV4SI, UV4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmod_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmod_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmod_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmod_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, rk. */ ++/* Data types in instruction templates: V16QI, V16QI, SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vreplve_b(__m128i _1, int _2) { ++ return (__m128i)__builtin_lsx_vreplve_b((v16i8)_1, (int)_2); ++} ++ ++/* Assembly instruction format: vd, vj, rk. */ ++/* Data types in instruction templates: V8HI, V8HI, SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vreplve_h(__m128i _1, int _2) { ++ return (__m128i)__builtin_lsx_vreplve_h((v8i16)_1, (int)_2); ++} ++ ++/* Assembly instruction format: vd, vj, rk. */ ++/* Data types in instruction templates: V4SI, V4SI, SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vreplve_w(__m128i _1, int _2) { ++ return (__m128i)__builtin_lsx_vreplve_w((v4i32)_1, (int)_2); ++} ++ ++/* Assembly instruction format: vd, vj, rk. */ ++/* Data types in instruction templates: V2DI, V2DI, SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vreplve_d(__m128i _1, int _2) { ++ return (__m128i)__builtin_lsx_vreplve_d((v2i64)_1, (int)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: V16QI, V16QI, UQI. */ ++#define __lsx_vreplvei_b(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vreplvei_b((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui3. */ ++/* Data types in instruction templates: V8HI, V8HI, UQI. */ ++#define __lsx_vreplvei_h(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vreplvei_h((v8i16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui2. */ ++/* Data types in instruction templates: V4SI, V4SI, UQI. */ ++#define __lsx_vreplvei_w(/*__m128i*/ _1, /*ui2*/ _2) \ ++ ((__m128i)__builtin_lsx_vreplvei_w((v4i32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui1. */ ++/* Data types in instruction templates: V2DI, V2DI, UQI. */ ++#define __lsx_vreplvei_d(/*__m128i*/ _1, /*ui1*/ _2) \ ++ ((__m128i)__builtin_lsx_vreplvei_d((v2i64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpickev_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpickev_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpickev_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpickev_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. 
*/ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpickev_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpickev_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpickev_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpickev_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpickod_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpickod_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpickod_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpickod_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpickod_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpickod_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpickod_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpickod_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vilvh_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vilvh_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vilvh_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vilvh_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vilvh_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vilvh_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vilvh_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vilvh_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vilvl_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vilvl_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vilvl_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vilvl_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vilvl_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vilvl_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vilvl_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vilvl_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpackev_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpackev_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpackev_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpackev_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpackev_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpackev_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpackev_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpackev_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpackod_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpackod_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpackod_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpackod_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpackod_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpackod_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. 
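The vpickev/vpickod and vilvl/vilvh wrappers above map directly onto the LSX pick-even, pick-odd and interleave instructions, the usual building blocks for splitting and merging byte streams. A minimal usage sketch, assuming a LoongArch64 compiler with LSX enabled (typically -mlsx) and this header installed under its usual name lsxintrin.h; the helper name is illustrative only and is not part of the patch:

#include <lsxintrin.h>

/* Split two vectors of interleaved bytes into their even- and odd-indexed
   elements using the vpickev.b / vpickod.b wrappers defined above. */
static void lsx_split_even_odd(__m128i a, __m128i b,
                               __m128i *even, __m128i *odd) {
  *even = __lsx_vpickev_b(a, b); /* even-indexed bytes of the two sources */
  *odd  = __lsx_vpickod_b(a, b); /* odd-indexed bytes of the two sources */
}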
*/ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpackod_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpackod_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vshuf_h(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vshuf_h((v8i16)_1, (v8i16)_2, (v8i16)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vshuf_w(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vshuf_w((v4i32)_1, (v4i32)_2, (v4i32)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vshuf_d(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vshuf_d((v2i64)_1, (v2i64)_2, (v2i64)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vand_v(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vand_v((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui8. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UQI. */ ++#define __lsx_vandi_b(/*__m128i*/ _1, /*ui8*/ _2) \ ++ ((__m128i)__builtin_lsx_vandi_b((v16u8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vor_v(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vor_v((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui8. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UQI. */ ++#define __lsx_vori_b(/*__m128i*/ _1, /*ui8*/ _2) \ ++ ((__m128i)__builtin_lsx_vori_b((v16u8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vnor_v(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vnor_v((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui8. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UQI. */ ++#define __lsx_vnori_b(/*__m128i*/ _1, /*ui8*/ _2) \ ++ ((__m128i)__builtin_lsx_vnori_b((v16u8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vxor_v(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vxor_v((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui8. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UQI. 
*/ ++#define __lsx_vxori_b(/*__m128i*/ _1, /*ui8*/ _2) \ ++ ((__m128i)__builtin_lsx_vxori_b((v16u8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk, va. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI, UV16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitsel_v(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vbitsel_v((v16u8)_1, (v16u8)_2, (v16u8)_3); ++} ++ ++/* Assembly instruction format: vd, vj, ui8. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI, USI. */ ++#define __lsx_vbitseli_b(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ ++ ((__m128i)__builtin_lsx_vbitseli_b((v16u8)(_1), (v16u8)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui8. */ ++/* Data types in instruction templates: V16QI, V16QI, USI. */ ++#define __lsx_vshuf4i_b(/*__m128i*/ _1, /*ui8*/ _2) \ ++ ((__m128i)__builtin_lsx_vshuf4i_b((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui8. */ ++/* Data types in instruction templates: V8HI, V8HI, USI. */ ++#define __lsx_vshuf4i_h(/*__m128i*/ _1, /*ui8*/ _2) \ ++ ((__m128i)__builtin_lsx_vshuf4i_h((v8i16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui8. */ ++/* Data types in instruction templates: V4SI, V4SI, USI. */ ++#define __lsx_vshuf4i_w(/*__m128i*/ _1, /*ui8*/ _2) \ ++ ((__m128i)__builtin_lsx_vshuf4i_w((v4i32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, rj. */ ++/* Data types in instruction templates: V16QI, SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vreplgr2vr_b(int _1) { ++ return (__m128i)__builtin_lsx_vreplgr2vr_b((int)_1); ++} ++ ++/* Assembly instruction format: vd, rj. */ ++/* Data types in instruction templates: V8HI, SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vreplgr2vr_h(int _1) { ++ return (__m128i)__builtin_lsx_vreplgr2vr_h((int)_1); ++} ++ ++/* Assembly instruction format: vd, rj. */ ++/* Data types in instruction templates: V4SI, SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vreplgr2vr_w(int _1) { ++ return (__m128i)__builtin_lsx_vreplgr2vr_w((int)_1); ++} ++ ++/* Assembly instruction format: vd, rj. */ ++/* Data types in instruction templates: V2DI, DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vreplgr2vr_d(long int _1) { ++ return (__m128i)__builtin_lsx_vreplgr2vr_d((long int)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpcnt_b(__m128i _1) { ++ return (__m128i)__builtin_lsx_vpcnt_b((v16i8)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpcnt_h(__m128i _1) { ++ return (__m128i)__builtin_lsx_vpcnt_h((v8i16)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpcnt_w(__m128i _1) { ++ return (__m128i)__builtin_lsx_vpcnt_w((v4i32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. 
*/ ++/* Data types in instruction templates: V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpcnt_d(__m128i _1) { ++ return (__m128i)__builtin_lsx_vpcnt_d((v2i64)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vclo_b(__m128i _1) { ++ return (__m128i)__builtin_lsx_vclo_b((v16i8)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vclo_h(__m128i _1) { ++ return (__m128i)__builtin_lsx_vclo_h((v8i16)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vclo_w(__m128i _1) { ++ return (__m128i)__builtin_lsx_vclo_w((v4i32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vclo_d(__m128i _1) { ++ return (__m128i)__builtin_lsx_vclo_d((v2i64)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vclz_b(__m128i _1) { ++ return (__m128i)__builtin_lsx_vclz_b((v16i8)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vclz_h(__m128i _1) { ++ return (__m128i)__builtin_lsx_vclz_h((v8i16)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vclz_w(__m128i _1) { ++ return (__m128i)__builtin_lsx_vclz_w((v4i32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vclz_d(__m128i _1) { ++ return (__m128i)__builtin_lsx_vclz_d((v2i64)_1); ++} ++ ++/* Assembly instruction format: rd, vj, ui4. */ ++/* Data types in instruction templates: SI, V16QI, UQI. */ ++#define __lsx_vpickve2gr_b(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((int)__builtin_lsx_vpickve2gr_b((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: rd, vj, ui3. */ ++/* Data types in instruction templates: SI, V8HI, UQI. */ ++#define __lsx_vpickve2gr_h(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((int)__builtin_lsx_vpickve2gr_h((v8i16)(_1), (_2))) ++ ++/* Assembly instruction format: rd, vj, ui2. */ ++/* Data types in instruction templates: SI, V4SI, UQI. */ ++#define __lsx_vpickve2gr_w(/*__m128i*/ _1, /*ui2*/ _2) \ ++ ((int)__builtin_lsx_vpickve2gr_w((v4i32)(_1), (_2))) ++ ++/* Assembly instruction format: rd, vj, ui1. */ ++/* Data types in instruction templates: DI, V2DI, UQI. */ ++#define __lsx_vpickve2gr_d(/*__m128i*/ _1, /*ui1*/ _2) \ ++ ((long int)__builtin_lsx_vpickve2gr_d((v2i64)(_1), (_2))) ++ ++/* Assembly instruction format: rd, vj, ui4. 
*/ ++/* Data types in instruction templates: USI, V16QI, UQI. */ ++#define __lsx_vpickve2gr_bu(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((unsigned int)__builtin_lsx_vpickve2gr_bu((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: rd, vj, ui3. */ ++/* Data types in instruction templates: USI, V8HI, UQI. */ ++#define __lsx_vpickve2gr_hu(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((unsigned int)__builtin_lsx_vpickve2gr_hu((v8i16)(_1), (_2))) ++ ++/* Assembly instruction format: rd, vj, ui2. */ ++/* Data types in instruction templates: USI, V4SI, UQI. */ ++#define __lsx_vpickve2gr_wu(/*__m128i*/ _1, /*ui2*/ _2) \ ++ ((unsigned int)__builtin_lsx_vpickve2gr_wu((v4i32)(_1), (_2))) ++ ++/* Assembly instruction format: rd, vj, ui1. */ ++/* Data types in instruction templates: UDI, V2DI, UQI. */ ++#define __lsx_vpickve2gr_du(/*__m128i*/ _1, /*ui1*/ _2) \ ++ ((unsigned long int)__builtin_lsx_vpickve2gr_du((v2i64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, rj, ui4. */ ++/* Data types in instruction templates: V16QI, V16QI, SI, UQI. */ ++#define __lsx_vinsgr2vr_b(/*__m128i*/ _1, /*int*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vinsgr2vr_b((v16i8)(_1), (int)(_2), (_3))) ++ ++/* Assembly instruction format: vd, rj, ui3. */ ++/* Data types in instruction templates: V8HI, V8HI, SI, UQI. */ ++#define __lsx_vinsgr2vr_h(/*__m128i*/ _1, /*int*/ _2, /*ui3*/ _3) \ ++ ((__m128i)__builtin_lsx_vinsgr2vr_h((v8i16)(_1), (int)(_2), (_3))) ++ ++/* Assembly instruction format: vd, rj, ui2. */ ++/* Data types in instruction templates: V4SI, V4SI, SI, UQI. */ ++#define __lsx_vinsgr2vr_w(/*__m128i*/ _1, /*int*/ _2, /*ui2*/ _3) \ ++ ((__m128i)__builtin_lsx_vinsgr2vr_w((v4i32)(_1), (int)(_2), (_3))) ++ ++/* Assembly instruction format: vd, rj, ui1. */ ++/* Data types in instruction templates: V2DI, V2DI, DI, UQI. */ ++#define __lsx_vinsgr2vr_d(/*__m128i*/ _1, /*long int*/ _2, /*ui1*/ _3) \ ++ ((__m128i)__builtin_lsx_vinsgr2vr_d((v2i64)(_1), (long int)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SF, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfadd_s(__m128 _1, __m128 _2) { ++ return (__m128)__builtin_lsx_vfadd_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DF, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfadd_d(__m128d _1, __m128d _2) { ++ return (__m128d)__builtin_lsx_vfadd_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SF, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfsub_s(__m128 _1, __m128 _2) { ++ return (__m128)__builtin_lsx_vfsub_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DF, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfsub_d(__m128d _1, __m128d _2) { ++ return (__m128d)__builtin_lsx_vfsub_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SF, V4SF, V4SF. 
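The vreplgr2vr, vinsgr2vr and vpickve2gr wrappers above are the scalar entry and exit points for LSX code: broadcast, lane insert and lane extract. A short sketch of how they compose, under the same assumptions as the earlier snippet (LoongArch64 toolchain, -mlsx, lsxintrin.h); lsx_sum4 is an illustrative name:

#include <lsxintrin.h>

/* Pack four scalars into one vector, then pull the lanes back out and sum
   them, exercising vreplgr2vr.w, vinsgr2vr.w and vpickve2gr.w. */
static int lsx_sum4(int a, int b, int c, int d) {
  __m128i v = __lsx_vreplgr2vr_w(a);  /* broadcast a into all four lanes */
  v = __lsx_vinsgr2vr_w(v, b, 1);     /* overwrite lanes 1..3 */
  v = __lsx_vinsgr2vr_w(v, c, 2);
  v = __lsx_vinsgr2vr_w(v, d, 3);
  return __lsx_vpickve2gr_w(v, 0) + __lsx_vpickve2gr_w(v, 1) +
         __lsx_vpickve2gr_w(v, 2) + __lsx_vpickve2gr_w(v, 3);
}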
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfmul_s(__m128 _1, __m128 _2) { ++ return (__m128)__builtin_lsx_vfmul_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DF, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfmul_d(__m128d _1, __m128d _2) { ++ return (__m128d)__builtin_lsx_vfmul_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SF, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfdiv_s(__m128 _1, __m128 _2) { ++ return (__m128)__builtin_lsx_vfdiv_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DF, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfdiv_d(__m128d _1, __m128d _2) { ++ return (__m128d)__builtin_lsx_vfdiv_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcvt_h_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcvt_h_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SF, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfcvt_s_d(__m128d _1, __m128d _2) { ++ return (__m128)__builtin_lsx_vfcvt_s_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SF, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfmin_s(__m128 _1, __m128 _2) { ++ return (__m128)__builtin_lsx_vfmin_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DF, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfmin_d(__m128d _1, __m128d _2) { ++ return (__m128d)__builtin_lsx_vfmin_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SF, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfmina_s(__m128 _1, __m128 _2) { ++ return (__m128)__builtin_lsx_vfmina_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DF, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfmina_d(__m128d _1, __m128d _2) { ++ return (__m128d)__builtin_lsx_vfmina_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SF, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfmax_s(__m128 _1, __m128 _2) { ++ return (__m128)__builtin_lsx_vfmax_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DF, V2DF, V2DF. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfmax_d(__m128d _1, __m128d _2) { ++ return (__m128d)__builtin_lsx_vfmax_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SF, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfmaxa_s(__m128 _1, __m128 _2) { ++ return (__m128)__builtin_lsx_vfmaxa_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DF, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfmaxa_d(__m128d _1, __m128d _2) { ++ return (__m128d)__builtin_lsx_vfmaxa_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SI, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfclass_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vfclass_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DI, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfclass_d(__m128d _1) { ++ return (__m128i)__builtin_lsx_vfclass_d((v2f64)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfsqrt_s(__m128 _1) { ++ return (__m128)__builtin_lsx_vfsqrt_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfsqrt_d(__m128d _1) { ++ return (__m128d)__builtin_lsx_vfsqrt_d((v2f64)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfrecip_s(__m128 _1) { ++ return (__m128)__builtin_lsx_vfrecip_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfrecip_d(__m128d _1) { ++ return (__m128d)__builtin_lsx_vfrecip_d((v2f64)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfrint_s(__m128 _1) { ++ return (__m128)__builtin_lsx_vfrint_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfrint_d(__m128d _1) { ++ return (__m128d)__builtin_lsx_vfrint_d((v2f64)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfrsqrt_s(__m128 _1) { ++ return (__m128)__builtin_lsx_vfrsqrt_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. 
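With the single-precision arithmetic wrappers above (vfadd.s, vfmul.s, vfsqrt.s and friends), ordinary per-lane floating-point math reads naturally. A sketch under the same toolchain assumptions; lsx_hypot4 is an illustrative name and ignores possible overflow of the intermediate squares:

#include <lsxintrin.h>

/* Per-lane sqrt(x*x + y*y) for four float pairs. */
static __m128 lsx_hypot4(__m128 x, __m128 y) {
  __m128 xx = __lsx_vfmul_s(x, x);
  __m128 yy = __lsx_vfmul_s(y, y);
  return __lsx_vfsqrt_s(__lsx_vfadd_s(xx, yy));
}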
*/ ++/* Data types in instruction templates: V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfrsqrt_d(__m128d _1) { ++ return (__m128d)__builtin_lsx_vfrsqrt_d((v2f64)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vflogb_s(__m128 _1) { ++ return (__m128)__builtin_lsx_vflogb_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vflogb_d(__m128d _1) { ++ return (__m128d)__builtin_lsx_vflogb_d((v2f64)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SF, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfcvth_s_h(__m128i _1) { ++ return (__m128)__builtin_lsx_vfcvth_s_h((v8i16)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfcvth_d_s(__m128 _1) { ++ return (__m128d)__builtin_lsx_vfcvth_d_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SF, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfcvtl_s_h(__m128i _1) { ++ return (__m128)__builtin_lsx_vfcvtl_s_h((v8i16)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfcvtl_d_s(__m128 _1) { ++ return (__m128d)__builtin_lsx_vfcvtl_d_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SI, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftint_w_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftint_w_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DI, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftint_l_d(__m128d _1) { ++ return (__m128i)__builtin_lsx_vftint_l_d((v2f64)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: UV4SI, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftint_wu_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftint_wu_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: UV2DI, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftint_lu_d(__m128d _1) { ++ return (__m128i)__builtin_lsx_vftint_lu_d((v2f64)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SI, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrz_w_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrz_w_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. 
*/ ++/* Data types in instruction templates: V2DI, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrz_l_d(__m128d _1) { ++ return (__m128i)__builtin_lsx_vftintrz_l_d((v2f64)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: UV4SI, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrz_wu_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrz_wu_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: UV2DI, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrz_lu_d(__m128d _1) { ++ return (__m128i)__builtin_lsx_vftintrz_lu_d((v2f64)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SF, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vffint_s_w(__m128i _1) { ++ return (__m128)__builtin_lsx_vffint_s_w((v4i32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DF, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vffint_d_l(__m128i _1) { ++ return (__m128d)__builtin_lsx_vffint_d_l((v2i64)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SF, UV4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vffint_s_wu(__m128i _1) { ++ return (__m128)__builtin_lsx_vffint_s_wu((v4u32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DF, UV2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vffint_d_lu(__m128i _1) { ++ return (__m128d)__builtin_lsx_vffint_d_lu((v2u64)_1); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vandn_v(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vandn_v((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vneg_b(__m128i _1) { ++ return (__m128i)__builtin_lsx_vneg_b((v16i8)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vneg_h(__m128i _1) { ++ return (__m128i)__builtin_lsx_vneg_h((v8i16)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vneg_w(__m128i _1) { ++ return (__m128i)__builtin_lsx_vneg_w((v4i32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DI, V2DI. 
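The vffint and vftint families above convert between integer and floating-point lanes; the vftintrz variants truncate toward zero, like a C cast. A sketch of a round trip under the same assumptions; the 3.0f constant is built with vreplgr2vr.w plus vffint.s.w so the example stays within the intrinsics already shown:

#include <lsxintrin.h>

/* Convert four ints to float, scale them by 3.0f, and convert back,
   truncating toward zero. */
static __m128i lsx_scale3_trunc(__m128i counts) {
  __m128 three = __lsx_vffint_s_w(__lsx_vreplgr2vr_w(3));
  __m128 f = __lsx_vffint_s_w(counts);
  return __lsx_vftintrz_w_s(__lsx_vfmul_s(f, three));
}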
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vneg_d(__m128i _1) { ++ return (__m128i)__builtin_lsx_vneg_d((v2i64)_1); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmuh_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmuh_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmuh_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmuh_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmuh_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmuh_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmuh_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmuh_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmuh_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmuh_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV8HI, UV8HI, UV8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmuh_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmuh_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV4SI, UV4SI, UV4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmuh_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmuh_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmuh_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmuh_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui3. */ ++/* Data types in instruction templates: V8HI, V16QI, UQI. */ ++#define __lsx_vsllwil_h_b(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vsllwil_h_b((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: V4SI, V8HI, UQI. */ ++#define __lsx_vsllwil_w_h(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vsllwil_w_h((v8i16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V2DI, V4SI, UQI. */ ++#define __lsx_vsllwil_d_w(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vsllwil_d_w((v4i32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui3. 
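The vmuh wrappers above return the high half of the widened per-lane product, the usual primitive for fixed-point scaling. A short sketch under the same assumptions; reading the second operand as a 0.16 fixed-point fraction is just one common interpretation of the result:

#include <lsxintrin.h>

/* Multiply each unsigned 16-bit lane by a 0.16 fixed-point fraction and
   keep the integer part, i.e. (x * frac) >> 16 per lane. */
static __m128i lsx_scale_u16(__m128i x, __m128i frac) {
  return __lsx_vmuh_hu(x, frac);
}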
*/ ++/* Data types in instruction templates: UV8HI, UV16QI, UQI. */ ++#define __lsx_vsllwil_hu_bu(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vsllwil_hu_bu((v16u8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: UV4SI, UV8HI, UQI. */ ++#define __lsx_vsllwil_wu_hu(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vsllwil_wu_hu((v8u16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: UV2DI, UV4SI, UQI. */ ++#define __lsx_vsllwil_du_wu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vsllwil_du_wu((v4u32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsran_b_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsran_b_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsran_h_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsran_h_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsran_w_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsran_w_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssran_b_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssran_b_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssran_h_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssran_h_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssran_w_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssran_w_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV16QI, UV8HI, UV8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssran_bu_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssran_bu_h((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV8HI, UV4SI, UV4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssran_hu_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssran_hu_w((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV4SI, UV2DI, UV2DI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssran_wu_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssran_wu_d((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrarn_b_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrarn_b_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrarn_h_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrarn_h_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrarn_w_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrarn_w_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrarn_b_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrarn_b_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrarn_h_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrarn_h_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrarn_w_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrarn_w_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV16QI, UV8HI, UV8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrarn_bu_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrarn_bu_h((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV8HI, UV4SI, UV4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrarn_hu_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrarn_hu_w((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV4SI, UV2DI, UV2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrarn_wu_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrarn_wu_d((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V8HI, V8HI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrln_b_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrln_b_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrln_h_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrln_h_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrln_w_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrln_w_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV16QI, UV8HI, UV8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrln_bu_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrln_bu_h((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV8HI, UV4SI, UV4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrln_hu_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrln_hu_w((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV4SI, UV2DI, UV2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrln_wu_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrln_wu_d((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrlrn_b_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrlrn_b_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrlrn_h_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrlrn_h_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrlrn_w_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrlrn_w_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV16QI, UV8HI, UV8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrlrn_bu_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrlrn_bu_h((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV8HI, UV4SI, UV4SI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrlrn_hu_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrlrn_hu_w((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV4SI, UV2DI, UV2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrlrn_wu_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrlrn_wu_d((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI, UQI. */ ++#define __lsx_vfrstpi_b(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vfrstpi_b((v16i8)(_1), (v16i8)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI, UQI. */ ++#define __lsx_vfrstpi_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vfrstpi_h((v8i16)(_1), (v8i16)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfrstp_b(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vfrstp_b((v16i8)_1, (v16i8)_2, (v16i8)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfrstp_h(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vfrstp_h((v8i16)_1, (v8i16)_2, (v8i16)_3); ++} ++ ++/* Assembly instruction format: vd, vj, ui8. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI, USI. */ ++#define __lsx_vshuf4i_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ ++ ((__m128i)__builtin_lsx_vshuf4i_d((v2i64)(_1), (v2i64)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V16QI, V16QI, UQI. */ ++#define __lsx_vbsrl_v(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vbsrl_v((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V16QI, V16QI, UQI. */ ++#define __lsx_vbsll_v(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vbsll_v((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui8. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI, USI. */ ++#define __lsx_vextrins_b(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ ++ ((__m128i)__builtin_lsx_vextrins_b((v16i8)(_1), (v16i8)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui8. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI, USI. */ ++#define __lsx_vextrins_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ ++ ((__m128i)__builtin_lsx_vextrins_h((v8i16)(_1), (v8i16)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui8. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI, USI. */ ++#define __lsx_vextrins_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ ++ ((__m128i)__builtin_lsx_vextrins_w((v4i32)(_1), (v4i32)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui8. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI, USI. 
*/ ++#define __lsx_vextrins_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ ++ ((__m128i)__builtin_lsx_vextrins_d((v2i64)(_1), (v2i64)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmskltz_b(__m128i _1) { ++ return (__m128i)__builtin_lsx_vmskltz_b((v16i8)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmskltz_h(__m128i _1) { ++ return (__m128i)__builtin_lsx_vmskltz_h((v8i16)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmskltz_w(__m128i _1) { ++ return (__m128i)__builtin_lsx_vmskltz_w((v4i32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmskltz_d(__m128i _1) { ++ return (__m128i)__builtin_lsx_vmskltz_d((v2i64)_1); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsigncov_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsigncov_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsigncov_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsigncov_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsigncov_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsigncov_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsigncov_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsigncov_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk, va. */ ++/* Data types in instruction templates: V4SF, V4SF, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfmadd_s(__m128 _1, __m128 _2, __m128 _3) { ++ return (__m128)__builtin_lsx_vfmadd_s((v4f32)_1, (v4f32)_2, (v4f32)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk, va. */ ++/* Data types in instruction templates: V2DF, V2DF, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfmadd_d(__m128d _1, __m128d _2, __m128d _3) { ++ return (__m128d)__builtin_lsx_vfmadd_d((v2f64)_1, (v2f64)_2, (v2f64)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk, va. */ ++/* Data types in instruction templates: V4SF, V4SF, V4SF, V4SF. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfmsub_s(__m128 _1, __m128 _2, __m128 _3) { ++ return (__m128)__builtin_lsx_vfmsub_s((v4f32)_1, (v4f32)_2, (v4f32)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk, va. */ ++/* Data types in instruction templates: V2DF, V2DF, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfmsub_d(__m128d _1, __m128d _2, __m128d _3) { ++ return (__m128d)__builtin_lsx_vfmsub_d((v2f64)_1, (v2f64)_2, (v2f64)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk, va. */ ++/* Data types in instruction templates: V4SF, V4SF, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfnmadd_s(__m128 _1, __m128 _2, __m128 _3) { ++ return (__m128)__builtin_lsx_vfnmadd_s((v4f32)_1, (v4f32)_2, (v4f32)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk, va. */ ++/* Data types in instruction templates: V2DF, V2DF, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfnmadd_d(__m128d _1, __m128d _2, __m128d _3) { ++ return (__m128d)__builtin_lsx_vfnmadd_d((v2f64)_1, (v2f64)_2, (v2f64)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk, va. */ ++/* Data types in instruction templates: V4SF, V4SF, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfnmsub_s(__m128 _1, __m128 _2, __m128 _3) { ++ return (__m128)__builtin_lsx_vfnmsub_s((v4f32)_1, (v4f32)_2, (v4f32)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk, va. */ ++/* Data types in instruction templates: V2DF, V2DF, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfnmsub_d(__m128d _1, __m128d _2, __m128d _3) { ++ return (__m128d)__builtin_lsx_vfnmsub_d((v2f64)_1, (v2f64)_2, (v2f64)_3); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SI, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrne_w_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrne_w_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DI, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrne_l_d(__m128d _1) { ++ return (__m128i)__builtin_lsx_vftintrne_l_d((v2f64)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SI, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrp_w_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrp_w_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DI, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrp_l_d(__m128d _1) { ++ return (__m128i)__builtin_lsx_vftintrp_l_d((v2f64)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SI, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrm_w_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrm_w_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. 
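The vfmadd/vfmsub/vfnmadd/vfnmsub wrappers above expose the fused forms; __lsx_vfmadd_s computes _1 * _2 + _3 per lane with a single rounding, and the other three give the subtracted and negated variants. A sketch of Horner evaluation built on it, under the same assumptions as the earlier snippets; lsx_horner3 is an illustrative name:

#include <lsxintrin.h>

/* Evaluate c2*x*x + c1*x + c0 per lane with two fused multiply-adds. */
static __m128 lsx_horner3(__m128 x, __m128 c2, __m128 c1, __m128 c0) {
  __m128 r = __lsx_vfmadd_s(c2, x, c1); /* c2*x + c1 */
  return __lsx_vfmadd_s(r, x, c0);      /* (c2*x + c1)*x + c0 */
}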
*/ ++/* Data types in instruction templates: V2DI, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrm_l_d(__m128d _1) { ++ return (__m128i)__builtin_lsx_vftintrm_l_d((v2f64)_1); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftint_w_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vftint_w_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SF, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vffint_s_l(__m128i _1, __m128i _2) { ++ return (__m128)__builtin_lsx_vffint_s_l((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrz_w_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vftintrz_w_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrp_w_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vftintrp_w_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrm_w_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vftintrm_w_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrne_w_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vftintrne_w_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DI, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintl_l_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintl_l_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DI, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftinth_l_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftinth_l_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DF, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vffinth_d_w(__m128i _1) { ++ return (__m128d)__builtin_lsx_vffinth_d_w((v4i32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DF, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vffintl_d_w(__m128i _1) { ++ return (__m128d)__builtin_lsx_vffintl_d_w((v4i32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DI, V4SF. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrzl_l_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrzl_l_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DI, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrzh_l_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrzh_l_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DI, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrpl_l_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrpl_l_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DI, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrph_l_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrph_l_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DI, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrml_l_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrml_l_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DI, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrmh_l_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrmh_l_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DI, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrnel_l_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrnel_l_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DI, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrneh_l_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrneh_l_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfrintrne_s(__m128 _1) { ++ return (__m128)__builtin_lsx_vfrintrne_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfrintrne_d(__m128d _1) { ++ return (__m128d)__builtin_lsx_vfrintrne_d((v2f64)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfrintrz_s(__m128 _1) { ++ return (__m128)__builtin_lsx_vfrintrz_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfrintrz_d(__m128d _1) { ++ return (__m128d)__builtin_lsx_vfrintrz_d((v2f64)_1); ++} ++ ++/* Assembly instruction format: vd, vj. 
*/ ++/* Data types in instruction templates: V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfrintrp_s(__m128 _1) { ++ return (__m128)__builtin_lsx_vfrintrp_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfrintrp_d(__m128d _1) { ++ return (__m128d)__builtin_lsx_vfrintrp_d((v2f64)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfrintrm_s(__m128 _1) { ++ return (__m128)__builtin_lsx_vfrintrm_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfrintrm_d(__m128d _1) { ++ return (__m128d)__builtin_lsx_vfrintrm_d((v2f64)_1); ++} ++ ++/* Assembly instruction format: vd, rj, si8, idx. */ ++/* Data types in instruction templates: VOID, V16QI, CVPOINTER, SI, UQI. */ ++#define __lsx_vstelm_b(/*__m128i*/ _1, /*void **/ _2, /*si8*/ _3, /*idx*/ _4) \ ++ ((void)__builtin_lsx_vstelm_b((v16i8)(_1), (void *)(_2), (_3), (_4))) ++ ++/* Assembly instruction format: vd, rj, si8, idx. */ ++/* Data types in instruction templates: VOID, V8HI, CVPOINTER, SI, UQI. */ ++#define __lsx_vstelm_h(/*__m128i*/ _1, /*void **/ _2, /*si8*/ _3, /*idx*/ _4) \ ++ ((void)__builtin_lsx_vstelm_h((v8i16)(_1), (void *)(_2), (_3), (_4))) ++ ++/* Assembly instruction format: vd, rj, si8, idx. */ ++/* Data types in instruction templates: VOID, V4SI, CVPOINTER, SI, UQI. */ ++#define __lsx_vstelm_w(/*__m128i*/ _1, /*void **/ _2, /*si8*/ _3, /*idx*/ _4) \ ++ ((void)__builtin_lsx_vstelm_w((v4i32)(_1), (void *)(_2), (_3), (_4))) ++ ++/* Assembly instruction format: vd, rj, si8, idx. */ ++/* Data types in instruction templates: VOID, V2DI, CVPOINTER, SI, UQI. */ ++#define __lsx_vstelm_d(/*__m128i*/ _1, /*void **/ _2, /*si8*/ _3, /*idx*/ _4) \ ++ ((void)__builtin_lsx_vstelm_d((v2i64)(_1), (void *)(_2), (_3), (_4))) ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwev_d_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwev_d_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwev_w_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwev_w_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwev_h_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwev_h_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V4SI, V4SI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwod_d_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwod_d_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwod_w_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwod_w_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwod_h_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwod_h_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, UV4SI, UV4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwev_d_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwev_d_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, UV8HI, UV8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwev_w_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwev_w_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, UV16QI, UV16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwev_h_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwev_h_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, UV4SI, UV4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwod_d_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwod_d_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, UV8HI, UV8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwod_w_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwod_w_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, UV16QI, UV16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwod_h_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwod_h_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, UV4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwev_d_wu_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwev_d_wu_w((v4u32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, UV8HI, V8HI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwev_w_hu_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwev_w_hu_h((v8u16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, UV16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwev_h_bu_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwev_h_bu_b((v16u8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, UV4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwod_d_wu_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwod_d_wu_w((v4u32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, UV8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwod_w_hu_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwod_w_hu_h((v8u16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, UV16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwod_h_bu_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwod_h_bu_b((v16u8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwev_d_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwev_d_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwev_w_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwev_w_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwev_h_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwev_h_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwod_d_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwod_d_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwod_w_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwod_w_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V16QI, V16QI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwod_h_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwod_h_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, UV4SI, UV4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwev_d_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwev_d_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, UV8HI, UV8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwev_w_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwev_w_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, UV16QI, UV16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwev_h_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwev_h_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, UV4SI, UV4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwod_d_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwod_d_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, UV8HI, UV8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwod_w_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwod_w_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, UV16QI, UV16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwod_h_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwod_h_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwev_q_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwev_q_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwod_q_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwod_q_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, UV2DI, UV2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwev_q_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwev_q_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, UV2DI, UV2DI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwod_q_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwod_q_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwev_q_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwev_q_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwod_q_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwod_q_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, UV2DI, UV2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwev_q_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwev_q_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, UV2DI, UV2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwod_q_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwod_q_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, UV2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwev_q_du_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwev_q_du_d((v2u64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, UV2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwod_q_du_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwod_q_du_d((v2u64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwev_d_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwev_d_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwev_w_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwev_w_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwev_h_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwev_h_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V4SI, V4SI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwod_d_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwod_d_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwod_w_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwod_w_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwod_h_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwod_h_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, UV4SI, UV4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwev_d_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwev_d_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, UV8HI, UV8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwev_w_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwev_w_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, UV16QI, UV16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwev_h_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwev_h_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, UV4SI, UV4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwod_d_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwod_d_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, UV8HI, UV8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwod_w_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwod_w_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, UV16QI, UV16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwod_h_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwod_h_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, UV4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwev_d_wu_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwev_d_wu_w((v4u32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, UV8HI, V8HI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwev_w_hu_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwev_w_hu_h((v8u16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, UV16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwev_h_bu_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwev_h_bu_b((v16u8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, UV4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwod_d_wu_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwod_d_wu_w((v4u32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, UV8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwod_w_hu_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwod_w_hu_h((v8u16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, UV16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwod_h_bu_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwod_h_bu_b((v16u8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwev_q_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwev_q_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwod_q_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwod_q_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, UV2DI, UV2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwev_q_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwev_q_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, UV2DI, UV2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwod_q_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwod_q_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, UV2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwev_q_du_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwev_q_du_d((v2u64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, UV2DI, V2DI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwod_q_du_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwod_q_du_d((v2u64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhaddw_q_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhaddw_q_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhaddw_qu_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhaddw_qu_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhsubw_q_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhsubw_q_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhsubw_qu_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhsubw_qu_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwev_d_w(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwev_d_w((v2i64)_1, (v4i32)_2, (v4i32)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwev_w_h(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwev_w_h((v4i32)_1, (v8i16)_2, (v8i16)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwev_h_b(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwev_h_b((v8i16)_1, (v16i8)_2, (v16i8)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV4SI, UV4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwev_d_wu(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwev_d_wu((v2u64)_1, (v4u32)_2, (v4u32)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV4SI, UV4SI, UV8HI, UV8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwev_w_hu(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwev_w_hu((v4u32)_1, (v8u16)_2, (v8u16)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV8HI, UV8HI, UV16QI, UV16QI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwev_h_bu(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwev_h_bu((v8u16)_1, (v16u8)_2, (v16u8)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwod_d_w(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwod_d_w((v2i64)_1, (v4i32)_2, (v4i32)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwod_w_h(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwod_w_h((v4i32)_1, (v8i16)_2, (v8i16)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwod_h_b(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwod_h_b((v8i16)_1, (v16i8)_2, (v16i8)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV4SI, UV4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwod_d_wu(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwod_d_wu((v2u64)_1, (v4u32)_2, (v4u32)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV4SI, UV4SI, UV8HI, UV8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwod_w_hu(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwod_w_hu((v4u32)_1, (v8u16)_2, (v8u16)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV8HI, UV8HI, UV16QI, UV16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwod_h_bu(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwod_h_bu((v8u16)_1, (v16u8)_2, (v16u8)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, UV4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwev_d_wu_w(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwev_d_wu_w((v2i64)_1, (v4u32)_2, ++ (v4i32)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, UV8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwev_w_hu_h(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwev_w_hu_h((v4i32)_1, (v8u16)_2, ++ (v8i16)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, UV16QI, V16QI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwev_h_bu_b(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwev_h_bu_b((v8i16)_1, (v16u8)_2, ++ (v16i8)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, UV4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwod_d_wu_w(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwod_d_wu_w((v2i64)_1, (v4u32)_2, ++ (v4i32)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, UV8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwod_w_hu_h(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwod_w_hu_h((v4i32)_1, (v8u16)_2, ++ (v8i16)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, UV16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwod_h_bu_b(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwod_h_bu_b((v8i16)_1, (v16u8)_2, ++ (v16i8)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwev_q_d(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwev_q_d((v2i64)_1, (v2i64)_2, (v2i64)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwod_q_d(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwod_q_d((v2i64)_1, (v2i64)_2, (v2i64)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI, UV2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwev_q_du(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwev_q_du((v2u64)_1, (v2u64)_2, (v2u64)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI, UV2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwod_q_du(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwod_q_du((v2u64)_1, (v2u64)_2, (v2u64)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, UV2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwev_q_du_d(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwev_q_du_d((v2i64)_1, (v2u64)_2, ++ (v2i64)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, UV2DI, V2DI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwod_q_du_d(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwod_q_du_d((v2i64)_1, (v2u64)_2, ++ (v2i64)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vrotr_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vrotr_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vrotr_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vrotr_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vrotr_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vrotr_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vrotr_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vrotr_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vadd_q(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vadd_q((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsub_q(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsub_q((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, rj, si12. */ ++/* Data types in instruction templates: V16QI, CVPOINTER, SI. */ ++#define __lsx_vldrepl_b(/*void **/ _1, /*si12*/ _2) \ ++ ((__m128i)__builtin_lsx_vldrepl_b((void const *)(_1), (_2))) ++ ++/* Assembly instruction format: vd, rj, si11. */ ++/* Data types in instruction templates: V8HI, CVPOINTER, SI. */ ++#define __lsx_vldrepl_h(/*void **/ _1, /*si11*/ _2) \ ++ ((__m128i)__builtin_lsx_vldrepl_h((void const *)(_1), (_2))) ++ ++/* Assembly instruction format: vd, rj, si10. */ ++/* Data types in instruction templates: V4SI, CVPOINTER, SI. */ ++#define __lsx_vldrepl_w(/*void **/ _1, /*si10*/ _2) \ ++ ((__m128i)__builtin_lsx_vldrepl_w((void const *)(_1), (_2))) ++ ++/* Assembly instruction format: vd, rj, si9. */ ++/* Data types in instruction templates: V2DI, CVPOINTER, SI. */ ++#define __lsx_vldrepl_d(/*void **/ _1, /*si9*/ _2) \ ++ ((__m128i)__builtin_lsx_vldrepl_d((void const *)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmskgez_b(__m128i _1) { ++ return (__m128i)__builtin_lsx_vmskgez_b((v16i8)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V16QI, V16QI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmsknz_b(__m128i _1) { ++ return (__m128i)__builtin_lsx_vmsknz_b((v16i8)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V8HI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vexth_h_b(__m128i _1) { ++ return (__m128i)__builtin_lsx_vexth_h_b((v16i8)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vexth_w_h(__m128i _1) { ++ return (__m128i)__builtin_lsx_vexth_w_h((v8i16)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vexth_d_w(__m128i _1) { ++ return (__m128i)__builtin_lsx_vexth_d_w((v4i32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vexth_q_d(__m128i _1) { ++ return (__m128i)__builtin_lsx_vexth_q_d((v2i64)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: UV8HI, UV16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vexth_hu_bu(__m128i _1) { ++ return (__m128i)__builtin_lsx_vexth_hu_bu((v16u8)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: UV4SI, UV8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vexth_wu_hu(__m128i _1) { ++ return (__m128i)__builtin_lsx_vexth_wu_hu((v8u16)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: UV2DI, UV4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vexth_du_wu(__m128i _1) { ++ return (__m128i)__builtin_lsx_vexth_du_wu((v4u32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: UV2DI, UV2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vexth_qu_du(__m128i _1) { ++ return (__m128i)__builtin_lsx_vexth_qu_du((v2u64)_1); ++} ++ ++/* Assembly instruction format: vd, vj, ui3. */ ++/* Data types in instruction templates: V16QI, V16QI, UQI. */ ++#define __lsx_vrotri_b(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vrotri_b((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: V8HI, V8HI, UQI. */ ++#define __lsx_vrotri_h(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vrotri_h((v8i16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V4SI, V4SI, UQI. */ ++#define __lsx_vrotri_w(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vrotri_w((v4i32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui6. */ ++/* Data types in instruction templates: V2DI, V2DI, UQI. */ ++#define __lsx_vrotri_d(/*__m128i*/ _1, /*ui6*/ _2) \ ++ ((__m128i)__builtin_lsx_vrotri_d((v2i64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj. 
*/ ++/* Data types in instruction templates: V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vextl_q_d(__m128i _1) { ++ return (__m128i)__builtin_lsx_vextl_q_d((v2i64)_1); ++} ++ ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI, USI. */ ++#define __lsx_vsrlni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrlni_b_h((v16i8)(_1), (v16i8)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI, USI. */ ++#define __lsx_vsrlni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrlni_h_w((v8i16)(_1), (v8i16)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui6. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI, USI. */ ++#define __lsx_vsrlni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrlni_w_d((v4i32)(_1), (v4i32)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui7. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI, USI. */ ++#define __lsx_vsrlni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrlni_d_q((v2i64)(_1), (v2i64)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI, USI. */ ++#define __lsx_vsrlrni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrlrni_b_h((v16i8)(_1), (v16i8)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI, USI. */ ++#define __lsx_vsrlrni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrlrni_h_w((v8i16)(_1), (v8i16)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui6. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI, USI. */ ++#define __lsx_vsrlrni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrlrni_w_d((v4i32)(_1), (v4i32)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui7. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI, USI. */ ++#define __lsx_vsrlrni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrlrni_d_q((v2i64)(_1), (v2i64)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI, USI. */ ++#define __lsx_vssrlni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlni_b_h((v16i8)(_1), (v16i8)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI, USI. */ ++#define __lsx_vssrlni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlni_h_w((v8i16)(_1), (v8i16)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui6. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI, USI. */ ++#define __lsx_vssrlni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlni_w_d((v4i32)(_1), (v4i32)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui7. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI, USI. */ ++#define __lsx_vssrlni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlni_d_q((v2i64)(_1), (v2i64)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui4. 
*/ ++/* Data types in instruction templates: UV16QI, UV16QI, V16QI, USI. */ ++#define __lsx_vssrlni_bu_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlni_bu_h((v16u8)(_1), (v16i8)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: UV8HI, UV8HI, V8HI, USI. */ ++#define __lsx_vssrlni_hu_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlni_hu_w((v8u16)(_1), (v8i16)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui6. */ ++/* Data types in instruction templates: UV4SI, UV4SI, V4SI, USI. */ ++#define __lsx_vssrlni_wu_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlni_wu_d((v4u32)(_1), (v4i32)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui7. */ ++/* Data types in instruction templates: UV2DI, UV2DI, V2DI, USI. */ ++#define __lsx_vssrlni_du_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlni_du_q((v2u64)(_1), (v2i64)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI, USI. */ ++#define __lsx_vssrlrni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlrni_b_h((v16i8)(_1), (v16i8)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI, USI. */ ++#define __lsx_vssrlrni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlrni_h_w((v8i16)(_1), (v8i16)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui6. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI, USI. */ ++#define __lsx_vssrlrni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlrni_w_d((v4i32)(_1), (v4i32)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui7. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI, USI. */ ++#define __lsx_vssrlrni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlrni_d_q((v2i64)(_1), (v2i64)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: UV16QI, UV16QI, V16QI, USI. */ ++#define __lsx_vssrlrni_bu_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlrni_bu_h((v16u8)(_1), (v16i8)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: UV8HI, UV8HI, V8HI, USI. */ ++#define __lsx_vssrlrni_hu_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlrni_hu_w((v8u16)(_1), (v8i16)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui6. */ ++/* Data types in instruction templates: UV4SI, UV4SI, V4SI, USI. */ ++#define __lsx_vssrlrni_wu_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlrni_wu_d((v4u32)(_1), (v4i32)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui7. */ ++/* Data types in instruction templates: UV2DI, UV2DI, V2DI, USI. */ ++#define __lsx_vssrlrni_du_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlrni_du_q((v2u64)(_1), (v2i64)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI, USI. */ ++#define __lsx_vsrani_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrani_b_h((v16i8)(_1), (v16i8)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui5. 
*/ ++/* Data types in instruction templates: V8HI, V8HI, V8HI, USI. */ ++#define __lsx_vsrani_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrani_h_w((v8i16)(_1), (v8i16)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui6. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI, USI. */ ++#define __lsx_vsrani_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrani_w_d((v4i32)(_1), (v4i32)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui7. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI, USI. */ ++#define __lsx_vsrani_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrani_d_q((v2i64)(_1), (v2i64)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI, USI. */ ++#define __lsx_vsrarni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrarni_b_h((v16i8)(_1), (v16i8)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI, USI. */ ++#define __lsx_vsrarni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrarni_h_w((v8i16)(_1), (v8i16)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui6. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI, USI. */ ++#define __lsx_vsrarni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrarni_w_d((v4i32)(_1), (v4i32)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui7. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI, USI. */ ++#define __lsx_vsrarni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrarni_d_q((v2i64)(_1), (v2i64)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI, USI. */ ++#define __lsx_vssrani_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrani_b_h((v16i8)(_1), (v16i8)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI, USI. */ ++#define __lsx_vssrani_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrani_h_w((v8i16)(_1), (v8i16)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui6. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI, USI. */ ++#define __lsx_vssrani_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrani_w_d((v4i32)(_1), (v4i32)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui7. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI, USI. */ ++#define __lsx_vssrani_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrani_d_q((v2i64)(_1), (v2i64)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: UV16QI, UV16QI, V16QI, USI. */ ++#define __lsx_vssrani_bu_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrani_bu_h((v16u8)(_1), (v16i8)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: UV8HI, UV8HI, V8HI, USI. */ ++#define __lsx_vssrani_hu_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrani_hu_w((v8u16)(_1), (v8i16)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui6. */ ++/* Data types in instruction templates: UV4SI, UV4SI, V4SI, USI. 
*/ ++#define __lsx_vssrani_wu_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrani_wu_d((v4u32)(_1), (v4i32)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui7. */ ++/* Data types in instruction templates: UV2DI, UV2DI, V2DI, USI. */ ++#define __lsx_vssrani_du_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrani_du_q((v2u64)(_1), (v2i64)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI, USI. */ ++#define __lsx_vssrarni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrarni_b_h((v16i8)(_1), (v16i8)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI, USI. */ ++#define __lsx_vssrarni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrarni_h_w((v8i16)(_1), (v8i16)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui6. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI, USI. */ ++#define __lsx_vssrarni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrarni_w_d((v4i32)(_1), (v4i32)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui7. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI, USI. */ ++#define __lsx_vssrarni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrarni_d_q((v2i64)(_1), (v2i64)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: UV16QI, UV16QI, V16QI, USI. */ ++#define __lsx_vssrarni_bu_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrarni_bu_h((v16u8)(_1), (v16i8)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: UV8HI, UV8HI, V8HI, USI. */ ++#define __lsx_vssrarni_hu_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrarni_hu_w((v8u16)(_1), (v8i16)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui6. */ ++/* Data types in instruction templates: UV4SI, UV4SI, V4SI, USI. */ ++#define __lsx_vssrarni_wu_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrarni_wu_d((v4u32)(_1), (v4i32)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui7. */ ++/* Data types in instruction templates: UV2DI, UV2DI, V2DI, USI. */ ++#define __lsx_vssrarni_du_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrarni_du_q((v2u64)(_1), (v2i64)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui8. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI, USI. */ ++#define __lsx_vpermi_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ ++ ((__m128i)__builtin_lsx_vpermi_w((v4i32)(_1), (v4i32)(_2), (_3))) ++ ++/* Assembly instruction format: vd, rj, si12. */ ++/* Data types in instruction templates: V16QI, CVPOINTER, SI. */ ++#define __lsx_vld(/*void **/ _1, /*si12*/ _2) \ ++ ((__m128i)__builtin_lsx_vld((void const *)(_1), (_2))) ++ ++/* Assembly instruction format: vd, rj, si12. */ ++/* Data types in instruction templates: VOID, V16QI, CVPOINTER, SI. */ ++#define __lsx_vst(/*__m128i*/ _1, /*void **/ _2, /*si12*/ _3) \ ++ ((void)__builtin_lsx_vst((v16i8)(_1), (void *)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V8HI, V8HI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrlrn_b_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrlrn_b_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrlrn_h_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrlrn_h_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrlrn_w_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrlrn_w_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V8HI, V8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrln_b_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrln_b_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V4SI, V4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrln_h_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrln_h_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V2DI, V2DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrln_w_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrln_w_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vorn_v(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vorn_v((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, i13. */ ++/* Data types in instruction templates: V2DI, HI. */ ++#define __lsx_vldi(/*i13*/ _1) ((__m128i)__builtin_lsx_vldi((_1))) ++ ++/* Assembly instruction format: vd, vj, vk, va. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI, V16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vshuf_b(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vshuf_b((v16i8)_1, (v16i8)_2, (v16i8)_3); ++} ++ ++/* Assembly instruction format: vd, rj, rk. */ ++/* Data types in instruction templates: V16QI, CVPOINTER, DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vldx(void const *_1, long int _2) { ++ return (__m128i)__builtin_lsx_vldx((void const *)_1, (long int)_2); ++} ++ ++/* Assembly instruction format: vd, rj, rk. */ ++/* Data types in instruction templates: VOID, V16QI, CVPOINTER, DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) void ++ __lsx_vstx(__m128i _1, void *_2, long int _3) { ++ return (void)__builtin_lsx_vstx((v16i8)_1, (void *)_2, (long int)_3); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: UV2DI, UV2DI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vextl_qu_du(__m128i _1) { ++ return (__m128i)__builtin_lsx_vextl_qu_du((v2u64)_1); ++} ++ ++/* Assembly instruction format: cd, vj. */ ++/* Data types in instruction templates: SI, UV16QI. */ ++#define __lsx_bnz_b(/*__m128i*/ _1) ((int)__builtin_lsx_bnz_b((v16u8)(_1))) ++ ++/* Assembly instruction format: cd, vj. */ ++/* Data types in instruction templates: SI, UV2DI. */ ++#define __lsx_bnz_d(/*__m128i*/ _1) ((int)__builtin_lsx_bnz_d((v2u64)(_1))) ++ ++/* Assembly instruction format: cd, vj. */ ++/* Data types in instruction templates: SI, UV8HI. */ ++#define __lsx_bnz_h(/*__m128i*/ _1) ((int)__builtin_lsx_bnz_h((v8u16)(_1))) ++ ++/* Assembly instruction format: cd, vj. */ ++/* Data types in instruction templates: SI, UV16QI. */ ++#define __lsx_bnz_v(/*__m128i*/ _1) ((int)__builtin_lsx_bnz_v((v16u8)(_1))) ++ ++/* Assembly instruction format: cd, vj. */ ++/* Data types in instruction templates: SI, UV4SI. */ ++#define __lsx_bnz_w(/*__m128i*/ _1) ((int)__builtin_lsx_bnz_w((v4u32)(_1))) ++ ++/* Assembly instruction format: cd, vj. */ ++/* Data types in instruction templates: SI, UV16QI. */ ++#define __lsx_bz_b(/*__m128i*/ _1) ((int)__builtin_lsx_bz_b((v16u8)(_1))) ++ ++/* Assembly instruction format: cd, vj. */ ++/* Data types in instruction templates: SI, UV2DI. */ ++#define __lsx_bz_d(/*__m128i*/ _1) ((int)__builtin_lsx_bz_d((v2u64)(_1))) ++ ++/* Assembly instruction format: cd, vj. */ ++/* Data types in instruction templates: SI, UV8HI. */ ++#define __lsx_bz_h(/*__m128i*/ _1) ((int)__builtin_lsx_bz_h((v8u16)(_1))) ++ ++/* Assembly instruction format: cd, vj. */ ++/* Data types in instruction templates: SI, UV16QI. */ ++#define __lsx_bz_v(/*__m128i*/ _1) ((int)__builtin_lsx_bz_v((v16u8)(_1))) ++ ++/* Assembly instruction format: cd, vj. */ ++/* Data types in instruction templates: SI, UV4SI. */ ++#define __lsx_bz_w(/*__m128i*/ _1) ((int)__builtin_lsx_bz_w((v4u32)(_1))) ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_caf_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_caf_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_caf_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_caf_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_ceq_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_ceq_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_ceq_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_ceq_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cle_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cle_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cle_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cle_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_clt_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_clt_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_clt_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_clt_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cne_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cne_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cne_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cne_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cor_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cor_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cor_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cor_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cueq_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cueq_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cueq_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cueq_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cule_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cule_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cule_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cule_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cult_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cult_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cult_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cult_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cun_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cun_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cune_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cune_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cune_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cune_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cun_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cun_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_saf_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_saf_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_saf_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_saf_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_seq_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_seq_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_seq_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_seq_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sle_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sle_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sle_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sle_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_slt_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_slt_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_slt_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_slt_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sne_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sne_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sne_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sne_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sor_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sor_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sor_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sor_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sueq_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sueq_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sueq_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sueq_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sule_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sule_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sule_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sule_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sult_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sult_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sult_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sult_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sun_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sun_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sune_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sune_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sune_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sune_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sun_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sun_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, si10. */ ++/* Data types in instruction templates: V16QI, HI. */ ++#define __lsx_vrepli_b(/*si10*/ _1) ((__m128i)__builtin_lsx_vrepli_b((_1))) ++ ++/* Assembly instruction format: vd, si10. */ ++/* Data types in instruction templates: V2DI, HI. 
*/ ++#define __lsx_vrepli_d(/*si10*/ _1) ((__m128i)__builtin_lsx_vrepli_d((_1))) ++ ++/* Assembly instruction format: vd, si10. */ ++/* Data types in instruction templates: V8HI, HI. */ ++#define __lsx_vrepli_h(/*si10*/ _1) ((__m128i)__builtin_lsx_vrepli_h((_1))) ++ ++/* Assembly instruction format: vd, si10. */ ++/* Data types in instruction templates: V4SI, HI. */ ++#define __lsx_vrepli_w(/*si10*/ _1) ((__m128i)__builtin_lsx_vrepli_w((_1))) ++ ++#endif /* defined(__loongarch_sx) */ ++#endif /* _GCC_LOONGSON_SXINTRIN_H */ +diff --git a/lib/Sema/SemaChecking.cpp b/lib/Sema/SemaChecking.cpp +index dae51d069..5504f9937 100644 +--- a/lib/Sema/SemaChecking.cpp ++++ b/lib/Sema/SemaChecking.cpp +@@ -1981,6 +1981,9 @@ bool Sema::CheckTSBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID, + case llvm::Triple::riscv32: + case llvm::Triple::riscv64: + return CheckRISCVBuiltinFunctionCall(TI, BuiltinID, TheCall); ++ case llvm::Triple::loongarch32: ++ case llvm::Triple::loongarch64: ++ return CheckLoongArchBuiltinFunctionCall(TI, BuiltinID, TheCall); + } + } + +@@ -4445,6 +4448,559 @@ bool Sema::CheckRISCVBuiltinFunctionCall(const TargetInfo &TI, + return false; + } + ++// CheckLoongArchBuiltinFunctionCall - Checks the constant value passed to the ++// intrinsic is correct. ++// ++// FIXME: The size tests here should instead be tablegen'd along with the ++// definitions from include/clang/Basic/BuiltinsLoongArch.def. ++// FIXME: GCC is strict on signedness for some of these intrinsics, we should ++// be too. ++bool Sema::CheckLoongArchBuiltinFunctionCall(const TargetInfo &TI, ++ unsigned BuiltinID, ++ CallExpr *TheCall) { ++ unsigned i = 0, l = 0, u = 0, m = 0; ++ switch (BuiltinID) { ++ default: return false; ++ // LSX/LASX intrinsics. ++ // These intrinsics take an unsigned 3 bit immediate. ++ case LoongArch::BI__builtin_lsx_vbitclri_b: ++ case LoongArch::BI__builtin_lasx_xvbitclri_b: ++ case LoongArch::BI__builtin_lsx_vbitrevi_b: ++ case LoongArch::BI__builtin_lasx_xvbitrevi_b: ++ case LoongArch::BI__builtin_lsx_vbitseti_b: ++ case LoongArch::BI__builtin_lasx_xvbitseti_b: ++ case LoongArch::BI__builtin_lsx_vsat_b: ++ case LoongArch::BI__builtin_lsx_vsat_bu: ++ case LoongArch::BI__builtin_lasx_xvsat_b: ++ case LoongArch::BI__builtin_lasx_xvsat_bu: ++ case LoongArch::BI__builtin_lsx_vslli_b: ++ case LoongArch::BI__builtin_lasx_xvslli_b: ++ case LoongArch::BI__builtin_lsx_vsrai_b: ++ case LoongArch::BI__builtin_lasx_xvsrai_b: ++ case LoongArch::BI__builtin_lsx_vsrari_b: ++ case LoongArch::BI__builtin_lasx_xvsrari_b: ++ case LoongArch::BI__builtin_lsx_vsrli_b: ++ case LoongArch::BI__builtin_lasx_xvsrli_b: ++ case LoongArch::BI__builtin_lsx_vsllwil_h_b: ++ case LoongArch::BI__builtin_lsx_vsllwil_hu_bu: ++ case LoongArch::BI__builtin_lasx_xvsllwil_h_b: ++ case LoongArch::BI__builtin_lasx_xvsllwil_hu_bu: ++ case LoongArch::BI__builtin_lsx_vrotri_b: ++ case LoongArch::BI__builtin_lasx_xvrotri_b: ++ case LoongArch::BI__builtin_lasx_xvsrlri_b: ++ case LoongArch::BI__builtin_lsx_vsrlri_b: ++ i = 1; ++ l = 0; ++ u = 7; ++ break; ++ // These intrinsics take an unsigned 4 bit immediate. 
++ case LoongArch::BI__builtin_lsx_vbitclri_h: ++ case LoongArch::BI__builtin_lasx_xvbitclri_h: ++ case LoongArch::BI__builtin_lsx_vbitrevi_h: ++ case LoongArch::BI__builtin_lasx_xvbitrevi_h: ++ case LoongArch::BI__builtin_lsx_vbitseti_h: ++ case LoongArch::BI__builtin_lasx_xvbitseti_h: ++ case LoongArch::BI__builtin_lsx_vsat_h: ++ case LoongArch::BI__builtin_lsx_vsat_hu: ++ case LoongArch::BI__builtin_lasx_xvsat_h: ++ case LoongArch::BI__builtin_lasx_xvsat_hu: ++ case LoongArch::BI__builtin_lsx_vslli_h: ++ case LoongArch::BI__builtin_lasx_xvslli_h: ++ case LoongArch::BI__builtin_lsx_vsrai_h: ++ case LoongArch::BI__builtin_lasx_xvsrai_h: ++ case LoongArch::BI__builtin_lsx_vsrari_h: ++ case LoongArch::BI__builtin_lasx_xvsrari_h: ++ case LoongArch::BI__builtin_lsx_vsrli_h: ++ case LoongArch::BI__builtin_lasx_xvsrli_h: ++ case LoongArch::BI__builtin_lsx_vsllwil_w_h: ++ case LoongArch::BI__builtin_lsx_vsllwil_wu_hu: ++ case LoongArch::BI__builtin_lasx_xvsllwil_w_h: ++ case LoongArch::BI__builtin_lasx_xvsllwil_wu_hu: ++ case LoongArch::BI__builtin_lsx_vrotri_h: ++ case LoongArch::BI__builtin_lasx_xvrotri_h: ++ case LoongArch::BI__builtin_lasx_xvsrlri_h: ++ case LoongArch::BI__builtin_lsx_vsrlri_h: ++ i = 1; ++ l = 0; ++ u = 15; ++ break; ++ case LoongArch::BI__builtin_lsx_vssrarni_b_h: ++ case LoongArch::BI__builtin_lsx_vssrarni_bu_h: ++ case LoongArch::BI__builtin_lasx_xvssrarni_b_h: ++ case LoongArch::BI__builtin_lasx_xvssrarni_bu_h: ++ case LoongArch::BI__builtin_lsx_vssrani_b_h: ++ case LoongArch::BI__builtin_lsx_vssrani_bu_h: ++ case LoongArch::BI__builtin_lasx_xvssrani_b_h: ++ case LoongArch::BI__builtin_lasx_xvssrani_bu_h: ++ case LoongArch::BI__builtin_lsx_vsrarni_b_h: ++ case LoongArch::BI__builtin_lasx_xvsrarni_b_h: ++ case LoongArch::BI__builtin_lsx_vsrlni_b_h: ++ case LoongArch::BI__builtin_lasx_xvsrlni_b_h: ++ case LoongArch::BI__builtin_lasx_xvsrlrni_b_h: ++ case LoongArch::BI__builtin_lsx_vssrlni_b_h: ++ case LoongArch::BI__builtin_lsx_vssrlni_bu_h: ++ case LoongArch::BI__builtin_lasx_xvssrlni_b_h: ++ case LoongArch::BI__builtin_lasx_xvssrlni_bu_h: ++ case LoongArch::BI__builtin_lsx_vssrlrni_b_h: ++ case LoongArch::BI__builtin_lsx_vssrlrni_bu_h: ++ case LoongArch::BI__builtin_lasx_xvssrlrni_b_h: ++ case LoongArch::BI__builtin_lasx_xvssrlrni_bu_h: ++ case LoongArch::BI__builtin_lsx_vsrani_b_h: ++ case LoongArch::BI__builtin_lasx_xvsrani_b_h: ++ i = 2; ++ l = 0; ++ u = 15; ++ break; ++ // These intrinsics take an unsigned 5 bit immediate. ++ // The first block of intrinsics actually have an unsigned 5 bit field, ++ // not a df/n field. 
++ case LoongArch::BI__builtin_lsx_vslei_bu: ++ case LoongArch::BI__builtin_lsx_vslei_hu: ++ case LoongArch::BI__builtin_lsx_vslei_wu: ++ case LoongArch::BI__builtin_lsx_vslei_du: ++ case LoongArch::BI__builtin_lasx_xvslei_bu: ++ case LoongArch::BI__builtin_lasx_xvslei_hu: ++ case LoongArch::BI__builtin_lasx_xvslei_wu: ++ case LoongArch::BI__builtin_lasx_xvslei_du: ++ case LoongArch::BI__builtin_lsx_vslti_bu: ++ case LoongArch::BI__builtin_lsx_vslti_hu: ++ case LoongArch::BI__builtin_lsx_vslti_wu: ++ case LoongArch::BI__builtin_lsx_vslti_du: ++ case LoongArch::BI__builtin_lasx_xvslti_bu: ++ case LoongArch::BI__builtin_lasx_xvslti_hu: ++ case LoongArch::BI__builtin_lasx_xvslti_wu: ++ case LoongArch::BI__builtin_lasx_xvslti_du: ++ case LoongArch::BI__builtin_lsx_vmaxi_bu: ++ case LoongArch::BI__builtin_lsx_vmaxi_hu: ++ case LoongArch::BI__builtin_lsx_vmaxi_wu: ++ case LoongArch::BI__builtin_lsx_vmaxi_du: ++ case LoongArch::BI__builtin_lasx_xvmaxi_bu: ++ case LoongArch::BI__builtin_lasx_xvmaxi_hu: ++ case LoongArch::BI__builtin_lasx_xvmaxi_wu: ++ case LoongArch::BI__builtin_lasx_xvmaxi_du: ++ case LoongArch::BI__builtin_lsx_vmini_bu: ++ case LoongArch::BI__builtin_lsx_vmini_hu: ++ case LoongArch::BI__builtin_lsx_vmini_wu: ++ case LoongArch::BI__builtin_lsx_vmini_du: ++ case LoongArch::BI__builtin_lasx_xvmini_bu: ++ case LoongArch::BI__builtin_lasx_xvmini_hu: ++ case LoongArch::BI__builtin_lasx_xvmini_wu: ++ case LoongArch::BI__builtin_lasx_xvmini_du: ++ case LoongArch::BI__builtin_lsx_vaddi_bu: ++ case LoongArch::BI__builtin_lsx_vaddi_hu: ++ case LoongArch::BI__builtin_lsx_vaddi_wu: ++ case LoongArch::BI__builtin_lsx_vaddi_du: ++ case LoongArch::BI__builtin_lasx_xvaddi_bu: ++ case LoongArch::BI__builtin_lasx_xvaddi_hu: ++ case LoongArch::BI__builtin_lasx_xvaddi_wu: ++ case LoongArch::BI__builtin_lasx_xvaddi_du: ++ case LoongArch::BI__builtin_lsx_vbitclri_w: ++ case LoongArch::BI__builtin_lasx_xvbitclri_w: ++ case LoongArch::BI__builtin_lsx_vbitrevi_w: ++ case LoongArch::BI__builtin_lasx_xvbitrevi_w: ++ case LoongArch::BI__builtin_lsx_vbitseti_w: ++ case LoongArch::BI__builtin_lasx_xvbitseti_w: ++ case LoongArch::BI__builtin_lsx_vsat_w: ++ case LoongArch::BI__builtin_lsx_vsat_wu: ++ case LoongArch::BI__builtin_lasx_xvsat_w: ++ case LoongArch::BI__builtin_lasx_xvsat_wu: ++ case LoongArch::BI__builtin_lsx_vslli_w: ++ case LoongArch::BI__builtin_lasx_xvslli_w: ++ case LoongArch::BI__builtin_lsx_vsrai_w: ++ case LoongArch::BI__builtin_lasx_xvsrai_w: ++ case LoongArch::BI__builtin_lsx_vsrari_w: ++ case LoongArch::BI__builtin_lasx_xvsrari_w: ++ case LoongArch::BI__builtin_lsx_vsrli_w: ++ case LoongArch::BI__builtin_lasx_xvsrli_w: ++ case LoongArch::BI__builtin_lsx_vsllwil_d_w: ++ case LoongArch::BI__builtin_lsx_vsllwil_du_wu: ++ case LoongArch::BI__builtin_lasx_xvsllwil_d_w: ++ case LoongArch::BI__builtin_lasx_xvsllwil_du_wu: ++ case LoongArch::BI__builtin_lsx_vsrlri_w: ++ case LoongArch::BI__builtin_lasx_xvsrlri_w: ++ case LoongArch::BI__builtin_lsx_vrotri_w: ++ case LoongArch::BI__builtin_lasx_xvrotri_w: ++ case LoongArch::BI__builtin_lsx_vsubi_bu: ++ case LoongArch::BI__builtin_lsx_vsubi_hu: ++ case LoongArch::BI__builtin_lasx_xvsubi_bu: ++ case LoongArch::BI__builtin_lasx_xvsubi_hu: ++ case LoongArch::BI__builtin_lasx_xvsubi_wu: ++ case LoongArch::BI__builtin_lasx_xvsubi_du: ++ case LoongArch::BI__builtin_lsx_vbsrl_v: ++ case LoongArch::BI__builtin_lsx_vbsll_v: ++ case LoongArch::BI__builtin_lasx_xvbsrl_v: ++ case LoongArch::BI__builtin_lasx_xvbsll_v: ++ case LoongArch::BI__builtin_lsx_vsubi_wu: 
++ case LoongArch::BI__builtin_lsx_vsubi_du: ++ i = 1; ++ l = 0; ++ u = 31; ++ break; ++ case LoongArch::BI__builtin_lsx_vssrarni_h_w: ++ case LoongArch::BI__builtin_lsx_vssrarni_hu_w: ++ case LoongArch::BI__builtin_lasx_xvssrarni_h_w: ++ case LoongArch::BI__builtin_lasx_xvssrarni_hu_w: ++ case LoongArch::BI__builtin_lsx_vssrani_h_w: ++ case LoongArch::BI__builtin_lsx_vssrani_hu_w: ++ case LoongArch::BI__builtin_lasx_xvssrani_h_w: ++ case LoongArch::BI__builtin_lasx_xvssrani_hu_w: ++ case LoongArch::BI__builtin_lsx_vsrarni_h_w: ++ case LoongArch::BI__builtin_lasx_xvsrarni_h_w: ++ case LoongArch::BI__builtin_lsx_vsrani_h_w: ++ case LoongArch::BI__builtin_lasx_xvsrani_h_w: ++ case LoongArch::BI__builtin_lsx_vfrstpi_b: ++ case LoongArch::BI__builtin_lsx_vfrstpi_h: ++ case LoongArch::BI__builtin_lasx_xvfrstpi_b: ++ case LoongArch::BI__builtin_lasx_xvfrstpi_h: ++ case LoongArch::BI__builtin_lsx_vsrlni_h_w: ++ case LoongArch::BI__builtin_lasx_xvsrlni_h_w: ++ case LoongArch::BI__builtin_lasx_xvsrlrni_h_w: ++ case LoongArch::BI__builtin_lsx_vssrlni_h_w: ++ case LoongArch::BI__builtin_lsx_vssrlni_hu_w: ++ case LoongArch::BI__builtin_lasx_xvssrlni_h_w: ++ case LoongArch::BI__builtin_lasx_xvssrlni_hu_w: ++ case LoongArch::BI__builtin_lsx_vssrlrni_h_w: ++ case LoongArch::BI__builtin_lsx_vssrlrni_hu_w: ++ case LoongArch::BI__builtin_lasx_xvssrlrni_h_w: ++ case LoongArch::BI__builtin_lasx_xvssrlrni_hu_w: ++ i = 2; ++ l = 0; ++ u = 31; ++ break; ++ case LoongArch::BI__builtin_lasx_xvstelm_b: ++ return SemaBuiltinConstantArgRange(TheCall, 2, -128, 127) || ++ SemaBuiltinConstantArgRange(TheCall, 3, 0, 31); ++ // These intrinsics take an unsigned 6 bit immediate. ++ case LoongArch::BI__builtin_lsx_vbitclri_d: ++ case LoongArch::BI__builtin_lasx_xvbitclri_d: ++ case LoongArch::BI__builtin_lsx_vbitrevi_d: ++ case LoongArch::BI__builtin_lasx_xvbitrevi_d: ++ case LoongArch::BI__builtin_lsx_vbitseti_d: ++ case LoongArch::BI__builtin_lasx_xvbitseti_d: ++ case LoongArch::BI__builtin_lsx_vsat_d: ++ case LoongArch::BI__builtin_lsx_vsat_du: ++ case LoongArch::BI__builtin_lasx_xvsat_d: ++ case LoongArch::BI__builtin_lasx_xvsat_du: ++ case LoongArch::BI__builtin_lsx_vslli_d: ++ case LoongArch::BI__builtin_lasx_xvslli_d: ++ case LoongArch::BI__builtin_lsx_vsrai_d: ++ case LoongArch::BI__builtin_lasx_xvsrai_d: ++ case LoongArch::BI__builtin_lsx_vsrli_d: ++ case LoongArch::BI__builtin_lasx_xvsrli_d: ++ case LoongArch::BI__builtin_lsx_vsrari_d: ++ case LoongArch::BI__builtin_lasx_xvsrari_d: ++ case LoongArch::BI__builtin_lsx_vrotri_d: ++ case LoongArch::BI__builtin_lasx_xvrotri_d: ++ case LoongArch::BI__builtin_lasx_xvsrlri_d: ++ case LoongArch::BI__builtin_lsx_vsrlri_d: ++ i = 1; ++ l = 0; ++ u = 63; ++ break; ++ case LoongArch::BI__builtin_lsx_vssrarni_w_d: ++ case LoongArch::BI__builtin_lsx_vssrarni_wu_d: ++ case LoongArch::BI__builtin_lasx_xvssrarni_w_d: ++ case LoongArch::BI__builtin_lasx_xvssrarni_wu_d: ++ case LoongArch::BI__builtin_lsx_vssrani_w_d: ++ case LoongArch::BI__builtin_lsx_vssrani_wu_d: ++ case LoongArch::BI__builtin_lasx_xvssrani_w_d: ++ case LoongArch::BI__builtin_lasx_xvssrani_wu_d: ++ case LoongArch::BI__builtin_lsx_vsrarni_w_d: ++ case LoongArch::BI__builtin_lasx_xvsrarni_w_d: ++ case LoongArch::BI__builtin_lsx_vsrlni_w_d: ++ case LoongArch::BI__builtin_lasx_xvsrlni_w_d: ++ case LoongArch::BI__builtin_lasx_xvsrlrni_w_d: ++ case LoongArch::BI__builtin_lsx_vssrlni_w_d: ++ case LoongArch::BI__builtin_lsx_vssrlni_wu_d: ++ case LoongArch::BI__builtin_lasx_xvssrlni_w_d: ++ case 
LoongArch::BI__builtin_lasx_xvssrlni_wu_d: ++ case LoongArch::BI__builtin_lsx_vssrlrni_w_d: ++ case LoongArch::BI__builtin_lsx_vssrlrni_wu_d: ++ case LoongArch::BI__builtin_lasx_xvssrlrni_w_d: ++ case LoongArch::BI__builtin_lasx_xvssrlrni_wu_d: ++ case LoongArch::BI__builtin_lsx_vsrani_w_d: ++ case LoongArch::BI__builtin_lasx_xvsrani_w_d: ++ i = 2; ++ l = 0; ++ u = 63; ++ break; ++ // These intrinsics take an unsigned 7 bit immediate. ++ case LoongArch::BI__builtin_lsx_vssrarni_d_q: ++ case LoongArch::BI__builtin_lsx_vssrarni_du_q: ++ case LoongArch::BI__builtin_lasx_xvssrarni_d_q: ++ case LoongArch::BI__builtin_lasx_xvssrarni_du_q: ++ case LoongArch::BI__builtin_lsx_vssrani_d_q: ++ case LoongArch::BI__builtin_lsx_vssrani_du_q: ++ case LoongArch::BI__builtin_lasx_xvssrani_d_q: ++ case LoongArch::BI__builtin_lasx_xvssrani_du_q: ++ case LoongArch::BI__builtin_lsx_vsrarni_d_q: ++ case LoongArch::BI__builtin_lasx_xvsrarni_d_q: ++ case LoongArch::BI__builtin_lsx_vssrlni_d_q: ++ case LoongArch::BI__builtin_lsx_vssrlni_du_q: ++ case LoongArch::BI__builtin_lasx_xvssrlni_d_q: ++ case LoongArch::BI__builtin_lasx_xvssrlni_du_q: ++ case LoongArch::BI__builtin_lsx_vssrlrni_d_q: ++ case LoongArch::BI__builtin_lsx_vssrlrni_du_q: ++ case LoongArch::BI__builtin_lasx_xvssrlrni_d_q: ++ case LoongArch::BI__builtin_lasx_xvssrlrni_du_q: ++ case LoongArch::BI__builtin_lsx_vsrani_d_q: ++ case LoongArch::BI__builtin_lasx_xvsrani_d_q: ++ case LoongArch::BI__builtin_lasx_xvsrlni_d_q: ++ case LoongArch::BI__builtin_lasx_xvsrlrni_d_q: ++ case LoongArch::BI__builtin_lsx_vsrlni_d_q: ++ i = 2; ++ l = 0; ++ u = 127; ++ break; ++ // These intrinsics take a signed 5 bit immediate. ++ case LoongArch::BI__builtin_lsx_vseqi_b: ++ case LoongArch::BI__builtin_lsx_vseqi_h: ++ case LoongArch::BI__builtin_lsx_vseqi_w: ++ case LoongArch::BI__builtin_lsx_vseqi_d: ++ case LoongArch::BI__builtin_lasx_xvseqi_b: ++ case LoongArch::BI__builtin_lasx_xvseqi_h: ++ case LoongArch::BI__builtin_lasx_xvseqi_w: ++ case LoongArch::BI__builtin_lasx_xvseqi_d: ++ case LoongArch::BI__builtin_lsx_vslti_b: ++ case LoongArch::BI__builtin_lsx_vslti_h: ++ case LoongArch::BI__builtin_lsx_vslti_w: ++ case LoongArch::BI__builtin_lsx_vslti_d: ++ case LoongArch::BI__builtin_lasx_xvslti_b: ++ case LoongArch::BI__builtin_lasx_xvslti_h: ++ case LoongArch::BI__builtin_lasx_xvslti_w: ++ case LoongArch::BI__builtin_lasx_xvslti_d: ++ case LoongArch::BI__builtin_lsx_vslei_b: ++ case LoongArch::BI__builtin_lsx_vslei_h: ++ case LoongArch::BI__builtin_lsx_vslei_w: ++ case LoongArch::BI__builtin_lsx_vslei_d: ++ case LoongArch::BI__builtin_lasx_xvslei_b: ++ case LoongArch::BI__builtin_lasx_xvslei_h: ++ case LoongArch::BI__builtin_lasx_xvslei_w: ++ case LoongArch::BI__builtin_lasx_xvslei_d: ++ case LoongArch::BI__builtin_lsx_vmaxi_b: ++ case LoongArch::BI__builtin_lsx_vmaxi_h: ++ case LoongArch::BI__builtin_lsx_vmaxi_w: ++ case LoongArch::BI__builtin_lsx_vmaxi_d: ++ case LoongArch::BI__builtin_lasx_xvmaxi_b: ++ case LoongArch::BI__builtin_lasx_xvmaxi_h: ++ case LoongArch::BI__builtin_lasx_xvmaxi_w: ++ case LoongArch::BI__builtin_lasx_xvmaxi_d: ++ case LoongArch::BI__builtin_lsx_vmini_b: ++ case LoongArch::BI__builtin_lsx_vmini_h: ++ case LoongArch::BI__builtin_lsx_vmini_w: ++ case LoongArch::BI__builtin_lasx_xvmini_b: ++ case LoongArch::BI__builtin_lasx_xvmini_h: ++ case LoongArch::BI__builtin_lasx_xvmini_w: ++ case LoongArch::BI__builtin_lasx_xvmini_d: ++ case LoongArch::BI__builtin_lsx_vmini_d: ++ i = 1; ++ l = -16; ++ u = 15; ++ break; ++ // These intrinsics take a signed 
9 bit immediate. ++ case LoongArch::BI__builtin_lasx_xvldrepl_d: ++ case LoongArch::BI__builtin_lsx_vldrepl_d: ++ i = 1; ++ l = -256; ++ u = 255; ++ break; ++ // These intrinsics take an unsigned 8 bit immediate. ++ case LoongArch::BI__builtin_lsx_vandi_b: ++ case LoongArch::BI__builtin_lasx_xvandi_b: ++ case LoongArch::BI__builtin_lsx_vnori_b: ++ case LoongArch::BI__builtin_lasx_xvnori_b: ++ case LoongArch::BI__builtin_lsx_vori_b: ++ case LoongArch::BI__builtin_lasx_xvori_b: ++ case LoongArch::BI__builtin_lsx_vshuf4i_b: ++ case LoongArch::BI__builtin_lsx_vshuf4i_h: ++ case LoongArch::BI__builtin_lsx_vshuf4i_w: ++ case LoongArch::BI__builtin_lasx_xvshuf4i_b: ++ case LoongArch::BI__builtin_lasx_xvshuf4i_h: ++ case LoongArch::BI__builtin_lasx_xvshuf4i_w: ++ case LoongArch::BI__builtin_lasx_xvxori_b: ++ case LoongArch::BI__builtin_lasx_xvpermi_d: ++ case LoongArch::BI__builtin_lsx_vxori_b: ++ i = 1; ++ l = 0; ++ u = 255; ++ break; ++ case LoongArch::BI__builtin_lsx_vbitseli_b: ++ case LoongArch::BI__builtin_lasx_xvbitseli_b: ++ case LoongArch::BI__builtin_lsx_vshuf4i_d: ++ case LoongArch::BI__builtin_lasx_xvshuf4i_d: ++ case LoongArch::BI__builtin_lsx_vextrins_b: ++ case LoongArch::BI__builtin_lsx_vextrins_h: ++ case LoongArch::BI__builtin_lsx_vextrins_w: ++ case LoongArch::BI__builtin_lsx_vextrins_d: ++ case LoongArch::BI__builtin_lasx_xvextrins_b: ++ case LoongArch::BI__builtin_lasx_xvextrins_h: ++ case LoongArch::BI__builtin_lasx_xvextrins_w: ++ case LoongArch::BI__builtin_lasx_xvextrins_d: ++ case LoongArch::BI__builtin_lasx_xvpermi_q: ++ case LoongArch::BI__builtin_lsx_vpermi_w: ++ case LoongArch::BI__builtin_lasx_xvpermi_w: ++ i = 2; ++ l = 0; ++ u = 255; ++ break; ++ // df/n format ++ // These intrinsics take an unsigned 4 bit immediate. ++ case LoongArch::BI__builtin_lsx_vpickve2gr_b: ++ case LoongArch::BI__builtin_lsx_vpickve2gr_bu: ++ case LoongArch::BI__builtin_lasx_xvrepl128vei_b: ++ case LoongArch::BI__builtin_lsx_vreplvei_b: ++ i = 1; ++ l = 0; ++ u = 15; ++ break; ++ case LoongArch::BI__builtin_lsx_vinsgr2vr_b: ++ i = 2; ++ l = 0; ++ u = 15; ++ break; ++ case LoongArch::BI__builtin_lasx_xvstelm_h: ++ case LoongArch::BI__builtin_lsx_vstelm_b: ++ return SemaBuiltinConstantArgRange(TheCall, 2, -128, 127) || ++ SemaBuiltinConstantArgRange(TheCall, 3, 0, 15); ++ // These intrinsics take an unsigned 3 bit immediate. ++ case LoongArch::BI__builtin_lsx_vpickve2gr_h: ++ case LoongArch::BI__builtin_lsx_vpickve2gr_hu: ++ case LoongArch::BI__builtin_lasx_xvrepl128vei_h: ++ case LoongArch::BI__builtin_lasx_xvpickve2gr_w: ++ case LoongArch::BI__builtin_lasx_xvpickve2gr_wu: ++ case LoongArch::BI__builtin_lasx_xvpickve_w: ++ case LoongArch::BI__builtin_lsx_vreplvei_h: ++ i = 1; ++ l = 0; ++ u = 7; ++ break; ++ case LoongArch::BI__builtin_lsx_vinsgr2vr_h: ++ case LoongArch::BI__builtin_lasx_xvinsgr2vr_w: ++ case LoongArch::BI__builtin_lasx_xvinsve0_w: ++ i = 2; ++ l = 0; ++ u = 7; ++ break; ++ case LoongArch::BI__builtin_lasx_xvstelm_w: ++ case LoongArch::BI__builtin_lsx_vstelm_h: ++ return SemaBuiltinConstantArgRange(TheCall, 2, -128, 127) || ++ SemaBuiltinConstantArgRange(TheCall, 3, 0, 7); ++ // These intrinsics take an unsigned 2 bit immediate. 
++ case LoongArch::BI__builtin_lsx_vpickve2gr_w: ++ case LoongArch::BI__builtin_lsx_vpickve2gr_wu: ++ case LoongArch::BI__builtin_lasx_xvrepl128vei_w: ++ case LoongArch::BI__builtin_lasx_xvpickve2gr_d: ++ case LoongArch::BI__builtin_lasx_xvpickve2gr_du: ++ case LoongArch::BI__builtin_lasx_xvpickve_d: ++ case LoongArch::BI__builtin_lsx_vreplvei_w: ++ i = 1; ++ l = 0; ++ u = 3; ++ break; ++ case LoongArch::BI__builtin_lsx_vinsgr2vr_w: ++ case LoongArch::BI__builtin_lasx_xvinsve0_d: ++ case LoongArch::BI__builtin_lasx_xvinsgr2vr_d: ++ i = 2; ++ l = 0; ++ u = 3; ++ break; ++ case LoongArch::BI__builtin_lasx_xvstelm_d: ++ case LoongArch::BI__builtin_lsx_vstelm_w: ++ return SemaBuiltinConstantArgRange(TheCall, 2, -128, 127) || ++ SemaBuiltinConstantArgRange(TheCall, 3, 0, 3); ++ // These intrinsics take an unsigned 1 bit immediate. ++ case LoongArch::BI__builtin_lsx_vpickve2gr_d: ++ case LoongArch::BI__builtin_lsx_vpickve2gr_du: ++ case LoongArch::BI__builtin_lasx_xvrepl128vei_d: ++ case LoongArch::BI__builtin_lsx_vreplvei_d: ++ i = 1; ++ l = 0; ++ u = 1; ++ break; ++ case LoongArch::BI__builtin_lsx_vinsgr2vr_d: ++ i = 2; ++ l = 0; ++ u = 1; ++ break; ++ case LoongArch::BI__builtin_lsx_vstelm_d: ++ return SemaBuiltinConstantArgRange(TheCall, 2, -128, 127) || ++ SemaBuiltinConstantArgRange(TheCall, 3, 0, 1); ++ // Memory offsets and immediate loads. ++ // These intrinsics take a signed 10 bit immediate. ++ case LoongArch::BI__builtin_lasx_xvldrepl_w: ++ case LoongArch::BI__builtin_lsx_vldrepl_w: ++ i = 1; ++ l = -512; ++ u = 511; ++ break; ++ case LoongArch::BI__builtin_lasx_xvldrepl_h: ++ case LoongArch::BI__builtin_lsx_vldrepl_h: ++ i = 1; ++ l = -1024; ++ u = 1023; ++ break; ++ case LoongArch::BI__builtin_lasx_xvldrepl_b: ++ case LoongArch::BI__builtin_lsx_vldrepl_b: ++ i = 1; ++ l = -2048; ++ u = 2047; ++ break; ++ case LoongArch::BI__builtin_lasx_xvld: ++ case LoongArch::BI__builtin_lsx_vld: ++ i = 1; ++ l = -2048; ++ u = 2047; ++ break; ++ case LoongArch::BI__builtin_lsx_vst: ++ case LoongArch::BI__builtin_lasx_xvst: ++ i = 2; ++ l = -2048; ++ u = 2047; ++ break; ++ case LoongArch::BI__builtin_lasx_xvldi: ++ case LoongArch::BI__builtin_lsx_vldi: ++ i = 0; ++ l = -4096; ++ u = 4095; ++ break; ++ // These intrinsics take an unsigned 5 bit immediate and a signed 12 bit immediate. ++ case LoongArch::BI__builtin_loongarch_cacop_w: ++ case LoongArch::BI__builtin_loongarch_cacop_d: ++ return SemaBuiltinConstantArgRange(TheCall, 0, 0, 31) || ++ SemaBuiltinConstantArgRange(TheCall, 2, -2048, 2047); ++ // These intrinsics take an unsigned 14 bit immediate. ++ case LoongArch::BI__builtin_loongarch_csrrd_w: ++ case LoongArch::BI__builtin_loongarch_csrrd_d: ++ i = 0; ++ l = 0; ++ u = 16383; ++ break; ++ case LoongArch::BI__builtin_loongarch_csrwr_w: ++ case LoongArch::BI__builtin_loongarch_csrwr_d: ++ i = 1; ++ l = 0; ++ u = 16383; ++ break; ++ case LoongArch::BI__builtin_loongarch_csrxchg_w: ++ case LoongArch::BI__builtin_loongarch_csrxchg_d: ++ i = 2; ++ l = 0; ++ u = 16383; ++ break; ++ // These intrinsics take an unsigned 15 bit immediate. 
++ case LoongArch::BI__builtin_loongarch_dbar: ++ case LoongArch::BI__builtin_loongarch_ibar: ++ case LoongArch::BI__builtin_loongarch_syscall: ++ case LoongArch::BI__builtin_loongarch_break: i = 0; l = 0; u = 32767; break; ++ } ++ ++ if (!m) ++ return SemaBuiltinConstantArgRange(TheCall, i, l, u); ++ ++ return SemaBuiltinConstantArgRange(TheCall, i, l, u) || ++ SemaBuiltinConstantArgMultiple(TheCall, i, m); ++} ++ + bool Sema::CheckSystemZBuiltinFunctionCall(unsigned BuiltinID, + CallExpr *TheCall) { + if (BuiltinID == SystemZ::BI__builtin_tabort) { +diff --git a/test/CodeGen/LoongArch/abi-lp64d.c b/test/CodeGen/LoongArch/abi-lp64d.c +new file mode 100644 +index 000000000..aa8f63094 +--- /dev/null ++++ b/test/CodeGen/LoongArch/abi-lp64d.c +@@ -0,0 +1,471 @@ ++// RUN: %clang_cc1 -triple loongarch64 -target-abi lp64 \ ++ // RUN: -emit-llvm %s -o - | FileCheck %s ++ ++/// This test checks the calling convention of the lp64d ABI. ++ ++#include ++#include ++ ++/// Part 0: C Data Types and Alignment. ++ ++/// `char` datatype is signed by default. ++/// In most cases, the unsigned integer data types are zero-extended when stored ++/// in general-purpose register, and the signed integer data types are ++/// sign-extended. However, in the LP64D ABI, unsigned 32-bit types, such as ++/// unsigned int, are stored in general-purpose registers as proper sign ++/// extensions of their 32-bit values. ++ ++// CHECK-LABEL: define{{.*}} zeroext i1 @check_bool() ++_Bool check_bool() { return 0; } ++ ++// CHECK-LABEL: define{{.*}} signext i8 @check_char() ++char check_char() { return 0; } ++ ++// CHECK-LABEL: define{{.*}} signext i16 @check_short() ++short check_short() { return 0; } ++ ++// CHECK-LABEL: define{{.*}} signext i32 @check_int() ++int check_int() { return 0; } ++ ++// CHECK-LABEL: define{{.*}} i64 @check_long() ++long check_long() { return 0; } ++ ++// CHECK-LABEL: define{{.*}} i64 @check_longlong() ++long long check_longlong() { return 0; } ++ ++// CHECK-LABEL: define{{.*}} zeroext i8 @check_uchar() ++unsigned char check_uchar() { return 0; } ++ ++// CHECK-LABEL: define{{.*}} zeroext i16 @check_ushort() ++unsigned short check_ushort() { return 0; } ++ ++// CHECK-LABEL: define{{.*}} signext i32 @check_uint() ++unsigned int check_uint() { return 0; } ++ ++// CHECK-LABEL: define{{.*}} i64 @check_ulong() ++unsigned long check_ulong() { return 0; } ++ ++// CHECK-LABEL: define{{.*}} i64 @check_ulonglong() ++unsigned long long check_ulonglong() { return 0; } ++ ++// CHECK-LABEL: define{{.*}} float @check_float() ++float check_float() { return 0; } ++ ++// CHECK-LABEL: define{{.*}} double @check_double() ++double check_double() { return 0; } ++ ++// CHECK-LABEL: define{{.*}} fp128 @check_longdouble() ++long double check_longdouble() { return 0; } ++ ++/// Part 1: Scalar arguments and return value. ++ ++/// The lp64d abi says: ++/// 1. 1 < WOA <= GRLEN ++/// a. Argument is passed in a single argument register, or on the stack by ++/// value if none is available. ++/// i. If the argument is floating-point type, the argument is passed in FAR. if ++/// no FAR is available, it’s passed in GAR. If no GAR is available, it’s ++/// passed on the stack. When passed in registers or on the stack, ++/// floating-point types narrower than GRLEN bits are widened to GRLEN bits, ++/// with the upper bits undefined. ++/// ii. If the argument is integer or pointer type, the argument is passed in ++/// GAR. If no GAR is available, it’s passed on the stack. 
When passed in ++/// registers or on the stack, the unsigned integer scalars narrower than GRLEN ++/// bits are zero-extended to GRLEN bits, and the signed integer scalars are ++/// sign-extended. ++/// 2. GRLEN < WOA ≤ 2 × GRLEN ++/// a. The argument is passed in a pair of GAR, with the low-order GRLEN bits in ++/// the lower-numbered register and the high-order GRLEN bits in the ++/// higher-numbered register. If exactly one register is available, the ++/// low-order GRLEN bits are passed in the register and the high-order GRLEN ++/// bits are passed on the stack. If no GAR is available, it’s passed on the ++/// stack. ++ ++/// Note that most of these conventions are handled at the llvm side, so here we ++/// only check the correctness of argument (or return value)'s sign/zero ++/// extension attribute. ++ ++// CHECK-LABEL: define{{.*}} signext i32 @f_scalar(i1{{.*}} zeroext %a, i8{{.*}} signext %b, i8{{.*}} zeroext %c, i16{{.*}} signext %d, i16{{.*}} zeroext %e, i32{{.*}} signext %f, i32{{.*}} signext %g, i64{{.*}} %h, i1{{.*}} zeroext %i, i8{{.*}} signext %j, i8{{.*}} zeroext %k, i16{{.*}} signext %l, i16{{.*}} zeroext %m, i32{{.*}} signext %n, i32{{.*}} signext %o, i64{{.*}} %p) ++int f_scalar(_Bool a, int8_t b, uint8_t c, int16_t d, uint16_t e, int32_t f, ++ uint32_t g, int64_t h, /* begin of stack passing -> */ _Bool i, ++ int8_t j, uint8_t k, int16_t l, uint16_t m, int32_t n, ++ uint32_t o, int64_t p) { ++ return 0; ++} ++ ++/// Part 2: Structure arguments and return value. ++ ++/// The lp64d abi says: ++/// Empty structures are ignored by C compilers which support them as a ++/// non-standard extension(same as union arguments and return values). Bits ++/// unused due to padding, and bits past the end of a structure whose size in ++/// bits is not divisible by GRLEN, are undefined. And the layout of the ++/// structure on the stack is consistent with that in memory. ++ ++/// Check empty structs are ignored. ++ ++struct empty_s {}; ++ ++// CHECK-LABEL: define{{.*}} void @f_empty_s() ++struct empty_s f_empty_s(struct empty_s x) { ++ return x; ++} ++ ++/// 1. 0 < WOA ≤ GRLEN ++/// a. The structure has only fixed-point members. If there is an available GAR, ++/// the structure is passed through the GAR by value passing; If no GAR is ++/// available, it’s passed on the stack. ++ ++struct i16x4_s { ++ int16_t a, b, c, d; ++}; ++ ++// CHECK-LABEL: define{{.*}} i64 @f_i16x4_s(i64 %x.coerce) ++struct i16x4_s f_i16x4_s(struct i16x4_s x) { ++ return x; ++} ++ ++/// b. The structure has only floating-point members: ++/// i. One floating-point member. The argument is passed in a FAR; If no FAR is ++/// available, the value is passed in a GAR; if no GAR is available, the value ++/// is passed on the stack. ++ ++struct f32x1_s { ++ float a; ++}; ++ ++struct f64x1_s { ++ double a; ++}; ++ ++// CHECK-LABEL: define{{.*}} float @f_f32x1_s(float %0) ++struct f32x1_s f_f32x1_s(struct f32x1_s x) { ++ return x; ++} ++ ++// CHECK-LABEL: define{{.*}} double @f_f64x1_s(double %0) ++struct f64x1_s f_f64x1_s(struct f64x1_s x) { ++ return x; ++} ++ ++/// ii. Two floating-point members. The argument is passed in a pair of ++/// available FAR, with the low-order float member bits in the lower-numbered ++/// FAR and the high-order float member bits in the higher-numbered FAR. If the ++/// number of available FAR is less than 2, it’s passed in a GAR, and passed on ++/// the stack if no GAR is available. 
++
++struct f32x2_s {
++  float a, b;
++};
++
++// CHECK-LABEL: define{{.*}} { float, float } @f_f32x2_s(float %0, float %1)
++struct f32x2_s f_f32x2_s(struct f32x2_s x) {
++  return x;
++}
++
++/// c. The structure has both fixed-point and floating-point members, i.e. the
++/// structure has one float member and...
++/// i. Multiple fixed-point members. If there is an available GAR, the structure
++/// is passed in a GAR, and passed on the stack if no GAR is available.
++
++struct f32x1_i16x2_s {
++  float a;
++  int16_t b, c;
++};
++
++// CHECK-LABEL: define{{.*}} i64 @f_f32x1_i16x2_s(i64 %x.coerce)
++struct f32x1_i16x2_s f_f32x1_i16x2_s(struct f32x1_i16x2_s x) {
++  return x;
++}
++
++/// ii. Only one fixed-point member. If one FAR and one GAR are available, the
++/// floating-point member of the structure is passed in the FAR, and the integer
++/// member of the structure is passed in the GAR; If no floating-point register
++/// but one GAR is available, it’s passed in GAR; If no GAR is available, it’s
++/// passed on the stack.
++
++struct f32x1_i32x1_s {
++  float a;
++  int32_t b;
++};
++
++// CHECK-LABEL: define{{.*}} { float, i32 } @f_f32x1_i32x1_s(float %0, i32 %1)
++struct f32x1_i32x1_s f_f32x1_i32x1_s(struct f32x1_i32x1_s x) {
++  return x;
++}
++
++/// 2. GRLEN < WOA ≤ 2 × GRLEN
++/// a. Only fixed-point members.
++/// i. The argument is passed in a pair of available GAR, with the low-order
++/// bits in the lower-numbered GAR and the high-order bits in the
++/// higher-numbered GAR. If only one GAR is available, the low-order bits are in
++/// the GAR and the high-order bits are on the stack, and passed on the stack if
++/// no GAR is available.
++
++struct i64x2_s {
++  int64_t a, b;
++};
++
++// CHECK-LABEL: define{{.*}} [2 x i64] @f_i64x2_s([2 x i64] %x.coerce)
++struct i64x2_s f_i64x2_s(struct i64x2_s x) {
++  return x;
++}
++
++/// b. Only floating-point members.
++/// i. The structure has one long double member or one double member and two
++/// adjacent float members or 3-4 float members. The argument is passed in a
++/// pair of available GAR, with the low-order bits in the lower-numbered GAR and
++/// the high-order bits in the higher-numbered GAR. If only one GAR is
++/// available, the low-order bits are in the GAR and the high-order bits are on
++/// the stack, and passed on the stack if no GAR is available.
++
++struct f128x1_s {
++  long double a;
++};
++
++// CHECK-LABEL: define{{.*}} i128 @f_f128x1_s(i128 %x.coerce)
++struct f128x1_s f_f128x1_s(struct f128x1_s x) {
++  return x;
++}
++
++struct f64x1_f32x2_s {
++  double a;
++  float b, c;
++};
++
++// CHECK-LABEL: define{{.*}} [2 x i64] @f_f64x1_f32x2_s([2 x i64] %x.coerce)
++struct f64x1_f32x2_s f_f64x1_f32x2_s(struct f64x1_f32x2_s x) {
++  return x;
++}
++
++struct f32x3_s {
++  float a, b, c;
++};
++
++// CHECK-LABEL: define{{.*}} [2 x i64] @f_f32x3_s([2 x i64] %x.coerce)
++struct f32x3_s f_f32x3_s(struct f32x3_s x) {
++  return x;
++}
++
++struct f32x4_s {
++  float a, b, c, d;
++};
++
++// CHECK-LABEL: define{{.*}} [2 x i64] @f_f32x4_s([2 x i64] %x.coerce)
++struct f32x4_s f_f32x4_s(struct f32x4_s x) {
++  return x;
++}
++
++/// ii. The structure with two double members is passed in a pair of available
++/// FARs. If no pair of FARs is available, it’s passed in GARs. A structure with
++/// one double member and one float member is treated the same.
++
++struct f64x2_s {
++  double a, b;
++};
++
++// CHECK-LABEL: define{{.*}} { double, double } @f_f64x2_s(double %0, double %1)
++struct f64x2_s f_f64x2_s(struct f64x2_s x) {
++  return x;
++}
++
++/// c. Both fixed-point and floating-point members.
++/// i. The structure has one double member and only one fixed-point member.
++/// A. If one FAR and one GAR are available, the floating-point member of the
++/// structure is passed in the FAR, and the integer member of the structure is
++/// passed in the GAR; If no floating-point registers but two GARs are
++/// available, it’s passed in the two GARs; If only one GAR is available, the
++/// low-order bits are in the GAR and the high-order bits are on the stack; And
++/// it’s passed on the stack if no GAR is available.
++
++struct f64x1_i64x1_s {
++  double a;
++  int64_t b;
++};
++
++// CHECK-LABEL: define{{.*}} { double, i64 } @f_f64x1_i64x1_s(double %0, i64 %1)
++struct f64x1_i64x1_s f_f64x1_i64x1_s(struct f64x1_i64x1_s x) {
++  return x;
++}
++
++/// ii. Others
++/// A. The argument is passed in a pair of available GAR, with the low-order
++/// bits in the lower-numbered GAR and the high-order bits in the
++/// higher-numbered GAR. If only one GAR is available, the low-order bits are in
++/// the GAR and the high-order bits are on the stack, and passed on the stack if
++/// no GAR is available.
++
++struct f64x1_i32x2_s {
++  double a;
++  int32_t b, c;
++};
++
++// CHECK-LABEL: define{{.*}} [2 x i64] @f_f64x1_i32x2_s([2 x i64] %x.coerce)
++struct f64x1_i32x2_s f_f64x1_i32x2_s(struct f64x1_i32x2_s x) {
++  return x;
++}
++
++struct f32x2_i32x2_s {
++  float a, b;
++  int32_t c, d;
++};
++
++// CHECK-LABEL: define{{.*}} [2 x i64] @f_f32x2_i32x2_s([2 x i64] %x.coerce)
++struct f32x2_i32x2_s f_f32x2_i32x2_s(struct f32x2_i32x2_s x) {
++  return x;
++}
++
++/// 3. WOA > 2 × GRLEN
++/// a. It’s passed by reference and is replaced in the argument list with the
++/// address. If there is an available GAR, the reference is passed in the GAR,
++/// and passed on the stack if no GAR is available.
++
++struct i64x4_s {
++  int64_t a, b, c, d;
++};
++
++// CHECK-LABEL: define{{.*}} void @f_i64x4_s(ptr{{.*}} sret(%struct.i64x4_s){{.*}} %agg.result, ptr{{.*}} %x)
++struct i64x4_s f_i64x4_s(struct i64x4_s x) {
++  return x;
++}
++
++struct f64x4_s {
++  double a, b, c, d;
++};
++
++// CHECK-LABEL: define{{.*}} void @f_f64x4_s(ptr{{.*}} sret(%struct.f64x4_s){{.*}} %agg.result, ptr{{.*}} %x)
++struct f64x4_s f_f64x4_s(struct f64x4_s x) {
++  return x;
++}
++
++/// Part 3: Union arguments and return value.
++
++/// Check empty unions are ignored.
++
++union empty_u {};
++
++// CHECK-LABEL: define{{.*}} void @f_empty_u()
++union empty_u f_empty_u(union empty_u x) {
++  return x;
++}
++
++/// Union is passed in GAR or stack.
++/// 1. 0 < WOA ≤ GRLEN
++/// a. The argument is passed in a GAR, or on the stack by value if no GAR is
++/// available.
++
++union i32_f32_u {
++  int32_t a;
++  float b;
++};
++
++// CHECK-LABEL: define{{.*}} i64 @f_i32_f32_u(i64 %x.coerce)
++union i32_f32_u f_i32_f32_u(union i32_f32_u x) {
++  return x;
++}
++
++union i64_f64_u {
++  int64_t a;
++  double b;
++};
++
++// CHECK-LABEL: define{{.*}} i64 @f_i64_f64_u(i64 %x.coerce)
++union i64_f64_u f_i64_f64_u(union i64_f64_u x) {
++  return x;
++}
++
++/// 2. GRLEN < WOA ≤ 2 × GRLEN
++/// a. The argument is passed in a pair of available GAR, with the low-order
++/// bits in the lower-numbered GAR and the high-order bits in the
++/// higher-numbered GAR. If only one GAR is available, the low-order bits are in
If only one GAR is available, the low-order bits are in
++/// the GAR and the high-order bits are on the stack. The arguments are passed
++/// on the stack when no GAR is available.
++
++union i128_f128_u {
++ __int128_t a;
++ long double b;
++};
++
++// CHECK-LABEL: define{{.*}} i128 @f_i128_f128_u(i128 %x.coerce)
++union i128_f128_u f_i128_f128_u(union i128_f128_u x) {
++ return x;
++}
++
++/// 3. WOA > 2 × GRLEN
++/// a. It is passed by reference and replaced in the argument list with the
++/// address. If there is an available GAR, the reference is passed in the GAR,
++/// and passed on the stack if no GAR is available.
++
++union i64_arr3_u {
++ int64_t a[3];
++};
++
++// CHECK-LABEL: define{{.*}} void @f_i64_arr3_u(ptr{{.*}} sret(%union.i64_arr3_u){{.*}} %agg.result, ptr{{.*}} %x)
++union i64_arr3_u f_i64_arr3_u(union i64_arr3_u x) {
++ return x;
++}
++
++/// Part 4: Complex number arguments and return value.
++
++/// A complex floating-point number, or a structure containing just one complex
++/// floating-point number, is passed as though it were a structure containing
++/// two floating-point reals.
++
++// CHECK-LABEL: define{{.*}} { float, float } @f_floatcomplex(float{{.*}} %x.coerce0, float{{.*}} %x.coerce1)
++float __complex__ f_floatcomplex(float __complex__ x) { return x; }
++
++// CHECK-LABEL: define{{.*}} { double, double } @f_doublecomplex(double{{.*}} %x.coerce0, double{{.*}} %x.coerce1)
++double __complex__ f_doublecomplex(double __complex__ x) { return x; }
++
++struct floatcomplex_s {
++ float __complex__ c;
++};
++// CHECK-LABEL: define{{.*}} { float, float } @f_floatcomplex_s(float %0, float %1)
++struct floatcomplex_s f_floatcomplex_s(struct floatcomplex_s x) {
++ return x;
++}
++
++struct doublecomplex_s {
++ double __complex__ c;
++};
++// CHECK-LABEL: define{{.*}} { double, double } @f_doublecomplex_s(double %0, double %1)
++struct doublecomplex_s f_doublecomplex_s(struct doublecomplex_s x) {
++ return x;
++}
++
++/// Part 5: Variadic arguments.
++
++/// Variadic arguments are passed in GARs in the same manner as named arguments.
++
++int f_va_callee(int, ...);
++
++// CHECK-LABEL: define{{.*}} void @f_va_caller()
++// CHECK: call signext i32 (i32, ...) @f_va_callee(i32{{.*}} signext 1, i32{{.*}} signext 2, i64{{.*}} 3, double{{.*}} 4.000000e+00, double{{.*}} 5.000000e+00, i64 {{.*}}, i64 {{.*}}, i64 {{.*}})
++void f_va_caller(void) {
++ f_va_callee(1, 2, 3LL, 4.0f, 5.0, (struct i16x4_s){6, 7, 8, 9},
++ (struct i64x2_s){10, 11});
++}
++
++// CHECK-LABEL: define{{.*}} signext i32 @f_va_int(ptr{{.*}} %fmt, ...)
++// CHECK: entry:
++// CHECK: %fmt.addr = alloca ptr, align 8
++// CHECK: %va = alloca ptr, align 8
++// CHECK: %v = alloca i32, align 4
++// CHECK: store ptr %fmt, ptr %fmt.addr, align 8
++// CHECK: call void @llvm.va_start(ptr %va)
++// CHECK: %argp.cur = load ptr, ptr %va, align 8
++// CHECK: %argp.next = getelementptr inbounds i8, ptr %argp.cur, i64 8
++// CHECK: store ptr %argp.next, ptr %va, align 8
++// CHECK: %0 = load i32, ptr %argp.cur, align 8
++// CHECK: store i32 %0, ptr %v, align 4
++// CHECK: call void @llvm.va_end(ptr %va)
++// CHECK: %1 = load i32, ptr %v, align 4
++// CHECK: ret i32 %1
++// CHECK: }
++int f_va_int(char *fmt, ...) 
{ ++ __builtin_va_list va; ++ __builtin_va_start(va, fmt); ++ int v = __builtin_va_arg(va, int); ++ __builtin_va_end(va); ++ return v; ++} +diff --git a/test/CodeGen/LoongArch/inlineasm-float-double-in-gpr.c b/test/CodeGen/LoongArch/inlineasm-float-double-in-gpr.c +new file mode 100644 +index 000000000..e4a03d782 +--- /dev/null ++++ b/test/CodeGen/LoongArch/inlineasm-float-double-in-gpr.c +@@ -0,0 +1,49 @@ ++// RUN: %clang_cc1 -triple loongarch64 -O2 -emit-llvm %s -o - \ ++// RUN: | FileCheck %s ++ ++float f; ++double d; ++ ++// CHECK-LABEL: @reg_float( ++// CHECK: [[FLT_ARG:%.*]] = load float, ptr @f ++// CHECK: call void asm sideeffect "", "r"(float [[FLT_ARG]]) ++// CHECK: ret void ++void reg_float() { ++ float a = f; ++ asm volatile("" ++ : ++ : "r"(a)); ++} ++ ++// CHECK-LABEL: @r4_float( ++// CHECK: [[FLT_ARG:%.*]] = load float, ptr @f ++// CHECK: call void asm sideeffect "", "{$r4}"(float [[FLT_ARG]]) ++// CHECK: ret void ++void r4_float() { ++ register float a asm("$r4") = f; ++ asm volatile("" ++ : ++ : "r"(a)); ++} ++ ++// CHECK-LABEL: @reg_double( ++// CHECK: [[DBL_ARG:%.*]] = load double, ptr @d ++// CHECK: call void asm sideeffect "", "r"(double [[DBL_ARG]]) ++// CHECK: ret void ++void reg_double() { ++ double a = d; ++ asm volatile("" ++ : ++ : "r"(a)); ++} ++ ++// CHECK-LABEL: @r4_double( ++// CHECK: [[DBL_ARG:%.*]] = load double, ptr @d ++// CHECK: call void asm sideeffect "", "{$r4}"(double [[DBL_ARG]]) ++// CHECK: ret void ++void r4_double() { ++ register double a asm("$r4") = d; ++ asm volatile("" ++ : ++ : "r"(a)); ++} +diff --git a/test/CodeGen/builtins-loongarch-base.c b/test/CodeGen/builtins-loongarch-base.c +new file mode 100644 +index 000000000..cdff582fa +--- /dev/null ++++ b/test/CodeGen/builtins-loongarch-base.c +@@ -0,0 +1,409 @@ ++// REQUIRES: loongarch-registered-target ++// RUN: %clang_cc1 -triple loongarch64-linux-gnu -emit-llvm %s -o - | FileCheck %s ++ ++#include ++ ++typedef char i8; ++typedef unsigned char u8; ++typedef short i16; ++typedef unsigned short u16; ++typedef int i32; ++typedef unsigned int u32; ++ ++#if __LONG_MAX__ == __LONG_LONG_MAX__ ++typedef long int i64; ++typedef unsigned long int u64; ++#else ++typedef long long i64; ++typedef unsigned long long u64; ++#endif ++ ++__drdtime_t drdtime; ++__rdtime_t rdtime; ++ ++void cpucfg(){ ++ ++ u32 u32_r, u32_a; ++ // __cpucfg ++ // rd, rj ++ // unsigned int, unsigned int ++ u32_r= __builtin_loongarch_cpucfg(u32_a); // CHECK: call i32 @llvm.loongarch.cpucfg ++ ++} ++ ++void csrrd_w() { ++ ++ u32 u32_r; ++ // __csrrd_w ++ // rd, csr_num ++ // unsigned int, uimm14_32 ++ u32_r = __builtin_loongarch_csrrd_w(1); // CHECK: call i32 @llvm.loongarch.csrrd.w ++} ++ ++void csrrd_d() { ++ ++ u64 u64_r; ++ // __csrrd_d ++ // rd, csr_num ++ // unsigned long int, uimm14 ++ u64_r = __builtin_loongarch_csrrd_d(1); // CHECK: call i64 @llvm.loongarch.csrrd.d ++} ++ ++void csrwr_w() { ++ ++ u32 u32_r, u32_a; ++ // __csrwr_w ++ // rd, csr_num ++ // unsigned int, uimm14_32 ++ u32_r = __builtin_loongarch_csrwr_w(u32_a, 1); // CHECK: call i32 @llvm.loongarch.csrwr.w ++} ++ ++void csrwr_d() { ++ ++ u64 u64_r, u64_a; ++ // __csrwr_d ++ // rd, csr_num ++ // unsigned long int, uimm14 ++ u64_r = __builtin_loongarch_csrwr_d(u64_a, 1); // CHECK: call i64 @llvm.loongarch.csrwr.d ++} ++ ++void csrxchg_w() { ++ ++ u32 u32_r, u32_a, u32_b; ++ // __csrxchg_w ++ // rd, rj, csr_num ++ // unsigned int, unsigned int, uimm14_32 ++ u32_r = __builtin_loongarch_csrxchg_w(u32_a, u32_b, 1); // CHECK: call i32 @llvm.loongarch.csrxchg.w ++} ++ 
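++// Note on the csrxchg builtins: the second register operand is a bit mask;
++// only the CSR bits selected by the mask are replaced with the corresponding
++// bits of the first operand, and the previous CSR value is returned.
++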
++void csrxchg_d() { ++ ++ u64 u64_r, u64_a, u64_b; ++ // __csrxchg_d ++ // rd, rj, csr_num ++ // unsigned long int, unsigned long int, uimm14 ++ u64_r = __builtin_loongarch_csrxchg_d(u64_a, u64_b, 1); // CHECK: call i64 @llvm.loongarch.csrxchg.d ++} ++ ++void iocsrrd_b(){ ++ ++ u32 u32_a; ++ u8 u8_r; ++ // __iocsrrd_b ++ // rd, rj ++ // unsigned char, unsigned int ++ u8_r=__builtin_loongarch_iocsrrd_b(u32_a); // CHECK: call i32 @llvm.loongarch.iocsrrd.b ++ ++} ++ ++void iocsrrd_h(){ ++ ++ u32 u32_a; ++ u16 u16_r; ++ // __iocsrrd_h ++ // rd, rj ++ // unsigned short, unsigned int ++ u16_r=__builtin_loongarch_iocsrrd_h(u32_a); // CHECK: call i32 @llvm.loongarch.iocsrrd.h ++ ++} ++ ++void iocsrrd_w(){ ++ ++ u32 u32_r, u32_a; ++ // __iocsrrd_w ++ // rd, rj ++ // unsigned int, unsigned int ++ u32_r=__builtin_loongarch_iocsrrd_w(u32_a); // CHECK: call i32 @llvm.loongarch.iocsrrd.w ++ ++} ++ ++void iocsrrd_d(){ ++ ++ u32 u32_a; ++ u64 u64_r; ++ // __iocsrrd_d ++ // rd, rj ++ // unsigned long int, unsigned int ++ u64_r=__builtin_loongarch_iocsrrd_d(u32_a); // CHECK: call i64 @llvm.loongarch.iocsrrd.d ++ ++} ++ ++void iocsrwr_b(){ ++ ++ u32 u32_a; ++ u8 u8_a; ++ // __iocsrwr_b ++ // rd, rj ++ // unsigned char, unsigned int ++ __builtin_loongarch_iocsrwr_b(u8_a, u32_a); // CHECK: void @llvm.loongarch.iocsrwr.b ++ ++} ++ ++void iocsrwr_h(){ ++ ++ u32 u32_a; ++ u16 u16_a; ++ // __iocsrwr_h ++ // rd, rj ++ // unsigned short, unsigned int ++ __builtin_loongarch_iocsrwr_h(u16_a, u32_a); // CHECK: void @llvm.loongarch.iocsrwr.h ++ ++} ++ ++void iocsrwr_w(){ ++ ++ u32 u32_a, u32_b; ++ // __iocsrwr_w ++ // rd, rj ++ // unsigned int, unsigned int ++ __builtin_loongarch_iocsrwr_w(u32_a, u32_b); // CHECK: void @llvm.loongarch.iocsrwr.w ++ ++} ++ ++void iocsrwr_d(){ ++ ++ u32 u32_a; ++ u64 u64_a; ++ // __iocsrwr_d ++ // rd, rj ++ // unsigned long int, unsigned int ++ __builtin_loongarch_iocsrwr_d(u64_a, u32_a); // CHECK: void @llvm.loongarch.iocsrwr.d ++ ++} ++ ++void cacop_w() { ++ ++ i32 i32_a; ++ // __cacop_w ++ // op, rj, si12 ++ // uimm5, unsigned int, simm12 ++ __builtin_loongarch_cacop_w(1, i32_a, 2); // CHECK: void @llvm.loongarch.cacop.w ++} ++ ++void cacop_d() { ++ ++ i64 i64_a; ++ // __cacop_d ++ // op, rj, si12 ++ // uimm5, unsigned long int, simm12 ++ __builtin_loongarch_cacop_d(1, i64_a, 2); // CHECK: void @llvm.loongarch.cacop.d ++} ++ ++void rdtime_d(){ ++ ++ drdtime= __builtin_loongarch_rdtime_d(); // CHECK: call { i64, i64 } asm sideeffect "rdtime.d\09$0,$1\0A\09", "=&r,=&r"() ++ ++} ++ ++void rdtimeh_w(){ ++ ++ rdtime= __builtin_loongarch_rdtimeh_w(); // CHECK: call { i32, i32 } asm sideeffect "rdtimeh.w\09$0,$1\0A\09", "=&r,=&r"() ++ ++} ++ ++void rdtimel_w(){ ++ ++ rdtime= __builtin_loongarch_rdtimel_w(); // CHECK: call { i32, i32 } asm sideeffect "rdtimel.w\09$0,$1\0A\09", "=&r,=&r"() ++ ++} ++ ++void crc_w_b_w(){ ++ ++ i32 i32_r, i32_a; ++ i8 i8_a; ++ // __crc_w_b_w ++ // rd, rj, rk ++ // int, char, int ++ i32_r=__builtin_loongarch_crc_w_b_w(i8_a, i32_a); // CHECK: call i32 @llvm.loongarch.crc.w.b.w ++ ++} ++ ++void crc_w_h_w(){ ++ ++ i32 i32_r, i32_a; ++ i16 i16_a; ++ // __crc_w_h_w ++ // rd, rj, rk ++ // int, short, int ++ i32_r=__builtin_loongarch_crc_w_h_w(i16_a, i32_a); // CHECK: call i32 @llvm.loongarch.crc.w.h.w ++ ++} ++ ++void crc_w_w_w(){ ++ ++ i32 i32_r, i32_a, i32_b; ++ // __crc_w_w_w ++ // rd, rj, rk ++ // int, int, int ++ i32_r=__builtin_loongarch_crc_w_w_w(i32_a, i32_b); // CHECK: call i32 @llvm.loongarch.crc.w.w.w ++ ++} ++ ++void crc_w_d_w(){ ++ ++ i32 i32_r, i32_a; ++ 
i64 i64_a; ++ // __crc_w_d_w ++ // rd, rj, rk ++ // int, long int, int ++ i32_r=__builtin_loongarch_crc_w_d_w(i64_a, i32_a); // CHECK: call i32 @llvm.loongarch.crc.w.d.w ++ ++} ++ ++void crcc_w_b_w(){ ++ ++ i32 i32_r, i32_a; ++ i8 i8_a; ++ // __crcc_w_b_w ++ // rd, rj, rk ++ // int, char, int ++ i32_r=__builtin_loongarch_crcc_w_b_w(i8_a, i32_a); // CHECK: call i32 @llvm.loongarch.crcc.w.b.w ++ ++} ++ ++void crcc_w_h_w(){ ++ ++ i32 i32_r, i32_a; ++ i16 i16_a; ++ // __crcc_w_h_w ++ // rd, rj, rk ++ // int, short, int ++ i32_r=__builtin_loongarch_crcc_w_h_w(i16_a, i32_a); // CHECK: call i32 @llvm.loongarch.crcc.w.h.w ++ ++} ++ ++void crcc_w_w_w(){ ++ ++ i32 i32_r, i32_a, i32_b; ++ // __crcc_w_w_w ++ // rd, rj, rk ++ // int, int, int ++ i32_r=__builtin_loongarch_crcc_w_w_w(i32_a, i32_b); // CHECK: call i32 @llvm.loongarch.crcc.w.w.w ++ ++} ++ ++void crcc_w_d_w(){ ++ ++ i32 i32_r, i32_a; ++ i64 i64_a; ++ // __crcc_w_d_w ++ // rd, rj, rk ++ // int, long int, int ++ i32_r=__builtin_loongarch_crcc_w_d_w(i64_a, i32_a); // CHECK: call i32 @llvm.loongarch.crcc.w.d.w ++ ++} ++ ++void tlbclr(){ ++ ++ // __tlbclr ++ __builtin_loongarch_tlbclr(); // CHECK: call void @llvm.loongarch.tlbclr ++ ++} ++ ++void tlbflush(){ ++ ++ // __tlbflush ++ __builtin_loongarch_tlbflush(); // CHECK: call void @llvm.loongarch.tlbflush ++ ++} ++ ++void tlbfill(){ ++ ++ // __tlbfill ++ __builtin_loongarch_tlbfill(); // CHECK: call void @llvm.loongarch.tlbfill ++ ++} ++ ++void tlbrd(){ ++ ++ // __tlbrd ++ __builtin_loongarch_tlbrd(); // CHECK: call void @llvm.loongarch.tlbrd ++ ++} ++ ++void tlbwr(){ ++ ++ // __tlbwr ++ __builtin_loongarch_tlbwr(); // CHECK: call void @llvm.loongarch.tlbwr ++ ++} ++ ++void tlbsrch(){ ++ ++ // __tlbsrch ++ __builtin_loongarch_tlbsrch(); // CHECK: call void @llvm.loongarch.tlbsrch ++ ++} ++ ++void syscall(){ ++ ++ // __syscall ++ // Code ++ // uimm15 ++ __builtin_loongarch_syscall(1); // CHECK: call void @llvm.loongarch.syscall ++ ++} ++ ++void break_builtin(){ ++ ++ // __break ++ // Code ++ // uimm15 ++ __builtin_loongarch_break(1); // CHECK: call void @llvm.loongarch.break ++ ++} ++ ++void asrtle_d(){ ++ ++ i64 i64_a, i64_b; ++ // __asrtle_d ++ // rj, rk ++ // long int, long int ++ __builtin_loongarch_asrtle_d(i64_a, i64_b); // CHECK: call void @llvm.loongarch.asrtle.d ++ ++} ++ ++void asrtgt_d(){ ++ ++ i64 i64_a, i64_b; ++ // __asrtgt_d ++ // rj, rk ++ // long int, long int ++ __builtin_loongarch_asrtgt_d(i64_a, i64_b); // CHECK: call void @llvm.loongarch.asrtgt.d ++ ++} ++ ++void dbar(){ ++ ++ // __dbar ++ // hint ++ // uimm15 ++ __builtin_loongarch_dbar(0); // CHECK: call void @llvm.loongarch.dbar ++ ++} ++ ++void ibar(){ ++ ++ // __ibar ++ // hint ++ // uimm15 ++ __builtin_loongarch_ibar(0); // CHECK: call void @llvm.loongarch.ibar ++ ++} ++ ++void movfcsr2gr(){ ++ ++ u32 u32_r; ++ // __movfcsr2gr ++ u32_r=__movfcsr2gr(0); // CHECK: call i32 asm sideeffect "movfcsr2gr $0, $$fcsr0", "=&r"() ++ ++} ++ ++ ++void movgr2fcsr() { ++ ++ u32 u32_a; ++ // __movgr2fcsr ++ __movgr2fcsr(0, u32_a); // CHECK: call void asm sideeffect "movgr2fcsr $$fcsr0, $0", "r"(i32 %0) ++ ++} +diff --git a/test/CodeGen/builtins-loongarch-lasx-error.c b/test/CodeGen/builtins-loongarch-lasx-error.c +new file mode 100644 +index 000000000..99f2687e4 +--- /dev/null ++++ b/test/CodeGen/builtins-loongarch-lasx-error.c +@@ -0,0 +1,266 @@ ++// REQUIRES: loongarch-registered-target ++// RUN: %clang_cc1 -triple loongarch64-unknown-linux-gnu -fsyntax-only %s \ ++// RUN: -target-feature +lasx \ ++// RUN: -verify -o - 2>&1 ++ 
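++// Each call below passes an immediate operand that is outside the range
++// accepted by the builtin, and -verify checks that the corresponding
++// out-of-range diagnostic is emitted.
++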
++#include ++ ++void test() { ++ v32i8 v32i8_a = (v32i8){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, ++ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}; ++ v32i8 v32i8_b = (v32i8){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, ++ 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}; ++ v32i8 v32i8_c = (v32i8){2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, ++ 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33}; ++ v32i8 v32i8_r; ++ ++ v16i16 v16i16_a = (v16i16){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; ++ v16i16 v16i16_b = (v16i16){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; ++ v16i16 v16i16_c = (v16i16){2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17}; ++ v16i16 v16i16_r; ++ ++ v8i32 v8i32_a = (v8i32){0, 1, 2, 3, 4, 5, 6, 7}; ++ v8i32 v8i32_b = (v8i32){1, 2, 3, 4, 5, 6, 7, 8}; ++ v8i32 v8i32_c = (v8i32){2, 3, 4, 5, 6, 7, 8, 9}; ++ v8i32 v8i32_r; ++ ++ v4i64 v4i64_a = (v4i64){0, 1, 2, 3}; ++ v4i64 v4i64_b = (v4i64){1, 2, 3, 4}; ++ v4i64 v4i64_c = (v4i64){2, 3, 4, 5}; ++ v4i64 v4i64_r; ++ ++ v32u8 v32u8_a = (v32u8){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, ++ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}; ++ v32u8 v32u8_b = (v32u8){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, ++ 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}; ++ v32u8 v32u8_c = (v32u8){2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, ++ 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33}; ++ v32u8 v32u8_r; ++ ++ v16u16 v16u16_a = (v16u16){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; ++ v16u16 v16u16_b = (v16u16){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; ++ v16u16 v16u16_c = (v16u16){2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17}; ++ v16u16 v16u16_r; ++ ++ v8u32 v8u32_a = (v8u32){0, 1, 2, 3, 4, 5, 6, 7}; ++ v8u32 v8u32_b = (v8u32){1, 2, 3, 4, 5, 6, 7, 8}; ++ v8u32 v8u32_c = (v8u32){2, 3, 4, 5, 6, 7, 8, 9}; ++ v8u32 v8u32_r; ++ ++ v4u64 v4u64_a = (v4u64){0, 1, 2, 3}; ++ v4u64 v4u64_b = (v4u64){1, 2, 3, 4}; ++ v4u64 v4u64_c = (v4u64){2, 3, 4, 5}; ++ v4u64 v4u64_r; ++ ++ v8f32 v8f32_a = (v8f32){0.5, 1, 2, 3, 4, 5, 6, 7}; ++ v8f32 v8f32_b = (v8f32){1.5, 2, 3, 4, 5, 6, 7, 8}; ++ v8f32 v8f32_c = (v8f32){2.5, 3, 4, 5, 6, 7, 8, 9}; ++ v8f32 v8f32_r; ++ v4f64 v4f64_a = (v4f64){0.5, 1, 2, 3}; ++ v4f64 v4f64_b = (v4f64){1.5, 2, 3, 4}; ++ v4f64 v4f64_c = (v4f64){2.5, 3, 4, 5}; ++ v4f64 v4f64_r; ++ ++ int i32_r; ++ int i32_a = 1; ++ int i32_b = 2; ++ unsigned int u32_r; ++ unsigned int u32_a = 1; ++ unsigned int u32_b = 2; ++ long long i64_r; ++ long long i64_a = 1; ++ long long i64_b = 2; ++ long long i64_c = 3; ++ unsigned long long u64_r; ++ unsigned long long u64_a = 1; ++ unsigned long long u64_b = 2; ++ unsigned long long u64_c = 3; ++ ++ v32i8_r = __lasx_xvslli_b(v32i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v16i16_r = __lasx_xvslli_h(v16i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8i32_r = __lasx_xvslli_w(v8i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i64_r = __lasx_xvslli_d(v4i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v32i8_r = __lasx_xvsrai_b(v32i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v16i16_r = __lasx_xvsrai_h(v16i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} 
++ v8i32_r = __lasx_xvsrai_w(v8i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i64_r = __lasx_xvsrai_d(v4i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v32i8_r = __lasx_xvsrari_b(v32i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v16i16_r = __lasx_xvsrari_h(v16i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8i32_r = __lasx_xvsrari_w(v8i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i64_r = __lasx_xvsrari_d(v4i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v32i8_r = __lasx_xvsrli_b(v32i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v16i16_r = __lasx_xvsrli_h(v16i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8i32_r = __lasx_xvsrli_w(v8i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i64_r = __lasx_xvsrli_d(v4i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v32i8_r = __lasx_xvsrlri_b(v32i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v16i16_r = __lasx_xvsrlri_h(v16i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8i32_r = __lasx_xvsrlri_w(v8i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i64_r = __lasx_xvsrlri_d(v4i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v32u8_r = __lasx_xvbitclri_b(v32u8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v16u16_r = __lasx_xvbitclri_h(v16u16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8u32_r = __lasx_xvbitclri_w(v8u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4u64_r = __lasx_xvbitclri_d(v4u64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v32u8_r = __lasx_xvbitseti_b(v32u8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v16u16_r = __lasx_xvbitseti_h(v16u16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8u32_r = __lasx_xvbitseti_w(v8u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4u64_r = __lasx_xvbitseti_d(v4u64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v32u8_r = __lasx_xvbitrevi_b(v32u8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v16u16_r = __lasx_xvbitrevi_h(v16u16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8u32_r = __lasx_xvbitrevi_w(v8u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4u64_r = __lasx_xvbitrevi_d(v4u64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v32i8_r = __lasx_xvaddi_bu(v32i8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16i16_r = __lasx_xvaddi_hu(v16i16_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i32_r = __lasx_xvaddi_wu(v8i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i64_r = __lasx_xvaddi_du(v4i64_a, 32); // expected-error {{argument value 32 is 
outside the valid range [0, 31]}} ++ v32i8_r = __lasx_xvsubi_bu(v32i8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16i16_r = __lasx_xvsubi_hu(v16i16_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i32_r = __lasx_xvsubi_wu(v8i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i64_r = __lasx_xvsubi_du(v4i64_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v32i8_r = __lasx_xvmaxi_b(v32i8_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v16i16_r = __lasx_xvmaxi_h(v16i16_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v8i32_r = __lasx_xvmaxi_w(v8i32_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v4i64_r = __lasx_xvmaxi_d(v4i64_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v32u8_r = __lasx_xvmaxi_bu(v32u8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16u16_r = __lasx_xvmaxi_hu(v16u16_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8u32_r = __lasx_xvmaxi_wu(v8u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4u64_r = __lasx_xvmaxi_du(v4u64_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v32i8_r = __lasx_xvmini_b(v32i8_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v16i16_r = __lasx_xvmini_h(v16i16_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v8i32_r = __lasx_xvmini_w(v8i32_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v4i64_r = __lasx_xvmini_d(v4i64_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v32u8_r = __lasx_xvmini_bu(v32u8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16u16_r = __lasx_xvmini_hu(v16u16_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8u32_r = __lasx_xvmini_wu(v8u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4u64_r = __lasx_xvmini_du(v4u64_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v32i8_r = __lasx_xvseqi_b(v32i8_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v16i16_r = __lasx_xvseqi_h(v16i16_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v8i32_r = __lasx_xvseqi_w(v8i32_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v4i64_r = __lasx_xvseqi_d(v4i64_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v32i8_r = __lasx_xvslti_b(v32i8_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v16i16_r = __lasx_xvslti_h(v16i16_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v8i32_r = __lasx_xvslti_w(v8i32_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v4i64_r = __lasx_xvslti_d(v4i64_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v32i8_r = __lasx_xvslti_bu(v32u8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ 
v16i16_r = __lasx_xvslti_hu(v16u16_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i32_r = __lasx_xvslti_wu(v8u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i64_r = __lasx_xvslti_du(v4u64_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v32i8_r = __lasx_xvslei_b(v32i8_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v16i16_r = __lasx_xvslei_h(v16i16_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v8i32_r = __lasx_xvslei_w(v8i32_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v4i64_r = __lasx_xvslei_d(v4i64_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v32i8_r = __lasx_xvslei_bu(v32u8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16i16_r = __lasx_xvslei_hu(v16u16_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i32_r = __lasx_xvslei_wu(v8u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i64_r = __lasx_xvslei_du(v4u64_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v32i8_r = __lasx_xvsat_b(v32i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v16i16_r = __lasx_xvsat_h(v16i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8i32_r = __lasx_xvsat_w(v8i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i64_r = __lasx_xvsat_d(v4i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v32u8_r = __lasx_xvsat_bu(v32u8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v16u16_r = __lasx_xvsat_hu(v16u16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8u32_r = __lasx_xvsat_wu(v8u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4u64_r = __lasx_xvsat_du(v4u64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v32i8_r = __lasx_xvrepl128vei_b(v32i8_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v16i16_r = __lasx_xvrepl128vei_h(v16i16_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v8i32_r = __lasx_xvrepl128vei_w(v8i32_a, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ v4i64_r = __lasx_xvrepl128vei_d(v4i64_a, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} ++ v32u8_r = __lasx_xvandi_b(v32u8_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v32u8_r = __lasx_xvori_b(v32u8_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v32u8_r = __lasx_xvnori_b(v32u8_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v32u8_r = __lasx_xvxori_b(v32u8_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v32u8_r = __lasx_xvbitseli_b(v32u8_a, v32u8_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v32i8_r = __lasx_xvshuf4i_b(v32i8_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v16i16_r = __lasx_xvshuf4i_h(v16i16_a, 256); // 
expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v8i32_r = __lasx_xvshuf4i_w(v8i32_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v8i32_r = __lasx_xvpermi_w(v8i32_a, v8i32_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v16i16_r = __lasx_xvsllwil_h_b(v32i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v8i32_r = __lasx_xvsllwil_w_h(v16i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v4i64_r = __lasx_xvsllwil_d_w(v8i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16u16_r = __lasx_xvsllwil_hu_bu(v32u8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v8u32_r = __lasx_xvsllwil_wu_hu(v16u16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v4u64_r = __lasx_xvsllwil_du_wu(v8u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v32i8_r = __lasx_xvfrstpi_b(v32i8_a, v32i8_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16i16_r = __lasx_xvfrstpi_h(v16i16_a, v16i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i64_r = __lasx_xvshuf4i_d(v4i64_a, v4i64_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v32i8_r = __lasx_xvbsrl_v(v32i8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v32i8_r = __lasx_xvbsll_v(v32i8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v32i8_r = __lasx_xvextrins_b(v32i8_a, v32i8_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v16i16_r = __lasx_xvextrins_h(v16i16_a, v16i16_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v8i32_r = __lasx_xvextrins_w(v8i32_a, v8i32_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v4i64_r = __lasx_xvextrins_d(v4i64_a, v4i64_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v32i8_r = __lasx_xvld(&v32i8_a, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} ++ __lasx_xvst(v32i8_a, &v32i8_b, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} ++ __lasx_xvstelm_b(v32i8_a, &v32i8_b, 0, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ __lasx_xvstelm_h(v16i16_a, &v16i16_b, 0, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ __lasx_xvstelm_w(v8i32_a, &v8i32_b, 0, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ __lasx_xvstelm_d(v4i64_a, &v4i64_b, 0, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ v8i32_r = __lasx_xvinsve0_w(v8i32_a, v8i32_b, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v4i64_r = __lasx_xvinsve0_d(v4i64_a, v4i64_b, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ v8i32_r = __lasx_xvpickve_w(v8i32_b, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v4i64_r = __lasx_xvpickve_d(v4i64_b, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ v4i64_r = __lasx_xvldi(-4097); // expected-error {{argument value -4097 is outside the valid range 
[-4096, 4095]}} ++ v8i32_r = __lasx_xvinsgr2vr_w(v8i32_a, i32_b, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v4i64_r = __lasx_xvinsgr2vr_d(v4i64_a, i64_b, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ v32i8_r = __lasx_xvpermi_q(v32i8_a, v32i8_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v4i64_r = __lasx_xvpermi_d(v4i64_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v32i8_r = __lasx_xvldrepl_b(&v32i8_a, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} ++ v16i16_r = __lasx_xvldrepl_h(&v16i16_a, -1025); // expected-error {{argument value -1025 is outside the valid range [-1024, 1023]}} ++ v8i32_r = __lasx_xvldrepl_w(&v8i32_a, -513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} ++ v4i64_r = __lasx_xvldrepl_d(&v4i64_a, -257); // expected-error {{argument value -257 is outside the valid range [-256, 255]}} ++ i32_r = __lasx_xvpickve2gr_w(v8i32_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ u32_r = __lasx_xvpickve2gr_wu(v8i32_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ i64_r = __lasx_xvpickve2gr_d(v4i64_a, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ u64_r = __lasx_xvpickve2gr_du(v4i64_a, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ v32i8_r = __lasx_xvrotri_b(v32i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v16i16_r = __lasx_xvrotri_h(v16i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8i32_r = __lasx_xvrotri_w(v8i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i64_r = __lasx_xvrotri_d(v4i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v32i8_r = __lasx_xvsrlni_b_h(v32i8_a, v32i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v16i16_r = __lasx_xvsrlni_h_w(v16i16_a, v16i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i32_r = __lasx_xvsrlni_w_d(v8i32_a, v8i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v4i64_r = __lasx_xvsrlni_d_q(v4i64_a, v4i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v32i8_r = __lasx_xvsrlrni_b_h(v32i8_a, v32i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v16i16_r = __lasx_xvsrlrni_h_w(v16i16_a, v16i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i32_r = __lasx_xvsrlrni_w_d(v8i32_a, v8i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v4i64_r = __lasx_xvsrlrni_d_q(v4i64_a, v4i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v32i8_r = __lasx_xvssrlni_b_h(v32i8_a, v32i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v16i16_r = __lasx_xvssrlni_h_w(v16i16_a, v16i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i32_r = __lasx_xvssrlni_w_d(v8i32_a, v8i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v4i64_r = __lasx_xvssrlni_d_q(v4i64_a, v4i64_b, 128); // expected-error {{argument value 128 is outside 
the valid range [0, 127]}} ++ v32u8_r = __lasx_xvssrlni_bu_h(v32u8_a, v32i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v16u16_r = __lasx_xvssrlni_hu_w(v16u16_a, v16i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8u32_r = __lasx_xvssrlni_wu_d(v8u32_a, v8i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v4u64_r = __lasx_xvssrlni_du_q(v4u64_a, v4i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v32i8_r = __lasx_xvssrlrni_b_h(v32i8_a, v32i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v16i16_r = __lasx_xvssrlrni_h_w(v16i16_a, v16i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i32_r = __lasx_xvssrlrni_w_d(v8i32_a, v8i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v4i64_r = __lasx_xvssrlrni_d_q(v4i64_a, v4i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v32u8_r = __lasx_xvssrlrni_bu_h(v32u8_a, v32i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v16u16_r = __lasx_xvssrlrni_hu_w(v16u16_a, v16i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8u32_r = __lasx_xvssrlrni_wu_d(v8u32_a, v8i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v4u64_r = __lasx_xvssrlrni_du_q(v4u64_a, v4i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v32i8_r = __lasx_xvsrani_b_h(v32i8_a, v32i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v16i16_r = __lasx_xvsrani_h_w(v16i16_a, v16i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i32_r = __lasx_xvsrani_w_d(v8i32_a, v8i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v4i64_r = __lasx_xvsrani_d_q(v4i64_a, v4i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v32i8_r = __lasx_xvsrarni_b_h(v32i8_a, v32i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v16i16_r = __lasx_xvsrarni_h_w(v16i16_a, v16i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i32_r = __lasx_xvsrarni_w_d(v8i32_a, v8i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v4i64_r = __lasx_xvsrarni_d_q(v4i64_a, v4i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v32i8_r = __lasx_xvssrani_b_h(v32i8_a, v32i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v16i16_r = __lasx_xvssrani_h_w(v16i16_a, v16i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i32_r = __lasx_xvssrani_w_d(v8i32_a, v8i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v4i64_r = __lasx_xvssrani_d_q(v4i64_a, v4i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v32u8_r = __lasx_xvssrani_bu_h(v32u8_a, v32i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v16u16_r = __lasx_xvssrani_hu_w(v16u16_a, v16i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8u32_r = __lasx_xvssrani_wu_d(v8u32_a, v8i32_b, 64); // 
expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v4u64_r = __lasx_xvssrani_du_q(v4u64_a, v4i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v32i8_r = __lasx_xvssrarni_b_h(v32i8_a, v32i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v16i16_r = __lasx_xvssrarni_h_w(v16i16_a, v16i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i32_r = __lasx_xvssrarni_w_d(v8i32_a, v8i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v4i64_r = __lasx_xvssrarni_d_q(v4i64_a, v4i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v32u8_r = __lasx_xvssrarni_bu_h(v32u8_a, v32i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v16u16_r = __lasx_xvssrarni_hu_w(v16u16_a, v16i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8u32_r = __lasx_xvssrarni_wu_d(v8u32_a, v8i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v4u64_r = __lasx_xvssrarni_du_q(v4u64_a, v4i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++} +diff --git a/test/CodeGen/builtins-loongarch-lasx.c b/test/CodeGen/builtins-loongarch-lasx.c +new file mode 100644 +index 000000000..0d6a54cb0 +--- /dev/null ++++ b/test/CodeGen/builtins-loongarch-lasx.c +@@ -0,0 +1,3772 @@ ++// REQUIRES: loongarch-registered-target ++// RUN: %clang_cc1 -triple loongarch64-unknown-linux-gnu -emit-llvm %s \ ++// RUN: -target-feature +lasx \ ++// RUN: -o - | FileCheck %s ++ ++#include ++ ++#define ui1_b 1 ++#define ui2 1 ++#define ui2_b ui2 ++#define ui3 4 ++#define ui3_b ui3 ++#define ui4 7 ++#define ui4_b ui4 ++#define ui5 25 ++#define ui5_b ui5 ++#define ui6 44 ++#define ui6_b ui6 ++#define ui7 100 ++#define ui7_b ui7 ++#define ui8 127 //200 ++#define ui8_b ui8 ++#define si5_b -4 ++#define si8 -100 ++#define si9 0 ++#define si10 0 ++#define si11 0 ++#define si12 0 ++#define i10 500 ++#define i13 4000 ++#define mode 0 ++#define idx1 1 ++#define idx2 2 ++#define idx3 4 ++#define idx4 8 ++ ++void test(void) { ++ v32i8 v32i8_a = (v32i8){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, ++ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}; ++ v32i8 v32i8_b = (v32i8){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, ++ 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}; ++ v32i8 v32i8_c = (v32i8){2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, ++ 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33}; ++ v32i8 v32i8_r; ++ ++ v16i16 v16i16_a = (v16i16){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; ++ v16i16 v16i16_b = (v16i16){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; ++ v16i16 v16i16_c = (v16i16){2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17}; ++ v16i16 v16i16_r; ++ ++ v8i32 v8i32_a = (v8i32){0, 1, 2, 3, 4, 5, 6, 7}; ++ v8i32 v8i32_b = (v8i32){1, 2, 3, 4, 5, 6, 7, 8}; ++ v8i32 v8i32_c = (v8i32){2, 3, 4, 5, 6, 7, 8, 9}; ++ v8i32 v8i32_r; ++ ++ v4i64 v4i64_a = (v4i64){0, 1, 2, 3}; ++ v4i64 v4i64_b = (v4i64){1, 2, 3, 4}; ++ v4i64 v4i64_c = (v4i64){2, 3, 4, 5}; ++ v4i64 v4i64_r; ++ ++ v32u8 v32u8_a = (v32u8){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, ++ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}; ++ v32u8 v32u8_b = (v32u8){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, ++ 17, 18, 19, 
20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}; ++ v32u8 v32u8_c = (v32u8){2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, ++ 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33}; ++ v32u8 v32u8_r; ++ ++ v16u16 v16u16_a = (v16u16){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; ++ v16u16 v16u16_b = (v16u16){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; ++ v16u16 v16u16_c = (v16u16){2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17}; ++ v16u16 v16u16_r; ++ ++ v8u32 v8u32_a = (v8u32){0, 1, 2, 3, 4, 5, 6, 7}; ++ v8u32 v8u32_b = (v8u32){1, 2, 3, 4, 5, 6, 7, 8}; ++ v8u32 v8u32_c = (v8u32){2, 3, 4, 5, 6, 7, 8, 9}; ++ v8u32 v8u32_r; ++ ++ v4u64 v4u64_a = (v4u64){0, 1, 2, 3}; ++ v4u64 v4u64_b = (v4u64){1, 2, 3, 4}; ++ v4u64 v4u64_c = (v4u64){2, 3, 4, 5}; ++ v4u64 v4u64_r; ++ ++ v8f32 v8f32_a = (v8f32){0.5, 1, 2, 3, 4, 5, 6, 7}; ++ v8f32 v8f32_b = (v8f32){1.5, 2, 3, 4, 5, 6, 7, 8}; ++ v8f32 v8f32_c = (v8f32){2.5, 3, 4, 5, 6, 7, 8, 9}; ++ v8f32 v8f32_r; ++ v4f64 v4f64_a = (v4f64){0.5, 1, 2, 3}; ++ v4f64 v4f64_b = (v4f64){1.5, 2, 3, 4}; ++ v4f64 v4f64_c = (v4f64){2.5, 3, 4, 5}; ++ v4f64 v4f64_r; ++ ++ int i32_r; ++ int i32_a = 1; ++ int i32_b = 2; ++ unsigned int u32_r; ++ unsigned int u32_a = 1; ++ unsigned int u32_b = 2; ++ long long i64_r; ++ long long i64_a = 1; ++ long long i64_b = 2; ++ long long i64_c = 3; ++ long int i64_d = 0; ++ unsigned long long u64_r; ++ unsigned long long u64_a = 1; ++ unsigned long long u64_b = 2; ++ unsigned long long u64_c = 3; ++ ++ // __lasx_xvsll_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvsll_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsll.b( ++ ++ // __lasx_xvsll_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvsll_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsll.h( ++ ++ // __lasx_xvsll_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvsll_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsll.w( ++ ++ // __lasx_xvsll_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvsll_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsll.d( ++ ++ // __lasx_xvslli_b ++ // xd, xj, ui3 ++ // V32QI, V32QI, UQI ++ v32i8_r = __lasx_xvslli_b(v32i8_a, ui3_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvslli.b( ++ ++ // __lasx_xvslli_h ++ // xd, xj, ui4 ++ // V16HI, V16HI, UQI ++ v16i16_r = __lasx_xvslli_h(v16i16_a, ui4_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvslli.h( ++ ++ // __lasx_xvslli_w ++ // xd, xj, ui5 ++ // V8SI, V8SI, UQI ++ v8i32_r = __lasx_xvslli_w(v8i32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvslli.w( ++ ++ // __lasx_xvslli_d ++ // xd, xj, ui6 ++ // V4DI, V4DI, UQI ++ v4i64_r = __lasx_xvslli_d(v4i64_a, ui6_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvslli.d( ++ ++ // __lasx_xvsra_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvsra_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsra.b( ++ ++ // __lasx_xvsra_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvsra_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsra.h( ++ ++ // __lasx_xvsra_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvsra_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsra.w( ++ ++ // __lasx_xvsra_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvsra_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsra.d( ++ ++ // 
__lasx_xvsrai_b ++ // xd, xj, ui3 ++ // V32QI, V32QI, UQI ++ v32i8_r = __lasx_xvsrai_b(v32i8_a, ui3_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsrai.b( ++ ++ // __lasx_xvsrai_h ++ // xd, xj, ui4 ++ // V16HI, V16HI, UQI ++ v16i16_r = __lasx_xvsrai_h(v16i16_a, ui4_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrai.h( ++ ++ // __lasx_xvsrai_w ++ // xd, xj, ui5 ++ // V8SI, V8SI, UQI ++ v8i32_r = __lasx_xvsrai_w(v8i32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsrai.w( ++ ++ // __lasx_xvsrai_d ++ // xd, xj, ui6 ++ // V4DI, V4DI, UQI ++ v4i64_r = __lasx_xvsrai_d(v4i64_a, ui6_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsrai.d( ++ ++ // __lasx_xvsrar_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvsrar_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsrar.b( ++ ++ // __lasx_xvsrar_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvsrar_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrar.h( ++ ++ // __lasx_xvsrar_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvsrar_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsrar.w( ++ ++ // __lasx_xvsrar_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvsrar_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsrar.d( ++ ++ // __lasx_xvsrari_b ++ // xd, xj, ui3 ++ // V32QI, V32QI, UQI ++ v32i8_r = __lasx_xvsrari_b(v32i8_a, ui3_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsrari.b( ++ ++ // __lasx_xvsrari_h ++ // xd, xj, ui4 ++ // V16HI, V16HI, UQI ++ v16i16_r = __lasx_xvsrari_h(v16i16_a, ui4_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrari.h( ++ ++ // __lasx_xvsrari_w ++ // xd, xj, ui5 ++ // V8SI, V8SI, UQI ++ v8i32_r = __lasx_xvsrari_w(v8i32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsrari.w( ++ ++ // __lasx_xvsrari_d ++ // xd, xj, ui6 ++ // V4DI, V4DI, UQI ++ v4i64_r = __lasx_xvsrari_d(v4i64_a, ui6_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsrari.d( ++ ++ // __lasx_xvsrl_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvsrl_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsrl.b( ++ ++ // __lasx_xvsrl_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvsrl_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrl.h( ++ ++ // __lasx_xvsrl_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvsrl_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsrl.w( ++ ++ // __lasx_xvsrl_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvsrl_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsrl.d( ++ ++ // __lasx_xvsrli_b ++ // xd, xj, ui3 ++ // V32QI, V32QI, UQI ++ v32i8_r = __lasx_xvsrli_b(v32i8_a, ui3_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsrli.b( ++ ++ // __lasx_xvsrli_h ++ // xd, xj, ui4 ++ // V16HI, V16HI, UQI ++ v16i16_r = __lasx_xvsrli_h(v16i16_a, ui4_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrli.h( ++ ++ // __lasx_xvsrli_w ++ // xd, xj, ui5 ++ // V8SI, V8SI, UQI ++ v8i32_r = __lasx_xvsrli_w(v8i32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsrli.w( ++ ++ // __lasx_xvsrli_d ++ // xd, xj, ui6 ++ // V4DI, V4DI, UQI ++ v4i64_r = __lasx_xvsrli_d(v4i64_a, ui6_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsrli.d( ++ ++ // __lasx_xvsrlr_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvsrlr_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> 
@llvm.loongarch.lasx.xvsrlr.b( ++ ++ // __lasx_xvsrlr_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvsrlr_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrlr.h( ++ ++ // __lasx_xvsrlr_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvsrlr_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsrlr.w( ++ ++ // __lasx_xvsrlr_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvsrlr_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsrlr.d( ++ ++ // __lasx_xvsrlri_b ++ // xd, xj, ui3 ++ // V32QI, V32QI, UQI ++ v32i8_r = __lasx_xvsrlri_b(v32i8_a, ui3_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsrlri.b( ++ ++ // __lasx_xvsrlri_h ++ // xd, xj, ui4 ++ // V16HI, V16HI, UQI ++ v16i16_r = __lasx_xvsrlri_h(v16i16_a, ui4_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrlri.h( ++ ++ // __lasx_xvsrlri_w ++ // xd, xj, ui5 ++ // V8SI, V8SI, UQI ++ v8i32_r = __lasx_xvsrlri_w(v8i32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsrlri.w( ++ ++ // __lasx_xvsrlri_d ++ // xd, xj, ui6 ++ // V4DI, V4DI, UQI ++ v4i64_r = __lasx_xvsrlri_d(v4i64_a, ui6_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsrlri.d( ++ ++ // __lasx_xvbitclr_b ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvbitclr_b(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvbitclr.b( ++ ++ // __lasx_xvbitclr_h ++ // xd, xj, xk ++ // UV16HI, UV16HI, UV16HI ++ v16u16_r = __lasx_xvbitclr_h(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvbitclr.h( ++ ++ // __lasx_xvbitclr_w ++ // xd, xj, xk ++ // UV8SI, UV8SI, UV8SI ++ v8u32_r = __lasx_xvbitclr_w(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvbitclr.w( ++ ++ // __lasx_xvbitclr_d ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvbitclr_d(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvbitclr.d( ++ ++ // __lasx_xvbitclri_b ++ // xd, xj, ui3 ++ // UV32QI, UV32QI, UQI ++ v32u8_r = __lasx_xvbitclri_b(v32u8_a, ui3_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvbitclri.b( ++ ++ // __lasx_xvbitclri_h ++ // xd, xj, ui4 ++ // UV16HI, UV16HI, UQI ++ v16u16_r = __lasx_xvbitclri_h(v16u16_a, ui4_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvbitclri.h( ++ ++ // __lasx_xvbitclri_w ++ // xd, xj, ui5 ++ // UV8SI, UV8SI, UQI ++ v8u32_r = __lasx_xvbitclri_w(v8u32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvbitclri.w( ++ ++ // __lasx_xvbitclri_d ++ // xd, xj, ui6 ++ // UV4DI, UV4DI, UQI ++ v4u64_r = __lasx_xvbitclri_d(v4u64_a, ui6_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvbitclri.d( ++ ++ // __lasx_xvbitset_b ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvbitset_b(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvbitset.b( ++ ++ // __lasx_xvbitset_h ++ // xd, xj, xk ++ // UV16HI, UV16HI, UV16HI ++ v16u16_r = __lasx_xvbitset_h(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvbitset.h( ++ ++ // __lasx_xvbitset_w ++ // xd, xj, xk ++ // UV8SI, UV8SI, UV8SI ++ v8u32_r = __lasx_xvbitset_w(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvbitset.w( ++ ++ // __lasx_xvbitset_d ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvbitset_d(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvbitset.d( ++ ++ // __lasx_xvbitseti_b ++ // xd, xj, ui3 ++ // UV32QI, UV32QI, UQI ++ v32u8_r = __lasx_xvbitseti_b(v32u8_a, ui3_b); // CHECK: call <32 x i8> 
@llvm.loongarch.lasx.xvbitseti.b( ++ ++ // __lasx_xvbitseti_h ++ // xd, xj, ui4 ++ // UV16HI, UV16HI, UQI ++ v16u16_r = __lasx_xvbitseti_h(v16u16_a, ui4_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvbitseti.h( ++ ++ // __lasx_xvbitseti_w ++ // xd, xj, ui5 ++ // UV8SI, UV8SI, UQI ++ v8u32_r = __lasx_xvbitseti_w(v8u32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvbitseti.w( ++ ++ // __lasx_xvbitseti_d ++ // xd, xj, ui6 ++ // UV4DI, UV4DI, UQI ++ v4u64_r = __lasx_xvbitseti_d(v4u64_a, ui6_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvbitseti.d( ++ ++ // __lasx_xvbitrev_b ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvbitrev_b(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvbitrev.b( ++ ++ // __lasx_xvbitrev_h ++ // xd, xj, xk ++ // UV16HI, UV16HI, UV16HI ++ v16u16_r = __lasx_xvbitrev_h(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvbitrev.h( ++ ++ // __lasx_xvbitrev_w ++ // xd, xj, xk ++ // UV8SI, UV8SI, UV8SI ++ v8u32_r = __lasx_xvbitrev_w(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvbitrev.w( ++ ++ // __lasx_xvbitrev_d ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvbitrev_d(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvbitrev.d( ++ ++ // __lasx_xvbitrevi_b ++ // xd, xj, ui3 ++ // UV32QI, UV32QI, UQI ++ v32u8_r = __lasx_xvbitrevi_b(v32u8_a, ui3_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvbitrevi.b( ++ ++ // __lasx_xvbitrevi_h ++ // xd, xj, ui4 ++ // UV16HI, UV16HI, UQI ++ v16u16_r = __lasx_xvbitrevi_h(v16u16_a, ui4_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvbitrevi.h( ++ ++ // __lasx_xvbitrevi_w ++ // xd, xj, ui5 ++ // UV8SI, UV8SI, UQI ++ v8u32_r = __lasx_xvbitrevi_w(v8u32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvbitrevi.w( ++ ++ // __lasx_xvbitrevi_d ++ // xd, xj, ui6 ++ // UV4DI, UV4DI, UQI ++ v4u64_r = __lasx_xvbitrevi_d(v4u64_a, ui6_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvbitrevi.d( ++ ++ // __lasx_xvadd_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvadd_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvadd.b( ++ ++ // __lasx_xvadd_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvadd_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvadd.h( ++ ++ // __lasx_xvadd_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvadd_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvadd.w( ++ ++ // __lasx_xvadd_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvadd_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvadd.d( ++ ++ // __lasx_xvaddi_bu ++ // xd, xj, ui5 ++ // V32QI, V32QI, UQI ++ v32i8_r = __lasx_xvaddi_bu(v32i8_a, ui5_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvaddi.bu( ++ ++ // __lasx_xvaddi_hu ++ // xd, xj, ui5 ++ // V16HI, V16HI, UQI ++ v16i16_r = __lasx_xvaddi_hu(v16i16_a, ui5_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvaddi.hu( ++ ++ // __lasx_xvaddi_wu ++ // xd, xj, ui5 ++ // V8SI, V8SI, UQI ++ v8i32_r = __lasx_xvaddi_wu(v8i32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvaddi.wu( ++ ++ // __lasx_xvaddi_du ++ // xd, xj, ui5 ++ // V4DI, V4DI, UQI ++ v4i64_r = __lasx_xvaddi_du(v4i64_a, ui5_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvaddi.du( ++ ++ // __lasx_xvsub_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvsub_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsub.b( ++ ++ // 
__lasx_xvsub_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvsub_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsub.h( ++ ++ // __lasx_xvsub_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvsub_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsub.w( ++ ++ // __lasx_xvsub_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvsub_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsub.d( ++ ++ // __lasx_xvsubi_bu ++ // xd, xj, ui5 ++ // V32QI, V32QI, UQI ++ v32i8_r = __lasx_xvsubi_bu(v32i8_a, ui5_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsubi.bu( ++ ++ // __lasx_xvsubi_hu ++ // xd, xj, ui5 ++ // V16HI, V16HI, UQI ++ v16i16_r = __lasx_xvsubi_hu(v16i16_a, ui5_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsubi.hu( ++ ++ // __lasx_xvsubi_wu ++ // xd, xj, ui5 ++ // V8SI, V8SI, UQI ++ v8i32_r = __lasx_xvsubi_wu(v8i32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsubi.wu( ++ ++ // __lasx_xvsubi_du ++ // xd, xj, ui5 ++ // V4DI, V4DI, UQI ++ v4i64_r = __lasx_xvsubi_du(v4i64_a, ui5_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsubi.du( ++ ++ // __lasx_xvmax_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvmax_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmax.b( ++ ++ // __lasx_xvmax_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvmax_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmax.h( ++ ++ // __lasx_xvmax_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvmax_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmax.w( ++ ++ // __lasx_xvmax_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvmax_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmax.d( ++ ++ // __lasx_xvmaxi_b ++ // xd, xj, si5 ++ // V32QI, V32QI, QI ++ v32i8_r = __lasx_xvmaxi_b(v32i8_a, si5_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmaxi.b( ++ ++ // __lasx_xvmaxi_h ++ // xd, xj, si5 ++ // V16HI, V16HI, QI ++ v16i16_r = __lasx_xvmaxi_h(v16i16_a, si5_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmaxi.h( ++ ++ // __lasx_xvmaxi_w ++ // xd, xj, si5 ++ // V8SI, V8SI, QI ++ v8i32_r = __lasx_xvmaxi_w(v8i32_a, si5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmaxi.w( ++ ++ // __lasx_xvmaxi_d ++ // xd, xj, si5 ++ // V4DI, V4DI, QI ++ v4i64_r = __lasx_xvmaxi_d(v4i64_a, si5_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaxi.d( ++ ++ // __lasx_xvmax_bu ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvmax_bu(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmax.bu( ++ ++ // __lasx_xvmax_hu ++ // xd, xj, xk ++ // UV16HI, UV16HI, UV16HI ++ v16u16_r = __lasx_xvmax_hu(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmax.hu( ++ ++ // __lasx_xvmax_wu ++ // xd, xj, xk ++ // UV8SI, UV8SI, UV8SI ++ v8u32_r = __lasx_xvmax_wu(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmax.wu( ++ ++ // __lasx_xvmax_du ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvmax_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmax.du( ++ ++ // __lasx_xvmaxi_bu ++ // xd, xj, ui5 ++ // UV32QI, UV32QI, UQI ++ v32u8_r = __lasx_xvmaxi_bu(v32u8_a, ui5_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmaxi.bu( ++ ++ // __lasx_xvmaxi_hu ++ // xd, xj, ui5 ++ // UV16HI, UV16HI, UQI ++ v16u16_r = __lasx_xvmaxi_hu(v16u16_a, ui5_b); // CHECK: call <16 x i16> 
@llvm.loongarch.lasx.xvmaxi.hu( ++ ++ // __lasx_xvmaxi_wu ++ // xd, xj, ui5 ++ // UV8SI, UV8SI, UQI ++ v8u32_r = __lasx_xvmaxi_wu(v8u32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmaxi.wu( ++ ++ // __lasx_xvmaxi_du ++ // xd, xj, ui5 ++ // UV4DI, UV4DI, UQI ++ v4u64_r = __lasx_xvmaxi_du(v4u64_a, ui5_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaxi.du( ++ ++ // __lasx_xvmin_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvmin_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmin.b( ++ ++ // __lasx_xvmin_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvmin_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmin.h( ++ ++ // __lasx_xvmin_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvmin_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmin.w( ++ ++ // __lasx_xvmin_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvmin_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmin.d( ++ ++ // __lasx_xvmini_b ++ // xd, xj, si5 ++ // V32QI, V32QI, QI ++ v32i8_r = __lasx_xvmini_b(v32i8_a, si5_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmini.b( ++ ++ // __lasx_xvmini_h ++ // xd, xj, si5 ++ // V16HI, V16HI, QI ++ v16i16_r = __lasx_xvmini_h(v16i16_a, si5_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmini.h( ++ ++ // __lasx_xvmini_w ++ // xd, xj, si5 ++ // V8SI, V8SI, QI ++ v8i32_r = __lasx_xvmini_w(v8i32_a, si5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmini.w( ++ ++ // __lasx_xvmini_d ++ // xd, xj, si5 ++ // V4DI, V4DI, QI ++ v4i64_r = __lasx_xvmini_d(v4i64_a, si5_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmini.d( ++ ++ // __lasx_xvmin_bu ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvmin_bu(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmin.bu( ++ ++ // __lasx_xvmin_hu ++ // xd, xj, xk ++ // UV16HI, UV16HI, UV16HI ++ v16u16_r = __lasx_xvmin_hu(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmin.hu( ++ ++ // __lasx_xvmin_wu ++ // xd, xj, xk ++ // UV8SI, UV8SI, UV8SI ++ v8u32_r = __lasx_xvmin_wu(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmin.wu( ++ ++ // __lasx_xvmin_du ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvmin_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmin.du( ++ ++ // __lasx_xvmini_bu ++ // xd, xj, ui5 ++ // UV32QI, UV32QI, UQI ++ v32u8_r = __lasx_xvmini_bu(v32u8_a, ui5_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmini.bu( ++ ++ // __lasx_xvmini_hu ++ // xd, xj, ui5 ++ // UV16HI, UV16HI, UQI ++ v16u16_r = __lasx_xvmini_hu(v16u16_a, ui5_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmini.hu( ++ ++ // __lasx_xvmini_wu ++ // xd, xj, ui5 ++ // UV8SI, UV8SI, UQI ++ v8u32_r = __lasx_xvmini_wu(v8u32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmini.wu( ++ ++ // __lasx_xvmini_du ++ // xd, xj, ui5 ++ // UV4DI, UV4DI, UQI ++ v4u64_r = __lasx_xvmini_du(v4u64_a, ui5_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmini.du( ++ ++ // __lasx_xvseq_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvseq_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvseq.b( ++ ++ // __lasx_xvseq_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvseq_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvseq.h( ++ ++ // __lasx_xvseq_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = 
__lasx_xvseq_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvseq.w( ++ ++ // __lasx_xvseq_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvseq_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvseq.d( ++ ++ // __lasx_xvseqi_b ++ // xd, xj, si5 ++ // V32QI, V32QI, QI ++ v32i8_r = __lasx_xvseqi_b(v32i8_a, si5_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvseqi.b( ++ ++ // __lasx_xvseqi_h ++ // xd, xj, si5 ++ // V16HI, V16HI, QI ++ v16i16_r = __lasx_xvseqi_h(v16i16_a, si5_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvseqi.h( ++ ++ // __lasx_xvseqi_w ++ // xd, xj, si5 ++ // V8SI, V8SI, QI ++ v8i32_r = __lasx_xvseqi_w(v8i32_a, si5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvseqi.w( ++ ++ // __lasx_xvseqi_d ++ // xd, xj, si5 ++ // V4DI, V4DI, QI ++ v4i64_r = __lasx_xvseqi_d(v4i64_a, si5_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvseqi.d( ++ ++ // __lasx_xvslt_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvslt_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvslt.b( ++ ++ // __lasx_xvslt_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvslt_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvslt.h( ++ ++ // __lasx_xvslt_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvslt_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvslt.w( ++ ++ // __lasx_xvslt_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvslt_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvslt.d( ++ ++ // __lasx_xvslti_b ++ // xd, xj, si5 ++ // V32QI, V32QI, QI ++ v32i8_r = __lasx_xvslti_b(v32i8_a, si5_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvslti.b( ++ ++ // __lasx_xvslti_h ++ // xd, xj, si5 ++ // V16HI, V16HI, QI ++ v16i16_r = __lasx_xvslti_h(v16i16_a, si5_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvslti.h( ++ ++ // __lasx_xvslti_w ++ // xd, xj, si5 ++ // V8SI, V8SI, QI ++ v8i32_r = __lasx_xvslti_w(v8i32_a, si5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvslti.w( ++ ++ // __lasx_xvslti_d ++ // xd, xj, si5 ++ // V4DI, V4DI, QI ++ v4i64_r = __lasx_xvslti_d(v4i64_a, si5_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvslti.d( ++ ++ // __lasx_xvslt_bu ++ // xd, xj, xk ++ // V32QI, UV32QI, UV32QI ++ v32i8_r = __lasx_xvslt_bu(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvslt.bu( ++ ++ // __lasx_xvslt_hu ++ // xd, xj, xk ++ // V16HI, UV16HI, UV16HI ++ v16i16_r = __lasx_xvslt_hu(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvslt.hu( ++ ++ // __lasx_xvslt_wu ++ // xd, xj, xk ++ // V8SI, UV8SI, UV8SI ++ v8i32_r = __lasx_xvslt_wu(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvslt.wu( ++ ++ // __lasx_xvslt_du ++ // xd, xj, xk ++ // V4DI, UV4DI, UV4DI ++ v4i64_r = __lasx_xvslt_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvslt.du( ++ ++ // __lasx_xvslti_bu ++ // xd, xj, ui5 ++ // V32QI, UV32QI, UQI ++ v32i8_r = __lasx_xvslti_bu(v32u8_a, ui5_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvslti.bu( ++ ++ // __lasx_xvslti_hu ++ // xd, xj, ui5 ++ // V16HI, UV16HI, UQI ++ v16i16_r = __lasx_xvslti_hu(v16u16_a, ui5_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvslti.hu( ++ ++ // __lasx_xvslti_wu ++ // xd, xj, ui5 ++ // V8SI, UV8SI, UQI ++ v8i32_r = __lasx_xvslti_wu(v8u32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvslti.wu( ++ ++ // __lasx_xvslti_du ++ // xd, xj, ui5 ++ // V4DI, UV4DI, UQI ++ 
v4i64_r = __lasx_xvslti_du(v4u64_a, ui5_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvslti.du( ++ ++ // __lasx_xvsle_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvsle_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsle.b( ++ ++ // __lasx_xvsle_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvsle_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsle.h( ++ ++ // __lasx_xvsle_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvsle_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsle.w( ++ ++ // __lasx_xvsle_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvsle_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsle.d( ++ ++ // __lasx_xvslei_b ++ // xd, xj, si5 ++ // V32QI, V32QI, QI ++ v32i8_r = __lasx_xvslei_b(v32i8_a, si5_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvslei.b( ++ ++ // __lasx_xvslei_h ++ // xd, xj, si5 ++ // V16HI, V16HI, QI ++ v16i16_r = __lasx_xvslei_h(v16i16_a, si5_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvslei.h( ++ ++ // __lasx_xvslei_w ++ // xd, xj, si5 ++ // V8SI, V8SI, QI ++ v8i32_r = __lasx_xvslei_w(v8i32_a, si5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvslei.w( ++ ++ // __lasx_xvslei_d ++ // xd, xj, si5 ++ // V4DI, V4DI, QI ++ v4i64_r = __lasx_xvslei_d(v4i64_a, si5_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvslei.d( ++ ++ // __lasx_xvsle_bu ++ // xd, xj, xk ++ // V32QI, UV32QI, UV32QI ++ v32i8_r = __lasx_xvsle_bu(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsle.bu( ++ ++ // __lasx_xvsle_hu ++ // xd, xj, xk ++ // V16HI, UV16HI, UV16HI ++ v16i16_r = __lasx_xvsle_hu(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsle.hu( ++ ++ // __lasx_xvsle_wu ++ // xd, xj, xk ++ // V8SI, UV8SI, UV8SI ++ v8i32_r = __lasx_xvsle_wu(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsle.wu( ++ ++ // __lasx_xvsle_du ++ // xd, xj, xk ++ // V4DI, UV4DI, UV4DI ++ v4i64_r = __lasx_xvsle_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsle.du( ++ ++ // __lasx_xvslei_bu ++ // xd, xj, ui5 ++ // V32QI, UV32QI, UQI ++ v32i8_r = __lasx_xvslei_bu(v32u8_a, ui5_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvslei.bu( ++ ++ // __lasx_xvslei_hu ++ // xd, xj, ui5 ++ // V16HI, UV16HI, UQI ++ v16i16_r = __lasx_xvslei_hu(v16u16_a, ui5_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvslei.hu( ++ ++ // __lasx_xvslei_wu ++ // xd, xj, ui5 ++ // V8SI, UV8SI, UQI ++ v8i32_r = __lasx_xvslei_wu(v8u32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvslei.wu( ++ ++ // __lasx_xvslei_du ++ // xd, xj, ui5 ++ // V4DI, UV4DI, UQI ++ v4i64_r = __lasx_xvslei_du(v4u64_a, ui5_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvslei.du( ++ ++ // __lasx_xvsat_b ++ // xd, xj, ui3 ++ // V32QI, V32QI, UQI ++ v32i8_r = __lasx_xvsat_b(v32i8_a, ui3_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsat.b( ++ ++ // __lasx_xvsat_h ++ // xd, xj, ui4 ++ // V16HI, V16HI, UQI ++ v16i16_r = __lasx_xvsat_h(v16i16_a, ui4_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsat.h( ++ ++ // __lasx_xvsat_w ++ // xd, xj, ui5 ++ // V8SI, V8SI, UQI ++ v8i32_r = __lasx_xvsat_w(v8i32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsat.w( ++ ++ // __lasx_xvsat_d ++ // xd, xj, ui6 ++ // V4DI, V4DI, UQI ++ v4i64_r = __lasx_xvsat_d(v4i64_a, ui6_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsat.d( ++ ++ // __lasx_xvsat_bu ++ // xd, xj, ui3 ++ // UV32QI, 
UV32QI, UQI ++ v32u8_r = __lasx_xvsat_bu(v32u8_a, ui3_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsat.bu( ++ ++ // __lasx_xvsat_hu ++ // xd, xj, ui4 ++ // UV16HI, UV16HI, UQI ++ v16u16_r = __lasx_xvsat_hu(v16u16_a, ui4_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsat.hu( ++ ++ // __lasx_xvsat_wu ++ // xd, xj, ui5 ++ // UV8SI, UV8SI, UQI ++ v8u32_r = __lasx_xvsat_wu(v8u32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsat.wu( ++ ++ // __lasx_xvsat_du ++ // xd, xj, ui6 ++ // UV4DI, UV4DI, UQI ++ v4u64_r = __lasx_xvsat_du(v4u64_a, ui6_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsat.du( ++ ++ // __lasx_xvadda_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvadda_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvadda.b( ++ ++ // __lasx_xvadda_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvadda_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvadda.h( ++ ++ // __lasx_xvadda_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvadda_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvadda.w( ++ ++ // __lasx_xvadda_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvadda_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvadda.d( ++ ++ // __lasx_xvsadd_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvsadd_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsadd.b( ++ ++ // __lasx_xvsadd_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvsadd_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsadd.h( ++ ++ // __lasx_xvsadd_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvsadd_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsadd.w( ++ ++ // __lasx_xvsadd_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvsadd_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsadd.d( ++ ++ // __lasx_xvsadd_bu ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvsadd_bu(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsadd.bu( ++ ++ // __lasx_xvsadd_hu ++ // xd, xj, xk ++ // UV16HI, UV16HI, UV16HI ++ v16u16_r = __lasx_xvsadd_hu(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsadd.hu( ++ ++ // __lasx_xvsadd_wu ++ // xd, xj, xk ++ // UV8SI, UV8SI, UV8SI ++ v8u32_r = __lasx_xvsadd_wu(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsadd.wu( ++ ++ // __lasx_xvsadd_du ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvsadd_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsadd.du( ++ ++ // __lasx_xvavg_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvavg_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvavg.b( ++ ++ // __lasx_xvavg_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvavg_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvavg.h( ++ ++ // __lasx_xvavg_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvavg_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvavg.w( ++ ++ // __lasx_xvavg_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvavg_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvavg.d( ++ ++ // __lasx_xvavg_bu ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvavg_bu(v32u8_a, v32u8_b); // CHECK: call <32 x i8> 
@llvm.loongarch.lasx.xvavg.bu( ++ ++ // __lasx_xvavg_hu ++ // xd, xj, xk ++ // UV16HI, UV16HI, UV16HI ++ v16u16_r = __lasx_xvavg_hu(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvavg.hu( ++ ++ // __lasx_xvavg_wu ++ // xd, xj, xk ++ // UV8SI, UV8SI, UV8SI ++ v8u32_r = __lasx_xvavg_wu(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvavg.wu( ++ ++ // __lasx_xvavg_du ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvavg_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvavg.du( ++ ++ // __lasx_xvavgr_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvavgr_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvavgr.b( ++ ++ // __lasx_xvavgr_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvavgr_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvavgr.h( ++ ++ // __lasx_xvavgr_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvavgr_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvavgr.w( ++ ++ // __lasx_xvavgr_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvavgr_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvavgr.d( ++ ++ // __lasx_xvavgr_bu ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvavgr_bu(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvavgr.bu( ++ ++ // __lasx_xvavgr_hu ++ // xd, xj, xk ++ // UV16HI, UV16HI, UV16HI ++ v16u16_r = __lasx_xvavgr_hu(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvavgr.hu( ++ ++ // __lasx_xvavgr_wu ++ // xd, xj, xk ++ // UV8SI, UV8SI, UV8SI ++ v8u32_r = __lasx_xvavgr_wu(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvavgr.wu( ++ ++ // __lasx_xvavgr_du ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvavgr_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvavgr.du( ++ ++ // __lasx_xvssub_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvssub_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssub.b( ++ ++ // __lasx_xvssub_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvssub_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssub.h( ++ ++ // __lasx_xvssub_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvssub_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssub.w( ++ ++ // __lasx_xvssub_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvssub_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvssub.d( ++ ++ // __lasx_xvssub_bu ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvssub_bu(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssub.bu( ++ ++ // __lasx_xvssub_hu ++ // xd, xj, xk ++ // UV16HI, UV16HI, UV16HI ++ v16u16_r = __lasx_xvssub_hu(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssub.hu( ++ ++ // __lasx_xvssub_wu ++ // xd, xj, xk ++ // UV8SI, UV8SI, UV8SI ++ v8u32_r = __lasx_xvssub_wu(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssub.wu( ++ ++ // __lasx_xvssub_du ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvssub_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvssub.du( ++ ++ // __lasx_xvabsd_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvabsd_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvabsd.b( ++ ++ // __lasx_xvabsd_h ++ // xd, 
xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvabsd_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvabsd.h( ++ ++ // __lasx_xvabsd_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvabsd_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvabsd.w( ++ ++ // __lasx_xvabsd_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvabsd_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvabsd.d( ++ ++ // __lasx_xvabsd_bu ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvabsd_bu(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvabsd.bu( ++ ++ // __lasx_xvabsd_hu ++ // xd, xj, xk ++ // UV16HI, UV16HI, UV16HI ++ v16u16_r = __lasx_xvabsd_hu(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvabsd.hu( ++ ++ // __lasx_xvabsd_wu ++ // xd, xj, xk ++ // UV8SI, UV8SI, UV8SI ++ v8u32_r = __lasx_xvabsd_wu(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvabsd.wu( ++ ++ // __lasx_xvabsd_du ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvabsd_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvabsd.du( ++ ++ // __lasx_xvmul_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvmul_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmul.b( ++ ++ // __lasx_xvmul_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvmul_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmul.h( ++ ++ // __lasx_xvmul_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvmul_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmul.w( ++ ++ // __lasx_xvmul_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvmul_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmul.d( ++ ++ // __lasx_xvmadd_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvmadd_b(v32i8_a, v32i8_b, v32i8_c); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmadd.b( ++ ++ // __lasx_xvmadd_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvmadd_h(v16i16_a, v16i16_b, v16i16_c); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmadd.h( ++ ++ // __lasx_xvmadd_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvmadd_w(v8i32_a, v8i32_b, v8i32_c); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmadd.w( ++ ++ // __lasx_xvmadd_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvmadd_d(v4i64_a, v4i64_b, v4i64_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmadd.d( ++ ++ // __lasx_xvmsub_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvmsub_b(v32i8_a, v32i8_b, v32i8_c); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmsub.b( ++ ++ // __lasx_xvmsub_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvmsub_h(v16i16_a, v16i16_b, v16i16_c); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmsub.h( ++ ++ // __lasx_xvmsub_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvmsub_w(v8i32_a, v8i32_b, v8i32_c); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmsub.w( ++ ++ // __lasx_xvmsub_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvmsub_d(v4i64_a, v4i64_b, v4i64_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmsub.d( ++ ++ // __lasx_xvdiv_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvdiv_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvdiv.b( ++ ++ // __lasx_xvdiv_h 
++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvdiv_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvdiv.h( ++ ++ // __lasx_xvdiv_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvdiv_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvdiv.w( ++ ++ // __lasx_xvdiv_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvdiv_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvdiv.d( ++ ++ // __lasx_xvdiv_bu ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvdiv_bu(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvdiv.bu( ++ ++ // __lasx_xvdiv_hu ++ // xd, xj, xk ++ // UV16HI, UV16HI, UV16HI ++ v16u16_r = __lasx_xvdiv_hu(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvdiv.hu( ++ ++ // __lasx_xvdiv_wu ++ // xd, xj, xk ++ // UV8SI, UV8SI, UV8SI ++ v8u32_r = __lasx_xvdiv_wu(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvdiv.wu( ++ ++ // __lasx_xvdiv_du ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvdiv_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvdiv.du( ++ ++ // __lasx_xvhaddw_h_b ++ // xd, xj, xk ++ // V16HI, V32QI, V32QI ++ v16i16_r = __lasx_xvhaddw_h_b(v32i8_a, v32i8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvhaddw.h.b( ++ ++ // __lasx_xvhaddw_w_h ++ // xd, xj, xk ++ // V8SI, V16HI, V16HI ++ v8i32_r = __lasx_xvhaddw_w_h(v16i16_a, v16i16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvhaddw.w.h( ++ ++ // __lasx_xvhaddw_d_w ++ // xd, xj, xk ++ // V4DI, V8SI, V8SI ++ v4i64_r = __lasx_xvhaddw_d_w(v8i32_a, v8i32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvhaddw.d.w( ++ ++ // __lasx_xvhaddw_hu_bu ++ // xd, xj, xk ++ // UV16HI, UV32QI, UV32QI ++ v16u16_r = __lasx_xvhaddw_hu_bu(v32u8_a, v32u8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvhaddw.hu.bu( ++ ++ // __lasx_xvhaddw_wu_hu ++ // xd, xj, xk ++ // UV8SI, UV16HI, UV16HI ++ v8u32_r = __lasx_xvhaddw_wu_hu(v16u16_a, v16u16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvhaddw.wu.hu( ++ ++ // __lasx_xvhaddw_du_wu ++ // xd, xj, xk ++ // UV4DI, UV8SI, UV8SI ++ v4u64_r = __lasx_xvhaddw_du_wu(v8u32_a, v8u32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvhaddw.du.wu( ++ ++ // __lasx_xvhsubw_h_b ++ // xd, xj, xk ++ // V16HI, V32QI, V32QI ++ v16i16_r = __lasx_xvhsubw_h_b(v32i8_a, v32i8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvhsubw.h.b( ++ ++ // __lasx_xvhsubw_w_h ++ // xd, xj, xk ++ // V8SI, V16HI, V16HI ++ v8i32_r = __lasx_xvhsubw_w_h(v16i16_a, v16i16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvhsubw.w.h( ++ ++ // __lasx_xvhsubw_d_w ++ // xd, xj, xk ++ // V4DI, V8SI, V8SI ++ v4i64_r = __lasx_xvhsubw_d_w(v8i32_a, v8i32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvhsubw.d.w( ++ ++ // __lasx_xvhsubw_hu_bu ++ // xd, xj, xk ++ // V16HI, UV32QI, UV32QI ++ v16i16_r = __lasx_xvhsubw_hu_bu(v32u8_a, v32u8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvhsubw.hu.bu( ++ ++ // __lasx_xvhsubw_wu_hu ++ // xd, xj, xk ++ // V8SI, UV16HI, UV16HI ++ v8i32_r = __lasx_xvhsubw_wu_hu(v16u16_a, v16u16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvhsubw.wu.hu( ++ ++ // __lasx_xvhsubw_du_wu ++ // xd, xj, xk ++ // V4DI, UV8SI, UV8SI ++ v4i64_r = __lasx_xvhsubw_du_wu(v8u32_a, v8u32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvhsubw.du.wu( ++ ++ // __lasx_xvmod_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvmod_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> 
@llvm.loongarch.lasx.xvmod.b( ++ ++ // __lasx_xvmod_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvmod_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmod.h( ++ ++ // __lasx_xvmod_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvmod_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmod.w( ++ ++ // __lasx_xvmod_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvmod_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmod.d( ++ ++ // __lasx_xvmod_bu ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvmod_bu(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmod.bu( ++ ++ // __lasx_xvmod_hu ++ // xd, xj, xk ++ // UV16HI, UV16HI, UV16HI ++ v16u16_r = __lasx_xvmod_hu(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmod.hu( ++ ++ // __lasx_xvmod_wu ++ // xd, xj, xk ++ // UV8SI, UV8SI, UV8SI ++ v8u32_r = __lasx_xvmod_wu(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmod.wu( ++ ++ // __lasx_xvmod_du ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvmod_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmod.du( ++ ++ // __lasx_xvrepl128vei_b ++ // xd, xj, ui4 ++ // V32QI, V32QI, UQI ++ v32i8_r = __lasx_xvrepl128vei_b(v32i8_a, ui4_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvrepl128vei.b( ++ ++ // __lasx_xvrepl128vei_h ++ // xd, xj, ui3 ++ // V16HI, V16HI, UQI ++ v16i16_r = __lasx_xvrepl128vei_h(v16i16_a, ui3_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvrepl128vei.h( ++ ++ // __lasx_xvrepl128vei_w ++ // xd, xj, ui2 ++ // V8SI, V8SI, UQI ++ v8i32_r = __lasx_xvrepl128vei_w(v8i32_a, ui2_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvrepl128vei.w( ++ ++ // __lasx_xvrepl128vei_d ++ // xd, xj, ui1 ++ // V4DI, V4DI, UQI ++ v4i64_r = __lasx_xvrepl128vei_d(v4i64_a, ui1_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvrepl128vei.d( ++ ++ // __lasx_xvpickev_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvpickev_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvpickev.b( ++ ++ // __lasx_xvpickev_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvpickev_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvpickev.h( ++ ++ // __lasx_xvpickev_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvpickev_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvpickev.w( ++ ++ // __lasx_xvpickev_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvpickev_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvpickev.d( ++ ++ // __lasx_xvpickod_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvpickod_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvpickod.b( ++ ++ // __lasx_xvpickod_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvpickod_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvpickod.h( ++ ++ // __lasx_xvpickod_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvpickod_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvpickod.w( ++ ++ // __lasx_xvpickod_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvpickod_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvpickod.d( ++ ++ // __lasx_xvilvh_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvilvh_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> 
@llvm.loongarch.lasx.xvilvh.b( ++ ++ // __lasx_xvilvh_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvilvh_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvilvh.h( ++ ++ // __lasx_xvilvh_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvilvh_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvilvh.w( ++ ++ // __lasx_xvilvh_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvilvh_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvilvh.d( ++ ++ // __lasx_xvilvl_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvilvl_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvilvl.b( ++ ++ // __lasx_xvilvl_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvilvl_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvilvl.h( ++ ++ // __lasx_xvilvl_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvilvl_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvilvl.w( ++ ++ // __lasx_xvilvl_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvilvl_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvilvl.d( ++ ++ // __lasx_xvpackev_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvpackev_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvpackev.b( ++ ++ // __lasx_xvpackev_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvpackev_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvpackev.h( ++ ++ // __lasx_xvpackev_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvpackev_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvpackev.w( ++ ++ // __lasx_xvpackev_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvpackev_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvpackev.d( ++ ++ // __lasx_xvpackod_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvpackod_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvpackod.b( ++ ++ // __lasx_xvpackod_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvpackod_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvpackod.h( ++ ++ // __lasx_xvpackod_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvpackod_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvpackod.w( ++ ++ // __lasx_xvpackod_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvpackod_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvpackod.d( ++ ++ // __lasx_xvshuf_b ++ // xd, xj, xk, xa ++ // V32QI, V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvshuf_b(v32i8_a, v32i8_b, v32i8_c); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvshuf.b( ++ ++ // __lasx_xvshuf_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvshuf_h(v16i16_a, v16i16_b, v16i16_c); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvshuf.h( ++ ++ // __lasx_xvshuf_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvshuf_w(v8i32_a, v8i32_b, v8i32_c); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvshuf.w( ++ ++ // __lasx_xvshuf_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvshuf_d(v4i64_a, v4i64_b, v4i64_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvshuf.d( ++ ++ // __lasx_xvand_v ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvand_v(v32u8_a, v32u8_b); // CHECK: call <32 x i8> 
@llvm.loongarch.lasx.xvand.v( ++ ++ // __lasx_xvandi_b ++ // xd, xj, ui8 ++ // UV32QI, UV32QI, UQI ++ v32u8_r = __lasx_xvandi_b(v32u8_a, ui8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvandi.b( ++ ++ // __lasx_xvor_v ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvor_v(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvor.v( ++ ++ // __lasx_xvori_b ++ // xd, xj, ui8 ++ // UV32QI, UV32QI, UQI ++ v32u8_r = __lasx_xvori_b(v32u8_a, ui8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvori.b( ++ ++ // __lasx_xvnor_v ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvnor_v(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvnor.v( ++ ++ // __lasx_xvnori_b ++ // xd, xj, ui8 ++ // UV32QI, UV32QI, UQI ++ v32u8_r = __lasx_xvnori_b(v32u8_a, ui8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvnori.b( ++ ++ // __lasx_xvxor_v ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvxor_v(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvxor.v( ++ ++ // __lasx_xvxori_b ++ // xd, xj, ui8 ++ // UV32QI, UV32QI, UQI ++ v32u8_r = __lasx_xvxori_b(v32u8_a, ui8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvxori.b( ++ ++ // __lasx_xvbitsel_v ++ // xd, xj, xk, xa ++ // UV32QI, UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvbitsel_v(v32u8_a, v32u8_b, v32u8_c); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvbitsel.v( ++ ++ // __lasx_xvbitseli_b ++ // xd, xj, ui8 ++ // UV32QI, UV32QI, UV32QI, UQI ++ v32u8_r = __lasx_xvbitseli_b(v32u8_a, v32u8_b, ui8); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvbitseli.b( ++ ++ // __lasx_xvshuf4i_b ++ // xd, xj, ui8 ++ // V32QI, V32QI, USI ++ v32i8_r = __lasx_xvshuf4i_b(v32i8_a, ui8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvshuf4i.b( ++ ++ // __lasx_xvshuf4i_h ++ // xd, xj, ui8 ++ // V16HI, V16HI, USI ++ v16i16_r = __lasx_xvshuf4i_h(v16i16_a, ui8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvshuf4i.h( ++ ++ // __lasx_xvshuf4i_w ++ // xd, xj, ui8 ++ // V8SI, V8SI, USI ++ v8i32_r = __lasx_xvshuf4i_w(v8i32_a, ui8_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvshuf4i.w( ++ ++ // __lasx_xvreplgr2vr_b ++ // xd, rj ++ // V32QI, SI ++ v32i8_r = __lasx_xvreplgr2vr_b(i32_a); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvreplgr2vr.b( ++ ++ // __lasx_xvreplgr2vr_h ++ // xd, rj ++ // V16HI, SI ++ v16i16_r = __lasx_xvreplgr2vr_h(i32_a); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvreplgr2vr.h( ++ ++ // __lasx_xvreplgr2vr_w ++ // xd, rj ++ // V8SI, SI ++ v8i32_r = __lasx_xvreplgr2vr_w(i32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvreplgr2vr.w( ++ ++ // __lasx_xvreplgr2vr_d ++ // xd, rj ++ // V4DI, DI ++ v4i64_r = __lasx_xvreplgr2vr_d(i64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvreplgr2vr.d( ++ ++ // __lasx_xvpcnt_b ++ // xd, xj ++ // V32QI, V32QI ++ v32i8_r = __lasx_xvpcnt_b(v32i8_a); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvpcnt.b( ++ ++ // __lasx_xvpcnt_h ++ // xd, xj ++ // V16HI, V16HI ++ v16i16_r = __lasx_xvpcnt_h(v16i16_a); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvpcnt.h( ++ ++ // __lasx_xvpcnt_w ++ // xd, xj ++ // V8SI, V8SI ++ v8i32_r = __lasx_xvpcnt_w(v8i32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvpcnt.w( ++ ++ // __lasx_xvpcnt_d ++ // xd, xj ++ // V4DI, V4DI ++ v4i64_r = __lasx_xvpcnt_d(v4i64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvpcnt.d( ++ ++ // __lasx_xvclo_b ++ // xd, xj ++ // V32QI, V32QI ++ v32i8_r = __lasx_xvclo_b(v32i8_a); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvclo.b( ++ 
++ // __lasx_xvclo_h ++ // xd, xj ++ // V16HI, V16HI ++ v16i16_r = __lasx_xvclo_h(v16i16_a); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvclo.h( ++ ++ // __lasx_xvclo_w ++ // xd, xj ++ // V8SI, V8SI ++ v8i32_r = __lasx_xvclo_w(v8i32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvclo.w( ++ ++ // __lasx_xvclo_d ++ // xd, xj ++ // V4DI, V4DI ++ v4i64_r = __lasx_xvclo_d(v4i64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvclo.d( ++ ++ // __lasx_xvclz_b ++ // xd, xj ++ // V32QI, V32QI ++ v32i8_r = __lasx_xvclz_b(v32i8_a); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvclz.b( ++ ++ // __lasx_xvclz_h ++ // xd, xj ++ // V16HI, V16HI ++ v16i16_r = __lasx_xvclz_h(v16i16_a); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvclz.h( ++ ++ // __lasx_xvclz_w ++ // xd, xj ++ // V8SI, V8SI ++ v8i32_r = __lasx_xvclz_w(v8i32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvclz.w( ++ ++ // __lasx_xvclz_d ++ // xd, xj ++ // V4DI, V4DI ++ v4i64_r = __lasx_xvclz_d(v4i64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvclz.d( ++ ++ // __lasx_xvfcmp_caf_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_caf_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.caf.s( ++ ++ // __lasx_xvfcmp_caf_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_caf_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.caf.d( ++ ++ // __lasx_xvfcmp_cor_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_cor_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cor.s( ++ ++ // __lasx_xvfcmp_cor_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_cor_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cor.d( ++ ++ // __lasx_xvfcmp_cun_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_cun_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cun.s( ++ ++ // __lasx_xvfcmp_cun_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_cun_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cun.d( ++ ++ // __lasx_xvfcmp_cune_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_cune_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cune.s( ++ ++ // __lasx_xvfcmp_cune_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_cune_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cune.d( ++ ++ // __lasx_xvfcmp_cueq_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_cueq_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cueq.s( ++ ++ // __lasx_xvfcmp_cueq_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_cueq_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cueq.d( ++ ++ // __lasx_xvfcmp_ceq_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_ceq_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.ceq.s( ++ ++ // __lasx_xvfcmp_ceq_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_ceq_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.ceq.d( ++ ++ // __lasx_xvfcmp_cne_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_cne_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cne.s( ++ ++ // __lasx_xvfcmp_cne_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_cne_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> 
@llvm.loongarch.lasx.xvfcmp.cne.d( ++ ++ // __lasx_xvfcmp_clt_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_clt_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.clt.s( ++ ++ // __lasx_xvfcmp_clt_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_clt_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.clt.d( ++ ++ // __lasx_xvfcmp_cult_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_cult_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cult.s( ++ ++ // __lasx_xvfcmp_cult_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_cult_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cult.d( ++ ++ // __lasx_xvfcmp_cle_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_cle_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cle.s( ++ ++ // __lasx_xvfcmp_cle_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_cle_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cle.d( ++ ++ // __lasx_xvfcmp_cule_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_cule_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cule.s( ++ ++ // __lasx_xvfcmp_cule_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_cule_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cule.d( ++ ++ // __lasx_xvfcmp_saf_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_saf_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.saf.s( ++ ++ // __lasx_xvfcmp_saf_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_saf_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.saf.d( ++ ++ // __lasx_xvfcmp_sor_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_sor_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sor.s( ++ ++ // __lasx_xvfcmp_sor_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_sor_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sor.d( ++ ++ // __lasx_xvfcmp_sun_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_sun_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sun.s( ++ ++ // __lasx_xvfcmp_sun_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_sun_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sun.d( ++ ++ // __lasx_xvfcmp_sune_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_sune_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sune.s( ++ ++ // __lasx_xvfcmp_sune_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_sune_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sune.d( ++ ++ // __lasx_xvfcmp_sueq_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_sueq_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sueq.s( ++ ++ // __lasx_xvfcmp_sueq_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_sueq_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sueq.d( ++ ++ // __lasx_xvfcmp_seq_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_seq_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.seq.s( ++ ++ // __lasx_xvfcmp_seq_d ++ // xd, xj, xk ++ // 
V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_seq_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.seq.d( ++ ++ // __lasx_xvfcmp_sne_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_sne_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sne.s( ++ ++ // __lasx_xvfcmp_sne_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_sne_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sne.d( ++ ++ // __lasx_xvfcmp_slt_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_slt_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.slt.s( ++ ++ // __lasx_xvfcmp_slt_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_slt_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.slt.d( ++ ++ // __lasx_xvfcmp_sult_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_sult_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sult.s( ++ ++ // __lasx_xvfcmp_sult_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_sult_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sult.d( ++ ++ // __lasx_xvfcmp_sle_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_sle_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sle.s( ++ ++ // __lasx_xvfcmp_sle_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_sle_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sle.d( ++ ++ // __lasx_xvfcmp_sule_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_sule_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sule.s( ++ ++ // __lasx_xvfcmp_sule_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_sule_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sule.d( ++ ++ // __lasx_xvfadd_s ++ // xd, xj, xk ++ // V8SF, V8SF, V8SF ++ v8f32_r = __lasx_xvfadd_s(v8f32_a, v8f32_b); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfadd.s( ++ ++ // __lasx_xvfadd_d ++ // xd, xj, xk ++ // V4DF, V4DF, V4DF ++ v4f64_r = __lasx_xvfadd_d(v4f64_a, v4f64_b); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfadd.d( ++ ++ // __lasx_xvfsub_s ++ // xd, xj, xk ++ // V8SF, V8SF, V8SF ++ v8f32_r = __lasx_xvfsub_s(v8f32_a, v8f32_b); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfsub.s( ++ ++ // __lasx_xvfsub_d ++ // xd, xj, xk ++ // V4DF, V4DF, V4DF ++ v4f64_r = __lasx_xvfsub_d(v4f64_a, v4f64_b); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfsub.d( ++ ++ // __lasx_xvfmul_s ++ // xd, xj, xk ++ // V8SF, V8SF, V8SF ++ v8f32_r = __lasx_xvfmul_s(v8f32_a, v8f32_b); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfmul.s( ++ ++ // __lasx_xvfmul_d ++ // xd, xj, xk ++ // V4DF, V4DF, V4DF ++ v4f64_r = __lasx_xvfmul_d(v4f64_a, v4f64_b); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfmul.d( ++ ++ // __lasx_xvfdiv_s ++ // xd, xj, xk ++ // V8SF, V8SF, V8SF ++ v8f32_r = __lasx_xvfdiv_s(v8f32_a, v8f32_b); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfdiv.s( ++ ++ // __lasx_xvfdiv_d ++ // xd, xj, xk ++ // V4DF, V4DF, V4DF ++ v4f64_r = __lasx_xvfdiv_d(v4f64_a, v4f64_b); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfdiv.d( ++ ++ // __lasx_xvfcvt_h_s ++ // xd, xj, xk ++ // V16HI, V8SF, V8SF ++ v16i16_r = __lasx_xvfcvt_h_s(v8f32_a, v8f32_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvfcvt.h.s( ++ ++ // __lasx_xvfcvt_s_d ++ // xd, xj, xk ++ // 
V8SF, V4DF, V4DF ++ v8f32_r = __lasx_xvfcvt_s_d(v4f64_a, v4f64_b); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfcvt.s.d( ++ ++ // __lasx_xvfmin_s ++ // xd, xj, xk ++ // V8SF, V8SF, V8SF ++ v8f32_r = __lasx_xvfmin_s(v8f32_a, v8f32_b); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfmin.s( ++ ++ // __lasx_xvfmin_d ++ // xd, xj, xk ++ // V4DF, V4DF, V4DF ++ v4f64_r = __lasx_xvfmin_d(v4f64_a, v4f64_b); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfmin.d( ++ ++ // __lasx_xvfmina_s ++ // xd, xj, xk ++ // V8SF, V8SF, V8SF ++ v8f32_r = __lasx_xvfmina_s(v8f32_a, v8f32_b); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfmina.s( ++ ++ // __lasx_xvfmina_d ++ // xd, xj, xk ++ // V4DF, V4DF, V4DF ++ v4f64_r = __lasx_xvfmina_d(v4f64_a, v4f64_b); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfmina.d( ++ ++ // __lasx_xvfmax_s ++ // xd, xj, xk ++ // V8SF, V8SF, V8SF ++ v8f32_r = __lasx_xvfmax_s(v8f32_a, v8f32_b); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfmax.s( ++ ++ // __lasx_xvfmax_d ++ // xd, xj, xk ++ // V4DF, V4DF, V4DF ++ v4f64_r = __lasx_xvfmax_d(v4f64_a, v4f64_b); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfmax.d( ++ ++ // __lasx_xvfmaxa_s ++ // xd, xj, xk ++ // V8SF, V8SF, V8SF ++ v8f32_r = __lasx_xvfmaxa_s(v8f32_a, v8f32_b); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfmaxa.s( ++ ++ // __lasx_xvfmaxa_d ++ // xd, xj, xk ++ // V4DF, V4DF, V4DF ++ v4f64_r = __lasx_xvfmaxa_d(v4f64_a, v4f64_b); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfmaxa.d( ++ ++ // __lasx_xvfclass_s ++ // xd, xj ++ // V8SI, V8SF ++ v8i32_r = __lasx_xvfclass_s(v8f32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfclass.s( ++ ++ // __lasx_xvfclass_d ++ // xd, xj ++ // V4DI, V4DF ++ v4i64_r = __lasx_xvfclass_d(v4f64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfclass.d( ++ ++ // __lasx_xvfsqrt_s ++ // xd, xj ++ // V8SF, V8SF ++ v8f32_r = __lasx_xvfsqrt_s(v8f32_a); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfsqrt.s( ++ ++ // __lasx_xvfsqrt_d ++ // xd, xj ++ // V4DF, V4DF ++ v4f64_r = __lasx_xvfsqrt_d(v4f64_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfsqrt.d( ++ ++ // __lasx_xvfrecip_s ++ // xd, xj ++ // V8SF, V8SF ++ v8f32_r = __lasx_xvfrecip_s(v8f32_a); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfrecip.s( ++ ++ // __lasx_xvfrecip_d ++ // xd, xj ++ // V4DF, V4DF ++ v4f64_r = __lasx_xvfrecip_d(v4f64_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfrecip.d( ++ ++ // __lasx_xvfrint_s ++ // xd, xj ++ // V8SF, V8SF ++ v8f32_r = __lasx_xvfrint_s(v8f32_a); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfrint.s( ++ ++ // __lasx_xvfrint_d ++ // xd, xj ++ // V4DF, V4DF ++ v4f64_r = __lasx_xvfrint_d(v4f64_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfrint.d( ++ ++ // __lasx_xvfrsqrt_s ++ // xd, xj ++ // V8SF, V8SF ++ v8f32_r = __lasx_xvfrsqrt_s(v8f32_a); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfrsqrt.s( ++ ++ // __lasx_xvfrsqrt_d ++ // xd, xj ++ // V4DF, V4DF ++ v4f64_r = __lasx_xvfrsqrt_d(v4f64_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfrsqrt.d( ++ ++ // __lasx_xvflogb_s ++ // xd, xj ++ // V8SF, V8SF ++ v8f32_r = __lasx_xvflogb_s(v8f32_a); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvflogb.s( ++ ++ // __lasx_xvflogb_d ++ // xd, xj ++ // V4DF, V4DF ++ v4f64_r = __lasx_xvflogb_d(v4f64_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvflogb.d( ++ ++ // __lasx_xvfcvth_s_h ++ // xd, xj ++ // V8SF, V16HI ++ v8f32_r = __lasx_xvfcvth_s_h(v16i16_a); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfcvth.s.h( 
++ ++ // __lasx_xvfcvth_d_s ++ // xd, xj ++ // V4DF, V8SF ++ v4f64_r = __lasx_xvfcvth_d_s(v8f32_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfcvth.d.s( ++ ++ // __lasx_xvfcvtl_s_h ++ // xd, xj ++ // V8SF, V16HI ++ v8f32_r = __lasx_xvfcvtl_s_h(v16i16_a); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfcvtl.s.h( ++ ++ // __lasx_xvfcvtl_d_s ++ // xd, xj ++ // V4DF, V8SF ++ v4f64_r = __lasx_xvfcvtl_d_s(v8f32_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfcvtl.d.s( ++ ++ // __lasx_xvftint_w_s ++ // xd, xj ++ // V8SI, V8SF ++ v8i32_r = __lasx_xvftint_w_s(v8f32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvftint.w.s( ++ ++ // __lasx_xvftint_l_d ++ // xd, xj ++ // V4DI, V4DF ++ v4i64_r = __lasx_xvftint_l_d(v4f64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftint.l.d( ++ ++ // __lasx_xvftint_wu_s ++ // xd, xj ++ // UV8SI, V8SF ++ v8u32_r = __lasx_xvftint_wu_s(v8f32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvftint.wu.s( ++ ++ // __lasx_xvftint_lu_d ++ // xd, xj ++ // UV4DI, V4DF ++ v4u64_r = __lasx_xvftint_lu_d(v4f64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftint.lu.d( ++ ++ // __lasx_xvftintrz_w_s ++ // xd, xj ++ // V8SI, V8SF ++ v8i32_r = __lasx_xvftintrz_w_s(v8f32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.s( ++ ++ // __lasx_xvftintrz_l_d ++ // xd, xj ++ // V4DI, V4DF ++ v4i64_r = __lasx_xvftintrz_l_d(v4f64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintrz.l.d( ++ ++ // __lasx_xvftintrz_wu_s ++ // xd, xj ++ // UV8SI, V8SF ++ v8u32_r = __lasx_xvftintrz_wu_s(v8f32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvftintrz.wu.s( ++ ++ // __lasx_xvftintrz_lu_d ++ // xd, xj ++ // UV4DI, V4DF ++ v4u64_r = __lasx_xvftintrz_lu_d(v4f64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintrz.lu.d( ++ ++ // __lasx_xvffint_s_w ++ // xd, xj ++ // V8SF, V8SI ++ v8f32_r = __lasx_xvffint_s_w(v8i32_a); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvffint.s.w( ++ ++ // __lasx_xvffint_d_l ++ // xd, xj ++ // V4DF, V4DI ++ v4f64_r = __lasx_xvffint_d_l(v4i64_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvffint.d.l( ++ ++ // __lasx_xvffint_s_wu ++ // xd, xj ++ // V8SF, UV8SI ++ v8f32_r = __lasx_xvffint_s_wu(v8u32_a); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvffint.s.wu( ++ ++ // __lasx_xvffint_d_lu ++ // xd, xj ++ // V4DF, UV4DI ++ v4f64_r = __lasx_xvffint_d_lu(v4u64_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvffint.d.lu( ++ ++ // __lasx_xvreplve_b ++ // xd, xj, rk ++ // V32QI, V32QI, SI ++ v32i8_r = __lasx_xvreplve_b(v32i8_a, i32_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvreplve.b( ++ ++ // __lasx_xvreplve_h ++ // xd, xj, rk ++ // V16HI, V16HI, SI ++ v16i16_r = __lasx_xvreplve_h(v16i16_a, i32_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvreplve.h( ++ ++ // __lasx_xvreplve_w ++ // xd, xj, rk ++ // V8SI, V8SI, SI ++ v8i32_r = __lasx_xvreplve_w(v8i32_a, i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvreplve.w( ++ ++ // __lasx_xvreplve_d ++ // xd, xj, rk ++ // V4DI, V4DI, SI ++ v4i64_r = __lasx_xvreplve_d(v4i64_a, i32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvreplve.d( ++ ++ // __lasx_xvpermi_w ++ // xd, xj, ui8 ++ // V8SI, V8SI, V8SI, USI ++ v8i32_r = __lasx_xvpermi_w(v8i32_a, v8i32_b, ui8); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvpermi.w( ++ ++ // __lasx_xvandn_v ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvandn_v(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvandn.v( ++ ++ // __lasx_xvneg_b ++ // xd, xj ++ // V32QI, 
V32QI ++ v32i8_r = __lasx_xvneg_b(v32i8_a); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvneg.b( ++ ++ // __lasx_xvneg_h ++ // xd, xj ++ // V16HI, V16HI ++ v16i16_r = __lasx_xvneg_h(v16i16_a); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvneg.h( ++ ++ // __lasx_xvneg_w ++ // xd, xj ++ // V8SI, V8SI ++ v8i32_r = __lasx_xvneg_w(v8i32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvneg.w( ++ ++ // __lasx_xvneg_d ++ // xd, xj ++ // V4DI, V4DI ++ v4i64_r = __lasx_xvneg_d(v4i64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvneg.d( ++ ++ // __lasx_xvmuh_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvmuh_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmuh.b( ++ ++ // __lasx_xvmuh_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvmuh_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmuh.h( ++ ++ // __lasx_xvmuh_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvmuh_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmuh.w( ++ ++ // __lasx_xvmuh_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvmuh_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmuh.d( ++ ++ // __lasx_xvmuh_bu ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvmuh_bu(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmuh.bu( ++ ++ // __lasx_xvmuh_hu ++ // xd, xj, xk ++ // UV16HI, UV16HI, UV16HI ++ v16u16_r = __lasx_xvmuh_hu(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmuh.hu( ++ ++ // __lasx_xvmuh_wu ++ // xd, xj, xk ++ // UV8SI, UV8SI, UV8SI ++ v8u32_r = __lasx_xvmuh_wu(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmuh.wu( ++ ++ // __lasx_xvmuh_du ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvmuh_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmuh.du( ++ ++ // __lasx_xvsllwil_h_b ++ // xd, xj, ui3 ++ // V16HI, V32QI, UQI ++ v16i16_r = __lasx_xvsllwil_h_b(v32i8_a, ui3_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsllwil.h.b( ++ ++ // __lasx_xvsllwil_w_h ++ // xd, xj, ui4 ++ // V8SI, V16HI, UQI ++ v8i32_r = __lasx_xvsllwil_w_h(v16i16_a, ui4_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsllwil.w.h( ++ ++ // __lasx_xvsllwil_d_w ++ // xd, xj, ui5 ++ // V4DI, V8SI, UQI ++ v4i64_r = __lasx_xvsllwil_d_w(v8i32_a, ui5_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsllwil.d.w( ++ ++ // __lasx_xvsllwil_hu_bu ++ // xd, xj, ui3 ++ // UV16HI, UV32QI, UQI ++ v16u16_r = __lasx_xvsllwil_hu_bu(v32u8_a, ui3_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsllwil.hu.bu( ++ ++ // __lasx_xvsllwil_wu_hu ++ // xd, xj, ui4 ++ // UV8SI, UV16HI, UQI ++ v8u32_r = __lasx_xvsllwil_wu_hu(v16u16_a, ui4_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsllwil.wu.hu( ++ ++ // __lasx_xvsllwil_du_wu ++ // xd, xj, ui5 ++ // UV4DI, UV8SI, UQI ++ v4u64_r = __lasx_xvsllwil_du_wu(v8u32_a, ui5_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsllwil.du.wu( ++ ++ // __lasx_xvsran_b_h ++ // xd, xj, xk ++ // V32QI, V16HI, V16HI ++ v32i8_r = __lasx_xvsran_b_h(v16i16_a, v16i16_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsran.b.h( ++ ++ // __lasx_xvsran_h_w ++ // xd, xj, xk ++ // V16HI, V8SI, V8SI ++ v16i16_r = __lasx_xvsran_h_w(v8i32_a, v8i32_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsran.h.w( ++ ++ // __lasx_xvsran_w_d ++ // xd, xj, xk ++ // V8SI, V4DI, V4DI ++ v8i32_r = __lasx_xvsran_w_d(v4i64_a, v4i64_b); // CHECK: call <8 x i32> 
@llvm.loongarch.lasx.xvsran.w.d( ++ ++ // __lasx_xvssran_b_h ++ // xd, xj, xk ++ // V32QI, V16HI, V16HI ++ v32i8_r = __lasx_xvssran_b_h(v16i16_a, v16i16_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssran.b.h( ++ ++ // __lasx_xvssran_h_w ++ // xd, xj, xk ++ // V16HI, V8SI, V8SI ++ v16i16_r = __lasx_xvssran_h_w(v8i32_a, v8i32_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssran.h.w( ++ ++ // __lasx_xvssran_w_d ++ // xd, xj, xk ++ // V8SI, V4DI, V4DI ++ v8i32_r = __lasx_xvssran_w_d(v4i64_a, v4i64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssran.w.d( ++ ++ // __lasx_xvssran_bu_h ++ // xd, xj, xk ++ // UV32QI, UV16HI, UV16HI ++ v32u8_r = __lasx_xvssran_bu_h(v16u16_a, v16u16_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssran.bu.h( ++ ++ // __lasx_xvssran_hu_w ++ // xd, xj, xk ++ // UV16HI, UV8SI, UV8SI ++ v16u16_r = __lasx_xvssran_hu_w(v8u32_a, v8u32_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssran.hu.w( ++ ++ // __lasx_xvssran_wu_d ++ // xd, xj, xk ++ // UV8SI, UV4DI, UV4DI ++ v8u32_r = __lasx_xvssran_wu_d(v4u64_a, v4u64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssran.wu.d( ++ ++ // __lasx_xvsrarn_b_h ++ // xd, xj, xk ++ // V32QI, V16HI, V16HI ++ v32i8_r = __lasx_xvsrarn_b_h(v16i16_a, v16i16_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsrarn.b.h( ++ ++ // __lasx_xvsrarn_h_w ++ // xd, xj, xk ++ // V16HI, V8SI, V8SI ++ v16i16_r = __lasx_xvsrarn_h_w(v8i32_a, v8i32_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrarn.h.w( ++ ++ // __lasx_xvsrarn_w_d ++ // xd, xj, xk ++ // V8SI, V4DI, V4DI ++ v8i32_r = __lasx_xvsrarn_w_d(v4i64_a, v4i64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsrarn.w.d( ++ ++ // __lasx_xvssrarn_b_h ++ // xd, xj, xk ++ // V32QI, V16HI, V16HI ++ v32i8_r = __lasx_xvssrarn_b_h(v16i16_a, v16i16_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssrarn.b.h( ++ ++ // __lasx_xvssrarn_h_w ++ // xd, xj, xk ++ // V16HI, V8SI, V8SI ++ v16i16_r = __lasx_xvssrarn_h_w(v8i32_a, v8i32_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssrarn.h.w( ++ ++ // __lasx_xvssrarn_w_d ++ // xd, xj, xk ++ // V8SI, V4DI, V4DI ++ v8i32_r = __lasx_xvssrarn_w_d(v4i64_a, v4i64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssrarn.w.d( ++ ++ // __lasx_xvssrarn_bu_h ++ // xd, xj, xk ++ // UV32QI, UV16HI, UV16HI ++ v32u8_r = __lasx_xvssrarn_bu_h(v16u16_a, v16u16_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssrarn.bu.h( ++ ++ // __lasx_xvssrarn_hu_w ++ // xd, xj, xk ++ // UV16HI, UV8SI, UV8SI ++ v16u16_r = __lasx_xvssrarn_hu_w(v8u32_a, v8u32_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssrarn.hu.w( ++ ++ // __lasx_xvssrarn_wu_d ++ // xd, xj, xk ++ // UV8SI, UV4DI, UV4DI ++ v8u32_r = __lasx_xvssrarn_wu_d(v4u64_a, v4u64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssrarn.wu.d( ++ ++ // __lasx_xvsrln_b_h ++ // xd, xj, xk ++ // V32QI, V16HI, V16HI ++ v32i8_r = __lasx_xvsrln_b_h(v16i16_a, v16i16_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsrln.b.h( ++ ++ // __lasx_xvsrln_h_w ++ // xd, xj, xk ++ // V16HI, V8SI, V8SI ++ v16i16_r = __lasx_xvsrln_h_w(v8i32_a, v8i32_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrln.h.w( ++ ++ // __lasx_xvsrln_w_d ++ // xd, xj, xk ++ // V8SI, V4DI, V4DI ++ v8i32_r = __lasx_xvsrln_w_d(v4i64_a, v4i64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsrln.w.d( ++ ++ // __lasx_xvssrln_bu_h ++ // xd, xj, xk ++ // UV32QI, UV16HI, UV16HI ++ v32u8_r = __lasx_xvssrln_bu_h(v16u16_a, v16u16_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssrln.bu.h( ++ ++ // __lasx_xvssrln_hu_w ++ // 
xd, xj, xk ++ // UV16HI, UV8SI, UV8SI ++ v16u16_r = __lasx_xvssrln_hu_w(v8u32_a, v8u32_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssrln.hu.w( ++ ++ // __lasx_xvssrln_wu_d ++ // xd, xj, xk ++ // UV8SI, UV4DI, UV4DI ++ v8u32_r = __lasx_xvssrln_wu_d(v4u64_a, v4u64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssrln.wu.d( ++ ++ // __lasx_xvsrlrn_b_h ++ // xd, xj, xk ++ // V32QI, V16HI, V16HI ++ v32i8_r = __lasx_xvsrlrn_b_h(v16i16_a, v16i16_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsrlrn.b.h( ++ ++ // __lasx_xvsrlrn_h_w ++ // xd, xj, xk ++ // V16HI, V8SI, V8SI ++ v16i16_r = __lasx_xvsrlrn_h_w(v8i32_a, v8i32_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrlrn.h.w( ++ ++ // __lasx_xvsrlrn_w_d ++ // xd, xj, xk ++ // V8SI, V4DI, V4DI ++ v8i32_r = __lasx_xvsrlrn_w_d(v4i64_a, v4i64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsrlrn.w.d( ++ ++ // __lasx_xvssrlrn_bu_h ++ // xd, xj, xk ++ // UV32QI, UV16HI, UV16HI ++ v32u8_r = __lasx_xvssrlrn_bu_h(v16u16_a, v16u16_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.bu.h( ++ ++ // __lasx_xvssrlrn_hu_w ++ // xd, xj, xk ++ // UV16HI, UV8SI, UV8SI ++ v16u16_r = __lasx_xvssrlrn_hu_w(v8u32_a, v8u32_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.hu.w( ++ ++ // __lasx_xvssrlrn_wu_d ++ // xd, xj, xk ++ // UV8SI, UV4DI, UV4DI ++ v8u32_r = __lasx_xvssrlrn_wu_d(v4u64_a, v4u64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.wu.d( ++ ++ // __lasx_xvfrstpi_b ++ // xd, xj, ui5 ++ // V32QI, V32QI, V32QI, UQI ++ v32i8_r = __lasx_xvfrstpi_b(v32i8_a, v32i8_b, ui5); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvfrstpi.b( ++ ++ // __lasx_xvfrstpi_h ++ // xd, xj, ui5 ++ // V16HI, V16HI, V16HI, UQI ++ v16i16_r = __lasx_xvfrstpi_h(v16i16_a, v16i16_b, ui5); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvfrstpi.h( ++ ++ // __lasx_xvfrstp_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvfrstp_b(v32i8_a, v32i8_b, v32i8_c); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvfrstp.b( ++ ++ // __lasx_xvfrstp_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvfrstp_h(v16i16_a, v16i16_b, v16i16_c); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvfrstp.h( ++ ++ // __lasx_xvshuf4i_d ++ // xd, xj, ui8 ++ // V4DI, V4DI, V4DI, USI ++ v4i64_r = __lasx_xvshuf4i_d(v4i64_a, v4i64_b, ui8); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvshuf4i.d( ++ ++ // __lasx_xvbsrl_v ++ // xd, xj, ui5 ++ // V32QI, V32QI, UQI ++ v32i8_r = __lasx_xvbsrl_v(v32i8_a, ui5_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvbsrl.v( ++ ++ // __lasx_xvbsll_v ++ // xd, xj, ui5 ++ // V32QI, V32QI, UQI ++ v32i8_r = __lasx_xvbsll_v(v32i8_a, ui5_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvbsll.v( ++ ++ // __lasx_xvextrins_b ++ // xd, xj, ui8 ++ // V32QI, V32QI, V32QI, UQI ++ v32i8_r = __lasx_xvextrins_b(v32i8_a, v32i8_b, ui8); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvextrins.b( ++ ++ // __lasx_xvextrins_h ++ // xd, xj, ui8 ++ // V16HI, V16HI, V16HI, UQI ++ v16i16_r = __lasx_xvextrins_h(v16i16_a, v16i16_b, ui8); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvextrins.h( ++ ++ // __lasx_xvextrins_w ++ // xd, xj, ui8 ++ // V8SI, V8SI, V8SI, UQI ++ v8i32_r = __lasx_xvextrins_w(v8i32_a, v8i32_b, ui8); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvextrins.w( ++ ++ // __lasx_xvextrins_d ++ // xd, xj, ui8 ++ // V4DI, V4DI, V4DI, UQI ++ v4i64_r = __lasx_xvextrins_d(v4i64_a, v4i64_b, ui8); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvextrins.d( ++ ++ // __lasx_xvmskltz_b ++ // xd, xj ++ // 
V32QI, V32QI ++ v32i8_r = __lasx_xvmskltz_b(v32i8_a); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmskltz.b( ++ ++ // __lasx_xvmskltz_h ++ // xd, xj ++ // V16HI, V16HI ++ v16i16_r = __lasx_xvmskltz_h(v16i16_a); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmskltz.h( ++ ++ // __lasx_xvmskltz_w ++ // xd, xj ++ // V8SI, V8SI ++ v8i32_r = __lasx_xvmskltz_w(v8i32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmskltz.w( ++ ++ // __lasx_xvmskltz_d ++ // xd, xj ++ // V4DI, V4DI ++ v4i64_r = __lasx_xvmskltz_d(v4i64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmskltz.d( ++ ++ // __lasx_xvsigncov_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvsigncov_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsigncov.b( ++ ++ // __lasx_xvsigncov_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvsigncov_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsigncov.h( ++ ++ // __lasx_xvsigncov_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvsigncov_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsigncov.w( ++ ++ // __lasx_xvsigncov_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvsigncov_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsigncov.d( ++ ++ // __lasx_xvfmadd_s ++ // xd, xj, xk, xa ++ // V8SF, V8SF, V8SF, V8SF ++ v8f32_r = __lasx_xvfmadd_s(v8f32_a, v8f32_b, v8f32_c); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfmadd.s( ++ ++ // __lasx_xvfmadd_d ++ // xd, xj, xk, xa ++ // V4DF, V4DF, V4DF, V4DF ++ v4f64_r = __lasx_xvfmadd_d(v4f64_a, v4f64_b, v4f64_c); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfmadd.d( ++ ++ // __lasx_xvfmsub_s ++ // xd, xj, xk, xa ++ // V8SF, V8SF, V8SF, V8SF ++ v8f32_r = __lasx_xvfmsub_s(v8f32_a, v8f32_b, v8f32_c); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfmsub.s( ++ ++ // __lasx_xvfmsub_d ++ // xd, xj, xk, xa ++ // V4DF, V4DF, V4DF, V4DF ++ v4f64_r = __lasx_xvfmsub_d(v4f64_a, v4f64_b, v4f64_c); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfmsub.d( ++ ++ // __lasx_xvfnmadd_s ++ // xd, xj, xk, xa ++ // V8SF, V8SF, V8SF, V8SF ++ v8f32_r = __lasx_xvfnmadd_s(v8f32_a, v8f32_b, v8f32_c); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfnmadd.s( ++ ++ // __lasx_xvfnmadd_d ++ // xd, xj, xk, xa ++ // V4DF, V4DF, V4DF, V4DF ++ v4f64_r = __lasx_xvfnmadd_d(v4f64_a, v4f64_b, v4f64_c); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfnmadd.d( ++ ++ // __lasx_xvfnmsub_s ++ // xd, xj, xk, xa ++ // V8SF, V8SF, V8SF, V8SF ++ v8f32_r = __lasx_xvfnmsub_s(v8f32_a, v8f32_b, v8f32_c); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfnmsub.s( ++ ++ // __lasx_xvfnmsub_d ++ // xd, xj, xk, xa ++ // V4DF, V4DF, V4DF, V4DF ++ v4f64_r = __lasx_xvfnmsub_d(v4f64_a, v4f64_b, v4f64_c); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfnmsub.d( ++ ++ // __lasx_xvftintrne_w_s ++ // xd, xj ++ // V8SI, V8SF ++ v8i32_r = __lasx_xvftintrne_w_s(v8f32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.s( ++ ++ // __lasx_xvftintrne_l_d ++ // xd, xj ++ // V4DI, V4DF ++ v4i64_r = __lasx_xvftintrne_l_d(v4f64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintrne.l.d( ++ ++ // __lasx_xvftintrp_w_s ++ // xd, xj ++ // V8SI, V8SF ++ v8i32_r = __lasx_xvftintrp_w_s(v8f32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.s( ++ ++ // __lasx_xvftintrp_l_d ++ // xd, xj ++ // V4DI, V4DF ++ v4i64_r = __lasx_xvftintrp_l_d(v4f64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintrp.l.d( ++ ++ // 
__lasx_xvftintrm_w_s ++ // xd, xj ++ // V8SI, V8SF ++ v8i32_r = __lasx_xvftintrm_w_s(v8f32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.s( ++ ++ // __lasx_xvftintrm_l_d ++ // xd, xj ++ // V4DI, V4DF ++ v4i64_r = __lasx_xvftintrm_l_d(v4f64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintrm.l.d( ++ ++ // __lasx_xvftint_w_d ++ // xd, xj, xk ++ // V8SI, V4DF, V4DF ++ v8i32_r = __lasx_xvftint_w_d(v4f64_a, v4f64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvftint.w.d( ++ ++ // __lasx_xvffint_s_l ++ // xd, xj, xk ++ // V8SF, V4DI, V4DI ++ v8f32_r = __lasx_xvffint_s_l(v4i64_a, v4i64_b); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvffint.s.l( ++ ++ // __lasx_xvftintrz_w_d ++ // xd, xj, xk ++ // V8SI, V4DF, V4DF ++ v8i32_r = __lasx_xvftintrz_w_d(v4f64_a, v4f64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.d( ++ ++ // __lasx_xvftintrp_w_d ++ // xd, xj, xk ++ // V8SI, V4DF, V4DF ++ v8i32_r = __lasx_xvftintrp_w_d(v4f64_a, v4f64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.d( ++ ++ // __lasx_xvftintrm_w_d ++ // xd, xj, xk ++ // V8SI, V4DF, V4DF ++ v8i32_r = __lasx_xvftintrm_w_d(v4f64_a, v4f64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.d( ++ ++ // __lasx_xvftintrne_w_d ++ // xd, xj, xk ++ // V8SI, V4DF, V4DF ++ v8i32_r = __lasx_xvftintrne_w_d(v4f64_a, v4f64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.d( ++ ++ // __lasx_xvftinth_l_s ++ // xd, xj ++ // V4DI, V8SF ++ v4i64_r = __lasx_xvftinth_l_s(v8f32_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftinth.l.s( ++ ++ // __lasx_xvftintl_l_s ++ // xd, xj ++ // V4DI, V8SF ++ v4i64_r = __lasx_xvftintl_l_s(v8f32_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintl.l.s( ++ ++ // __lasx_xvffinth_d_w ++ // xd, xj ++ // V4DF, V8SI ++ v4f64_r = __lasx_xvffinth_d_w(v8i32_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvffinth.d.w( ++ ++ // __lasx_xvffintl_d_w ++ // xd, xj ++ // V4DF, V8SI ++ v4f64_r = __lasx_xvffintl_d_w(v8i32_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvffintl.d.w( ++ ++ // __lasx_xvftintrzh_l_s ++ // xd, xj ++ // V4DI, V8SF ++ v4i64_r = __lasx_xvftintrzh_l_s(v8f32_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintrzh.l.s( ++ ++ // __lasx_xvftintrzl_l_s ++ // xd, xj ++ // V4DI, V8SF ++ v4i64_r = __lasx_xvftintrzl_l_s(v8f32_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintrzl.l.s( ++ ++ // __lasx_xvftintrph_l_s ++ // xd, xj ++ // V4DI, V8SF ++ v4i64_r = __lasx_xvftintrph_l_s(v8f32_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintrph.l.s( ++ ++ // __lasx_xvftintrpl_l_s ++ // xd, xj ++ // V4DI, V8SF ++ v4i64_r = __lasx_xvftintrpl_l_s(v8f32_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintrpl.l.s( ++ ++ // __lasx_xvftintrmh_l_s ++ // xd, xj ++ // V4DI, V8SF ++ v4i64_r = __lasx_xvftintrmh_l_s(v8f32_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintrmh.l.s( ++ ++ // __lasx_xvftintrml_l_s ++ // xd, xj ++ // V4DI, V8SF ++ v4i64_r = __lasx_xvftintrml_l_s(v8f32_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintrml.l.s( ++ ++ // __lasx_xvftintrneh_l_s ++ // xd, xj ++ // V4DI, V8SF ++ v4i64_r = __lasx_xvftintrneh_l_s(v8f32_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintrneh.l.s( ++ ++ // __lasx_xvftintrnel_l_s ++ // xd, xj ++ // V4DI, V8SF ++ v4i64_r = __lasx_xvftintrnel_l_s(v8f32_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintrnel.l.s( ++ ++ // __lasx_xvfrintrne_s ++ // xd, xj ++ // V8SF, V8SF ++ v8f32_r = __lasx_xvfrintrne_s(v8f32_a); // CHECK: call <8 x float> 
@llvm.loongarch.lasx.xvfrintrne.s( ++ ++ // __lasx_xvfrintrne_d ++ // xd, xj ++ // V4DF, V4DF ++ v4f64_r = __lasx_xvfrintrne_d(v4f64_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfrintrne.d( ++ ++ // __lasx_xvfrintrz_s ++ // xd, xj ++ // V8SF, V8SF ++ v8f32_r = __lasx_xvfrintrz_s(v8f32_a); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfrintrz.s( ++ ++ // __lasx_xvfrintrz_d ++ // xd, xj ++ // V4DF, V4DF ++ v4f64_r = __lasx_xvfrintrz_d(v4f64_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfrintrz.d( ++ ++ // __lasx_xvfrintrp_s ++ // xd, xj ++ // V8SF, V8SF ++ v8f32_r = __lasx_xvfrintrp_s(v8f32_a); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfrintrp.s( ++ ++ // __lasx_xvfrintrp_d ++ // xd, xj ++ // V4DF, V4DF ++ v4f64_r = __lasx_xvfrintrp_d(v4f64_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfrintrp.d( ++ ++ // __lasx_xvfrintrm_s ++ // xd, xj ++ // V8SF, V8SF ++ v8f32_r = __lasx_xvfrintrm_s(v8f32_a); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfrintrm.s( ++ ++ // __lasx_xvfrintrm_d ++ // xd, xj ++ // V4DF, V4DF ++ v4f64_r = __lasx_xvfrintrm_d(v4f64_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfrintrm.d( ++ ++ // __lasx_xvld ++ // xd, rj, si12 ++ // V32QI, CVPOINTER, SI ++ v32i8_r = __lasx_xvld(&v32i8_a, si12); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvld( ++ ++ // __lasx_xvst ++ // xd, rj, si12 ++ // VOID, V32QI, CVPOINTER, SI ++ __lasx_xvst(v32i8_a, &v32i8_b, si12); // CHECK: call void @llvm.loongarch.lasx.xvst( ++ ++ // __lasx_xvstelm_b ++ // xd, rj, si8, idx ++ // VOID, V32QI, CVPOINTER, SI, UQI ++ __lasx_xvstelm_b(v32i8_a, &v32i8_b, 0, idx4); // CHECK: call void @llvm.loongarch.lasx.xvstelm.b( ++ ++ // __lasx_xvstelm_h ++ // xd, rj, si8, idx ++ // VOID, V16HI, CVPOINTER, SI, UQI ++ __lasx_xvstelm_h(v16i16_a, &v16i16_b, 0, idx3); // CHECK: call void @llvm.loongarch.lasx.xvstelm.h( ++ ++ // __lasx_xvstelm_w ++ // xd, rj, si8, idx ++ // VOID, V8SI, CVPOINTER, SI, UQI ++ __lasx_xvstelm_w(v8i32_a, &v8i32_b, 0, idx2); // CHECK: call void @llvm.loongarch.lasx.xvstelm.w( ++ ++ // __lasx_xvstelm_d ++ // xd, rj, si8, idx ++ // VOID, V4DI, CVPOINTER, SI, UQI ++ __lasx_xvstelm_d(v4i64_a, &v4i64_b, 0, idx1); // CHECK: call void @llvm.loongarch.lasx.xvstelm.d( ++ ++ // __lasx_xvinsve0_w ++ // xd, xj, ui3 ++ // V8SI, V8SI, V8SI, UQI ++ v8i32_r = __lasx_xvinsve0_w(v8i32_a, v8i32_b, 2); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvinsve0.w( ++ ++ // __lasx_xvinsve0_d ++ // xd, xj, ui2 ++ // V4DI, V4DI, V4DI, UQI ++ v4i64_r = __lasx_xvinsve0_d(v4i64_a, v4i64_b, ui2); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvinsve0.d( ++ ++ // __lasx_xvpickve_w ++ // xd, xj, ui3 ++ // V8SI, V8SI, UQI ++ v8i32_r = __lasx_xvpickve_w(v8i32_b, 2); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvpickve.w( ++ ++ // __lasx_xvpickve_d ++ // xd, xj, ui2 ++ // V4DI, V4DI, UQI ++ v4i64_r = __lasx_xvpickve_d(v4i64_b, ui2); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvpickve.d( ++ ++ // __lasx_xvssrlrn_b_h ++ // xd, xj, xk ++ // V32QI, V16HI, V16HI ++ v32i8_r = __lasx_xvssrlrn_b_h(v16i16_a, v16i16_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.b.h( ++ ++ // __lasx_xvssrlrn_h_w ++ // xd, xj, xk ++ // V16HI, V8SI, V8SI ++ v16i16_r = __lasx_xvssrlrn_h_w(v8i32_a, v8i32_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.h.w( ++ ++ // __lasx_xvssrlrn_w_d ++ // xd, xj, xk ++ // V8SI, V4DI, V4DI ++ v8i32_r = __lasx_xvssrlrn_w_d(v4i64_a, v4i64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.w.d( ++ ++ // __lasx_xvssrln_b_h ++ // xd, xj, xk ++ // 
V32QI, V16HI, V16HI ++ v32i8_r = __lasx_xvssrln_b_h(v16i16_a, v16i16_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssrln.b.h( ++ ++ // __lasx_xvssrln_h_w ++ // xd, xj, xk ++ // V16HI, V8SI, V8SI ++ v16i16_r = __lasx_xvssrln_h_w(v8i32_a, v8i32_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssrln.h.w( ++ ++ // __lasx_xvssrln_w_d ++ // xd, xj, xk ++ // V8SI, V4DI, V4DI ++ v8i32_r = __lasx_xvssrln_w_d(v4i64_a, v4i64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssrln.w.d( ++ ++ // __lasx_xvorn_v ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvorn_v(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvorn.v( ++ ++ // __lasx_xvldi ++ // xd, i13 ++ // V4DI, HI ++ v4i64_r = __lasx_xvldi(i13); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvldi( ++ ++ // __lasx_xvldx ++ // xd, rj, rk ++ // V32QI, CVPOINTER, DI ++ v32i8_r = __lasx_xvldx(&v32i8_a, i64_d); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvldx( ++ ++ // __lasx_xvstx ++ // xd, rj, rk ++ // VOID, V32QI, CVPOINTER, DI ++ __lasx_xvstx(v32i8_a, &v32i8_b, i64_d); // CHECK: call void @llvm.loongarch.lasx.xvstx( ++ ++ // __lasx_xvinsgr2vr_w ++ // xd, rj, ui3 ++ // V8SI, V8SI, SI, UQI ++ v8i32_r = __lasx_xvinsgr2vr_w(v8i32_a, i32_b, ui3); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvinsgr2vr.w( ++ ++ // __lasx_xvinsgr2vr_d ++ // xd, rj, ui2 ++ // V4DI, V4DI, DI, UQI ++ v4i64_r = __lasx_xvinsgr2vr_d(v4i64_a, i64_b, ui2); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvinsgr2vr.d( ++ ++ // __lasx_xvreplve0_b ++ // xd, xj ++ // V32QI, V32QI ++ v32i8_r = __lasx_xvreplve0_b(v32i8_a); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvreplve0.b( ++ ++ // __lasx_xvreplve0_h ++ // xd, xj ++ // V16HI, V16HI ++ v16i16_r = __lasx_xvreplve0_h(v16i16_a); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvreplve0.h( ++ ++ // __lasx_xvreplve0_w ++ // xd, xj ++ // V8SI, V8SI ++ v8i32_r = __lasx_xvreplve0_w(v8i32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvreplve0.w( ++ ++ // __lasx_xvreplve0_d ++ // xd, xj ++ // V4DI, V4DI ++ v4i64_r = __lasx_xvreplve0_d(v4i64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvreplve0.d( ++ ++ // __lasx_xvreplve0_q ++ // xd, xj ++ // V32QI, V32QI ++ v32i8_r = __lasx_xvreplve0_q(v32i8_a); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvreplve0.q( ++ ++ // __lasx_vext2xv_h_b ++ // xd, xj ++ // V16HI, V32QI ++ v16i16_r = __lasx_vext2xv_h_b(v32i8_a); // CHECK: call <16 x i16> @llvm.loongarch.lasx.vext2xv.h.b( ++ ++ // __lasx_vext2xv_w_h ++ // xd, xj ++ // V8SI, V16HI ++ v8i32_r = __lasx_vext2xv_w_h(v16i16_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.h( ++ ++ // __lasx_vext2xv_d_w ++ // xd, xj ++ // V4DI, V8SI ++ v4i64_r = __lasx_vext2xv_d_w(v8i32_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.w( ++ ++ // __lasx_vext2xv_w_b ++ // xd, xj ++ // V8SI, V32QI ++ v8i32_r = __lasx_vext2xv_w_b(v32i8_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.b( ++ ++ //gcc build fail ++ // __lasx_vext2xv_d_h ++ // xd, xj ++ // V4DI, V16HI ++ v4i64_r = __lasx_vext2xv_d_h(v16i16_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.h( ++ ++ // __lasx_vext2xv_d_b ++ // xd, xj ++ // V4DI, V32QI ++ v4i64_r = __lasx_vext2xv_d_b(v32i8_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.b( ++ ++ // __lasx_vext2xv_hu_bu ++ // xd, xj ++ // V16HI, V32QI ++ v16i16_r = __lasx_vext2xv_hu_bu(v32i8_a); // CHECK: call <16 x i16> @llvm.loongarch.lasx.vext2xv.hu.bu( ++ ++ // __lasx_vext2xv_wu_hu ++ // xd, xj ++ // V8SI, V16HI ++ v8i32_r = 
__lasx_vext2xv_wu_hu(v16i16_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.hu( ++ ++ // __lasx_vext2xv_du_wu ++ // xd, xj ++ // V4DI, V8SI ++ v4i64_r = __lasx_vext2xv_du_wu(v8i32_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.wu( ++ ++ // __lasx_vext2xv_wu_bu ++ // xd, xj ++ // V8SI, V32QI ++ v8i32_r = __lasx_vext2xv_wu_bu(v32i8_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.bu( ++ ++ //gcc build fail ++ // __lasx_vext2xv_du_hu ++ // xd, xj ++ // V4DI, V16HI ++ v4i64_r = __lasx_vext2xv_du_hu(v16i16_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.hu( ++ ++ // __lasx_vext2xv_du_bu ++ // xd, xj ++ // V4DI, V32QI ++ v4i64_r = __lasx_vext2xv_du_bu(v32i8_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.bu( ++ ++ // __lasx_xvpermi_q ++ // xd, xj, ui8 ++ // V32QI, V32QI, V32QI, USI ++ v32i8_r = __lasx_xvpermi_q(v32i8_a, v32i8_b, ui8); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvpermi.q( ++ ++ // __lasx_xvpermi_d ++ // xd, xj, ui8 ++ // V4DI, V4DI, USI ++ v4i64_r = __lasx_xvpermi_d(v4i64_a, ui8); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvpermi.d( ++ ++ // __lasx_xvperm_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvperm_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvperm.w( ++ ++ // __lasx_xvldrepl_b ++ // xd, rj, si12 ++ // V32QI, CVPOINTER, SI ++ v32i8_r = __lasx_xvldrepl_b(&v32i8_a, si12); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvldrepl.b( ++ ++ // __lasx_xvldrepl_h ++ // xd, rj, si11 ++ // V16HI, CVPOINTER, SI ++ v16i16_r = __lasx_xvldrepl_h(&v16i16_a, si11); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvldrepl.h( ++ ++ // __lasx_xvldrepl_w ++ // xd, rj, si10 ++ // V8SI, CVPOINTER, SI ++ v8i32_r = __lasx_xvldrepl_w(&v8i32_a, si10); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvldrepl.w( ++ ++ // __lasx_xvldrepl_d ++ // xd, rj, si9 ++ // V4DI, CVPOINTER, SI ++ v4i64_r = __lasx_xvldrepl_d(&v4i64_a, si9); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvldrepl.d( ++ ++ // __lasx_xvpickve2gr_w ++ // rd, xj, ui3 ++ // SI, V8SI, UQI ++ i32_r = __lasx_xvpickve2gr_w(v8i32_a, ui3_b); // CHECK: call i32 @llvm.loongarch.lasx.xvpickve2gr.w( ++ ++ // __lasx_xvpickve2gr_wu ++ // rd, xj, ui3 ++ // USI, V8SI, UQI ++ u32_r = __lasx_xvpickve2gr_wu(v8i32_a, ui3_b); // CHECK: call i32 @llvm.loongarch.lasx.xvpickve2gr.wu( ++ ++ // __lasx_xvpickve2gr_d ++ // rd, xj, ui2 ++ // DI, V4DI, UQI ++ i64_r = __lasx_xvpickve2gr_d(v4i64_a, ui2_b); // CHECK: call i64 @llvm.loongarch.lasx.xvpickve2gr.d( ++ ++ // __lasx_xvpickve2gr_du ++ // rd, xj, ui2 ++ // UDI, V4DI, UQI ++ u64_r = __lasx_xvpickve2gr_du(v4i64_a, ui2_b); // CHECK: call i64 @llvm.loongarch.lasx.xvpickve2gr.du( ++ ++ // __lasx_xvaddwev_q_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvaddwev_q_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.d( ++ ++ // __lasx_xvaddwev_d_w ++ // xd, xj, xk ++ // V4DI, V8SI, V8SI ++ v4i64_r = __lasx_xvaddwev_d_w(v8i32_a, v8i32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.w( ++ ++ // __lasx_xvaddwev_w_h ++ // xd, xj, xk ++ // V8SI, V16HI, V16HI ++ v8i32_r = __lasx_xvaddwev_w_h(v16i16_a, v16i16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.h( ++ ++ // __lasx_xvaddwev_h_b ++ // xd, xj, xk ++ // V16HI, V32QI, V32QI ++ v16i16_r = __lasx_xvaddwev_h_b(v32i8_a, v32i8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.b( ++ ++ // __lasx_xvaddwev_q_du ++ // xd, xj, xk ++ // V4DI, UV4DI, UV4DI ++ v4i64_r = 
__lasx_xvaddwev_q_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du( ++ ++ // __lasx_xvaddwev_d_wu ++ // xd, xj, xk ++ // V4DI, UV8SI, UV8SI ++ v4i64_r = __lasx_xvaddwev_d_wu(v8u32_a, v8u32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu( ++ ++ // __lasx_xvaddwev_w_hu ++ // xd, xj, xk ++ // V8SI, UV16HI, UV16HI ++ v8i32_r = __lasx_xvaddwev_w_hu(v16u16_a, v16u16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu( ++ ++ // __lasx_xvaddwev_h_bu ++ // xd, xj, xk ++ // V16HI, UV32QI, UV32QI ++ v16i16_r = __lasx_xvaddwev_h_bu(v32u8_a, v32u8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu( ++ ++ // __lasx_xvsubwev_q_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvsubwev_q_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.d( ++ ++ // __lasx_xvsubwev_d_w ++ // xd, xj, xk ++ // V4DI, V8SI, V8SI ++ v4i64_r = __lasx_xvsubwev_d_w(v8i32_a, v8i32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.w( ++ ++ // __lasx_xvsubwev_w_h ++ // xd, xj, xk ++ // V8SI, V16HI, V16HI ++ v8i32_r = __lasx_xvsubwev_w_h(v16i16_a, v16i16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.h( ++ ++ // __lasx_xvsubwev_h_b ++ // xd, xj, xk ++ // V16HI, V32QI, V32QI ++ v16i16_r = __lasx_xvsubwev_h_b(v32i8_a, v32i8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.b( ++ ++ // __lasx_xvsubwev_q_du ++ // xd, xj, xk ++ // V4DI, UV4DI, UV4DI ++ v4i64_r = __lasx_xvsubwev_q_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.du( ++ ++ // __lasx_xvsubwev_d_wu ++ // xd, xj, xk ++ // V4DI, UV8SI, UV8SI ++ v4i64_r = __lasx_xvsubwev_d_wu(v8u32_a, v8u32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.wu( ++ ++ // __lasx_xvsubwev_w_hu ++ // xd, xj, xk ++ // V8SI, UV16HI, UV16HI ++ v8i32_r = __lasx_xvsubwev_w_hu(v16u16_a, v16u16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.hu( ++ ++ // __lasx_xvsubwev_h_bu ++ // xd, xj, xk ++ // V16HI, UV32QI, UV32QI ++ v16i16_r = __lasx_xvsubwev_h_bu(v32u8_a, v32u8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.bu( ++ ++ // __lasx_xvmulwev_q_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvmulwev_q_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.d( ++ ++ // __lasx_xvmulwev_d_w ++ // xd, xj, xk ++ // V4DI, V8SI, V8SI ++ v4i64_r = __lasx_xvmulwev_d_w(v8i32_a, v8i32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.w( ++ ++ // __lasx_xvmulwev_w_h ++ // xd, xj, xk ++ // V8SI, V16HI, V16HI ++ v8i32_r = __lasx_xvmulwev_w_h(v16i16_a, v16i16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.h( ++ ++ // __lasx_xvmulwev_h_b ++ // xd, xj, xk ++ // V16HI, V32QI, V32QI ++ v16i16_r = __lasx_xvmulwev_h_b(v32i8_a, v32i8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.b( ++ ++ // __lasx_xvmulwev_q_du ++ // xd, xj, xk ++ // V4DI, UV4DI, UV4DI ++ v4i64_r = __lasx_xvmulwev_q_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du( ++ ++ // __lasx_xvmulwev_d_wu ++ // xd, xj, xk ++ // V4DI, UV8SI, UV8SI ++ v4i64_r = __lasx_xvmulwev_d_wu(v8u32_a, v8u32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu( ++ ++ // __lasx_xvmulwev_w_hu ++ // xd, xj, xk ++ // V8SI, UV16HI, UV16HI ++ v8i32_r = __lasx_xvmulwev_w_hu(v16u16_a, v16u16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu( ++ ++ // __lasx_xvmulwev_h_bu ++ // xd, xj, xk ++ // V16HI, UV32QI, UV32QI ++ v16i16_r = 
__lasx_xvmulwev_h_bu(v32u8_a, v32u8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu( ++ ++ // __lasx_xvaddwod_q_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvaddwod_q_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.d( ++ ++ // __lasx_xvaddwod_d_w ++ // xd, xj, xk ++ // V4DI, V8SI, V8SI ++ v4i64_r = __lasx_xvaddwod_d_w(v8i32_a, v8i32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.w( ++ ++ // __lasx_xvaddwod_w_h ++ // xd, xj, xk ++ // V8SI, V16HI, V16HI ++ v8i32_r = __lasx_xvaddwod_w_h(v16i16_a, v16i16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.h( ++ ++ // __lasx_xvaddwod_h_b ++ // xd, xj, xk ++ // V16HI, V32QI, V32QI ++ v16i16_r = __lasx_xvaddwod_h_b(v32i8_a, v32i8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.b( ++ ++ // __lasx_xvaddwod_q_du ++ // xd, xj, xk ++ // V4DI, UV4DI, UV4DI ++ v4i64_r = __lasx_xvaddwod_q_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du( ++ ++ // __lasx_xvaddwod_d_wu ++ // xd, xj, xk ++ // V4DI, UV8SI, UV8SI ++ v4i64_r = __lasx_xvaddwod_d_wu(v8u32_a, v8u32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu( ++ ++ // __lasx_xvaddwod_w_hu ++ // xd, xj, xk ++ // V8SI, UV16HI, UV16HI ++ v8i32_r = __lasx_xvaddwod_w_hu(v16u16_a, v16u16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu( ++ ++ // __lasx_xvaddwod_h_bu ++ // xd, xj, xk ++ // V16HI, UV32QI, UV32QI ++ v16i16_r = __lasx_xvaddwod_h_bu(v32u8_a, v32u8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu( ++ ++ // __lasx_xvsubwod_q_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvsubwod_q_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.d( ++ ++ // __lasx_xvsubwod_d_w ++ // xd, xj, xk ++ // V4DI, V8SI, V8SI ++ v4i64_r = __lasx_xvsubwod_d_w(v8i32_a, v8i32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.w( ++ ++ // __lasx_xvsubwod_w_h ++ // xd, xj, xk ++ // V8SI, V16HI, V16HI ++ v8i32_r = __lasx_xvsubwod_w_h(v16i16_a, v16i16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.h( ++ ++ // __lasx_xvsubwod_h_b ++ // xd, xj, xk ++ // V16HI, V32QI, V32QI ++ v16i16_r = __lasx_xvsubwod_h_b(v32i8_a, v32i8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.b( ++ ++ // __lasx_xvsubwod_q_du ++ // xd, xj, xk ++ // V4DI, UV4DI, UV4DI ++ v4i64_r = __lasx_xvsubwod_q_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.du( ++ ++ // __lasx_xvsubwod_d_wu ++ // xd, xj, xk ++ // V4DI, UV8SI, UV8SI ++ v4i64_r = __lasx_xvsubwod_d_wu(v8u32_a, v8u32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.wu( ++ ++ // __lasx_xvsubwod_w_hu ++ // xd, xj, xk ++ // V8SI, UV16HI, UV16HI ++ v8i32_r = __lasx_xvsubwod_w_hu(v16u16_a, v16u16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.hu( ++ ++ // __lasx_xvsubwod_h_bu ++ // xd, xj, xk ++ // V16HI, UV32QI, UV32QI ++ v16i16_r = __lasx_xvsubwod_h_bu(v32u8_a, v32u8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.bu( ++ ++ // __lasx_xvmulwod_q_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvmulwod_q_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.d( ++ ++ // __lasx_xvmulwod_d_w ++ // xd, xj, xk ++ // V4DI, V8SI, V8SI ++ v4i64_r = __lasx_xvmulwod_d_w(v8i32_a, v8i32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.w( ++ ++ // __lasx_xvmulwod_w_h ++ // xd, xj, xk ++ // V8SI, V16HI, V16HI ++ v8i32_r = __lasx_xvmulwod_w_h(v16i16_a, 
v16i16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.h( ++ ++ // __lasx_xvmulwod_h_b ++ // xd, xj, xk ++ // V16HI, V32QI, V32QI ++ v16i16_r = __lasx_xvmulwod_h_b(v32i8_a, v32i8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.b( ++ ++ // __lasx_xvmulwod_q_du ++ // xd, xj, xk ++ // V4DI, UV4DI, UV4DI ++ v4i64_r = __lasx_xvmulwod_q_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du( ++ ++ // __lasx_xvmulwod_d_wu ++ // xd, xj, xk ++ // V4DI, UV8SI, UV8SI ++ v4i64_r = __lasx_xvmulwod_d_wu(v8u32_a, v8u32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu( ++ ++ // __lasx_xvmulwod_w_hu ++ // xd, xj, xk ++ // V8SI, UV16HI, UV16HI ++ v8i32_r = __lasx_xvmulwod_w_hu(v16u16_a, v16u16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu( ++ ++ // __lasx_xvmulwod_h_bu ++ // xd, xj, xk ++ // V16HI, UV32QI, UV32QI ++ v16i16_r = __lasx_xvmulwod_h_bu(v32u8_a, v32u8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu( ++ ++ // __lasx_xvaddwev_d_wu_w ++ // xd, xj, xk ++ // V4DI, UV8SI, V8SI ++ v4i64_r = __lasx_xvaddwev_d_wu_w(v8u32_a, v8i32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu.w( ++ ++ // __lasx_xvaddwev_w_hu_h ++ // xd, xj, xk ++ // V8SI, UV16HI, V16HI ++ v8i32_r = __lasx_xvaddwev_w_hu_h(v16u16_a, v16i16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu.h( ++ ++ // __lasx_xvaddwev_h_bu_b ++ // xd, xj, xk ++ // V16HI, UV32QI, V32QI ++ v16i16_r = __lasx_xvaddwev_h_bu_b(v32u8_a, v32i8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu.b( ++ ++ // __lasx_xvmulwev_d_wu_w ++ // xd, xj, xk ++ // V4DI, UV8SI, V8SI ++ v4i64_r = __lasx_xvmulwev_d_wu_w(v8u32_a, v8i32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu.w( ++ ++ // __lasx_xvmulwev_w_hu_h ++ // xd, xj, xk ++ // V8SI, UV16HI, V16HI ++ v8i32_r = __lasx_xvmulwev_w_hu_h(v16u16_a, v16i16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu.h( ++ ++ // __lasx_xvmulwev_h_bu_b ++ // xd, xj, xk ++ // V16HI, UV32QI, V32QI ++ v16i16_r = __lasx_xvmulwev_h_bu_b(v32u8_a, v32i8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu.b( ++ ++ // __lasx_xvaddwod_d_wu_w ++ // xd, xj, xk ++ // V4DI, UV8SI, V8SI ++ v4i64_r = __lasx_xvaddwod_d_wu_w(v8u32_a, v8i32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu.w( ++ ++ // __lasx_xvaddwod_w_hu_h ++ // xd, xj, xk ++ // V8SI, UV16HI, V16HI ++ v8i32_r = __lasx_xvaddwod_w_hu_h(v16u16_a, v16i16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu.h( ++ ++ // __lasx_xvaddwod_h_bu_b ++ // xd, xj, xk ++ // V16HI, UV32QI, V32QI ++ v16i16_r = __lasx_xvaddwod_h_bu_b(v32u8_a, v32i8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu.b( ++ ++ // __lasx_xvmulwod_d_wu_w ++ // xd, xj, xk ++ // V4DI, UV8SI, V8SI ++ v4i64_r = __lasx_xvmulwod_d_wu_w(v8u32_a, v8i32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu.w( ++ ++ // __lasx_xvmulwod_w_hu_h ++ // xd, xj, xk ++ // V8SI, UV16HI, V16HI ++ v8i32_r = __lasx_xvmulwod_w_hu_h(v16u16_a, v16i16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu.h( ++ ++ // __lasx_xvmulwod_h_bu_b ++ // xd, xj, xk ++ // V16HI, UV32QI, V32QI ++ v16i16_r = __lasx_xvmulwod_h_bu_b(v32u8_a, v32i8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu.b( ++ ++ // __lasx_xvhaddw_q_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvhaddw_q_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvhaddw.q.d( ++ ++ // __lasx_xvhaddw_qu_du ++ // 
xd, xj, xk ++ // UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvhaddw_qu_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvhaddw.qu.du( ++ ++ // __lasx_xvhsubw_q_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvhsubw_q_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvhsubw.q.d( ++ ++ // __lasx_xvhsubw_qu_du ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvhsubw_qu_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvhsubw.qu.du( ++ ++ // __lasx_xvmaddwev_q_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvmaddwev_q_d(v4i64_a, v4i64_b, v4i64_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.d( ++ ++ // __lasx_xvmaddwev_d_w ++ // xd, xj, xk ++ // V4DI, V4DI, V8SI, V8SI ++ v4i64_r = __lasx_xvmaddwev_d_w(v4i64_a, v8i32_b, v8i32_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.w( ++ ++ // __lasx_xvmaddwev_w_h ++ // xd, xj, xk ++ // V8SI, V8SI, V16HI, V16HI ++ v8i32_r = __lasx_xvmaddwev_w_h(v8i32_a, v16i16_b, v16i16_c); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.h( ++ ++ // __lasx_xvmaddwev_h_b ++ // xd, xj, xk ++ // V16HI, V16HI, V32QI, V32QI ++ v16i16_r = __lasx_xvmaddwev_h_b(v16i16_a, v32i8_b, v32i8_c); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.b( ++ ++ // __lasx_xvmaddwev_q_du ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvmaddwev_q_du(v4u64_a, v4u64_b, v4u64_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du( ++ ++ // __lasx_xvmaddwev_d_wu ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV8SI, UV8SI ++ v4u64_r = __lasx_xvmaddwev_d_wu(v4u64_a, v8u32_b, v8u32_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu( ++ ++ // __lasx_xvmaddwev_w_hu ++ // xd, xj, xk ++ // UV8SI, UV8SI, UV16HI, UV16HI ++ v8u32_r = __lasx_xvmaddwev_w_hu(v8u32_a, v16u16_b, v16u16_c); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu( ++ ++ // __lasx_xvmaddwev_h_bu ++ // xd, xj, xk ++ // UV16HI, UV16HI, UV32QI, UV32QI ++ v16u16_r = __lasx_xvmaddwev_h_bu(v16u16_a, v32u8_b, v32u8_c); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu( ++ ++ // __lasx_xvmaddwod_q_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvmaddwod_q_d(v4i64_a, v4i64_b, v4i64_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.d( ++ ++ // __lasx_xvmaddwod_d_w ++ // xd, xj, xk ++ // V4DI, V4DI, V8SI, V8SI ++ v4i64_r = __lasx_xvmaddwod_d_w(v4i64_a, v8i32_b, v8i32_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.w( ++ ++ // __lasx_xvmaddwod_w_h ++ // xd, xj, xk ++ // V8SI, V8SI, V16HI, V16HI ++ v8i32_r = __lasx_xvmaddwod_w_h(v8i32_a, v16i16_b, v16i16_c); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.h( ++ ++ // __lasx_xvmaddwod_h_b ++ // xd, xj, xk ++ // V16HI, V16HI, V32QI, V32QI ++ v16i16_r = __lasx_xvmaddwod_h_b(v16i16_a, v32i8_b, v32i8_c); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.b( ++ ++ // __lasx_xvmaddwod_q_du ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvmaddwod_q_du(v4u64_a, v4u64_b, v4u64_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du( ++ ++ // __lasx_xvmaddwod_d_wu ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV8SI, UV8SI ++ v4u64_r = __lasx_xvmaddwod_d_wu(v4u64_a, v8u32_b, v8u32_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu( ++ ++ // __lasx_xvmaddwod_w_hu ++ // xd, xj, xk ++ // UV8SI, UV8SI, UV16HI, UV16HI ++ v8u32_r = __lasx_xvmaddwod_w_hu(v8u32_a, v16u16_b, v16u16_c); // CHECK: call <8 x i32> 
@llvm.loongarch.lasx.xvmaddwod.w.hu( ++ ++ // __lasx_xvmaddwod_h_bu ++ // xd, xj, xk ++ // UV16HI, UV16HI, UV32QI, UV32QI ++ v16u16_r = __lasx_xvmaddwod_h_bu(v16u16_a, v32u8_b, v32u8_c); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu( ++ ++ // __lasx_xvmaddwev_q_du_d ++ // xd, xj, xk ++ // V4DI, V4DI, UV4DI, V4DI ++ v4i64_r = __lasx_xvmaddwev_q_du_d(v4i64_a, v4u64_b, v4i64_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du.d( ++ ++ // __lasx_xvmaddwev_d_wu_w ++ // xd, xj, xk ++ // V4DI, V4DI, UV8SI, V8SI ++ v4i64_r = __lasx_xvmaddwev_d_wu_w(v4i64_a, v8u32_b, v8i32_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu.w( ++ ++ // __lasx_xvmaddwev_w_hu_h ++ // xd, xj, xk ++ // V8SI, V8SI, UV16HI, V16HI ++ v8i32_r = __lasx_xvmaddwev_w_hu_h(v8i32_a, v16u16_b, v16i16_c); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu.h( ++ ++ // __lasx_xvmaddwev_h_bu_b ++ // xd, xj, xk ++ // V16HI, V16HI, UV32QI, V32QI ++ v16i16_r = __lasx_xvmaddwev_h_bu_b(v16i16_a, v32u8_b, v32i8_c); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu.b( ++ ++ // __lasx_xvmaddwod_q_du_d ++ // xd, xj, xk ++ // V4DI, V4DI, UV4DI, V4DI ++ v4i64_r = __lasx_xvmaddwod_q_du_d(v4i64_a, v4u64_b, v4i64_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du.d( ++ ++ // __lasx_xvmaddwod_d_wu_w ++ // xd, xj, xk ++ // V4DI, V4DI, UV8SI, V8SI ++ v4i64_r = __lasx_xvmaddwod_d_wu_w(v4i64_a, v8u32_b, v8i32_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu.w( ++ ++ // __lasx_xvmaddwod_w_hu_h ++ // xd, xj, xk ++ // V8SI, V8SI, UV16HI, V16HI ++ v8i32_r = __lasx_xvmaddwod_w_hu_h(v8i32_a, v16u16_b, v16i16_c); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu.h( ++ ++ // __lasx_xvmaddwod_h_bu_b ++ // xd, xj, xk ++ // V16HI, V16HI, UV32QI, V32QI ++ v16i16_r = __lasx_xvmaddwod_h_bu_b(v16i16_a, v32u8_b, v32i8_c); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu.b( ++ ++ // __lasx_xvrotr_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvrotr_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvrotr.b( ++ ++ // __lasx_xvrotr_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvrotr_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvrotr.h( ++ ++ // __lasx_xvrotr_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvrotr_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvrotr.w( ++ ++ // __lasx_xvrotr_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvrotr_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvrotr.d( ++ ++ // __lasx_xvadd_q ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvadd_q(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvadd.q( ++ ++ // __lasx_xvsub_q ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvsub_q(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsub.q( ++ ++ // __lasx_xvaddwev_q_du_d ++ // xd, xj, xk ++ // V4DI, UV4DI, V4DI ++ v4i64_r = __lasx_xvaddwev_q_du_d(v4u64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du.d( ++ ++ // __lasx_xvaddwod_q_du_d ++ // xd, xj, xk ++ // V4DI, UV4DI, V4DI ++ v4i64_r = __lasx_xvaddwod_q_du_d(v4u64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du.d( ++ ++ // __lasx_xvmulwev_q_du_d ++ // xd, xj, xk ++ // V4DI, UV4DI, V4DI ++ v4i64_r = __lasx_xvmulwev_q_du_d(v4u64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du.d( ++ ++ // 
__lasx_xvmulwod_q_du_d ++ // xd, xj, xk ++ // V4DI, UV4DI, V4DI ++ v4i64_r = __lasx_xvmulwod_q_du_d(v4u64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du.d( ++ ++ // __lasx_xvmskgez_b ++ // xd, xj ++ // V32QI, V32QI ++ v32i8_r = __lasx_xvmskgez_b(v32i8_a); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmskgez.b( ++ ++ // __lasx_xvmsknz_b ++ // xd, xj ++ // V32QI, V32QI ++ v32i8_r = __lasx_xvmsknz_b(v32i8_a); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmsknz.b( ++ ++ // __lasx_xvexth_h_b ++ // xd, xj ++ // V16HI, V32QI ++ v16i16_r = __lasx_xvexth_h_b(v32i8_a); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvexth.h.b( ++ ++ // __lasx_xvexth_w_h ++ // xd, xj ++ // V8SI, V16HI ++ v8i32_r = __lasx_xvexth_w_h(v16i16_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvexth.w.h( ++ ++ // __lasx_xvexth_d_w ++ // xd, xj ++ // V4DI, V8SI ++ v4i64_r = __lasx_xvexth_d_w(v8i32_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvexth.d.w( ++ ++ // __lasx_xvexth_q_d ++ // xd, xj ++ // V4DI, V4DI ++ v4i64_r = __lasx_xvexth_q_d(v4i64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvexth.q.d( ++ ++ // __lasx_xvexth_hu_bu ++ // xd, xj ++ // UV16HI, UV32QI ++ v16u16_r = __lasx_xvexth_hu_bu(v32u8_a); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvexth.hu.bu( ++ ++ // __lasx_xvexth_wu_hu ++ // xd, xj ++ // UV8SI, UV16HI ++ v8u32_r = __lasx_xvexth_wu_hu(v16u16_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvexth.wu.hu( ++ ++ // __lasx_xvexth_du_wu ++ // xd, xj ++ // UV4DI, UV8SI ++ v4u64_r = __lasx_xvexth_du_wu(v8u32_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvexth.du.wu( ++ ++ // __lasx_xvexth_qu_du ++ // xd, xj ++ // UV4DI, UV4DI ++ v4u64_r = __lasx_xvexth_qu_du(v4u64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvexth.qu.du( ++ ++ // __lasx_xvextl_q_d ++ // xd, xj ++ // V4DI, V4DI ++ v4i64_r = __lasx_xvextl_q_d(v4i64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvextl.q.d( ++ ++ // __lasx_xvextl_qu_du ++ // xd, xj ++ // UV4DI, UV4DI ++ v4u64_r = __lasx_xvextl_qu_du(v4u64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvextl.qu.du( ++ ++ // __lasx_xvrotri_b ++ // xd, xj, ui3 ++ // V32QI, V32QI, UQI ++ v32i8_r = __lasx_xvrotri_b(v32i8_a, ui3_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvrotri.b( ++ ++ // __lasx_xvrotri_h ++ // xd, xj, ui4 ++ // V16HI, V16HI, UQI ++ v16i16_r = __lasx_xvrotri_h(v16i16_a, ui4_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvrotri.h( ++ ++ // __lasx_xvrotri_w ++ // xd, xj, ui5 ++ // V8SI, V8SI, UQI ++ v8i32_r = __lasx_xvrotri_w(v8i32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvrotri.w( ++ ++ // __lasx_xvrotri_d ++ // xd, xj, ui6 ++ // V4DI, V4DI, UQI ++ v4i64_r = __lasx_xvrotri_d(v4i64_a, ui6_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvrotri.d( ++ ++ // __lasx_xvsrlni_b_h ++ // xd, xj, ui4 ++ // V32QI, V32QI, V32QI, USI ++ v32i8_r = __lasx_xvsrlni_b_h(v32i8_a, v32i8_b, ui4); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsrlni.b.h( ++ ++ // __lasx_xvsrlni_h_w ++ // xd, xj, ui5 ++ // V16HI, V16HI, V16HI, USI ++ v16i16_r = __lasx_xvsrlni_h_w(v16i16_a, v16i16_b, ui5); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrlni.h.w( ++ ++ // __lasx_xvsrlni_w_d ++ // xd, xj, ui6 ++ // V8SI, V8SI, V8SI, USI ++ v8i32_r = __lasx_xvsrlni_w_d(v8i32_a, v8i32_b, ui6); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsrlni.w.d( ++ ++ // __lasx_xvsrlni_d_q ++ // xd, xj, ui7 ++ // V4DI, V4DI, V4DI, USI ++ v4i64_r = __lasx_xvsrlni_d_q(v4i64_a, v4i64_b, ui7); // CHECK: call <4 x i64> 
@llvm.loongarch.lasx.xvsrlni.d.q( ++ ++ // __lasx_xvsrlrni_b_h ++ // xd, xj, ui4 ++ // V32QI, V32QI, V32QI, USI ++ v32i8_r = __lasx_xvsrlrni_b_h(v32i8_a, v32i8_b, ui4); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsrlrni.b.h( ++ ++ // __lasx_xvsrlrni_h_w ++ // xd, xj, ui5 ++ // V16HI, V16HI, V16HI, USI ++ v16i16_r = __lasx_xvsrlrni_h_w(v16i16_a, v16i16_b, ui5); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrlrni.h.w( ++ ++ // __lasx_xvsrlrni_w_d ++ // xd, xj, ui6 ++ // V8SI, V8SI, V8SI, USI ++ v8i32_r = __lasx_xvsrlrni_w_d(v8i32_a, v8i32_b, ui6); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsrlrni.w.d( ++ ++ // __lasx_xvsrlrni_d_q ++ // xd, xj, ui7 ++ // V4DI, V4DI, V4DI, USI ++ v4i64_r = __lasx_xvsrlrni_d_q(v4i64_a, v4i64_b, ui7); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsrlrni.d.q( ++ ++ // __lasx_xvssrlni_b_h ++ // xd, xj, ui4 ++ // V32QI, V32QI, V32QI, USI ++ v32i8_r = __lasx_xvssrlni_b_h(v32i8_a, v32i8_b, ui4); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssrlni.b.h( ++ ++ // __lasx_xvssrlni_h_w ++ // xd, xj, ui5 ++ // V16HI, V16HI, V16HI, USI ++ v16i16_r = __lasx_xvssrlni_h_w(v16i16_a, v16i16_b, ui5); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssrlni.h.w( ++ ++ // __lasx_xvssrlni_w_d ++ // xd, xj, ui6 ++ // V8SI, V8SI, V8SI, USI ++ v8i32_r = __lasx_xvssrlni_w_d(v8i32_a, v8i32_b, ui6); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssrlni.w.d( ++ ++ // __lasx_xvssrlni_d_q ++ // xd, xj, ui7 ++ // V4DI, V4DI, V4DI, USI ++ v4i64_r = __lasx_xvssrlni_d_q(v4i64_a, v4i64_b, ui7); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvssrlni.d.q( ++ ++ // __lasx_xvssrlni_bu_h ++ // xd, xj, ui4 ++ // UV32QI, UV32QI, V32QI, USI ++ v32u8_r = __lasx_xvssrlni_bu_h(v32u8_a, v32i8_b, ui4); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssrlni.bu.h( ++ ++ // __lasx_xvssrlni_hu_w ++ // xd, xj, ui5 ++ // UV16HI, UV16HI, V16HI, USI ++ v16u16_r = __lasx_xvssrlni_hu_w(v16u16_a, v16i16_b, ui5); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssrlni.hu.w( ++ ++ // __lasx_xvssrlni_wu_d ++ // xd, xj, ui6 ++ // UV8SI, UV8SI, V8SI, USI ++ v8u32_r = __lasx_xvssrlni_wu_d(v8u32_a, v8i32_b, ui6); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssrlni.wu.d( ++ ++ // __lasx_xvssrlni_du_q ++ // xd, xj, ui7 ++ // UV4DI, UV4DI, V4DI, USI ++ v4u64_r = __lasx_xvssrlni_du_q(v4u64_a, v4i64_b, ui7); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvssrlni.du.q( ++ ++ // __lasx_xvssrlrni_b_h ++ // xd, xj, ui4 ++ // V32QI, V32QI, V32QI, USI ++ v32i8_r = __lasx_xvssrlrni_b_h(v32i8_a, v32i8_b, ui4); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.b.h( ++ ++ // __lasx_xvssrlrni_h_w ++ // xd, xj, ui5 ++ // V16HI, V16HI, V16HI, USI ++ v16i16_r = __lasx_xvssrlrni_h_w(v16i16_a, v16i16_b, ui5); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.h.w( ++ ++ // __lasx_xvssrlrni_w_d ++ // xd, xj, ui6 ++ // V8SI, V8SI, V8SI, USI ++ v8i32_r = __lasx_xvssrlrni_w_d(v8i32_a, v8i32_b, ui6); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.w.d( ++ ++ // __lasx_xvssrlrni_d_q ++ // xd, xj, ui7 ++ // V4DI, V4DI, V4DI, USI ++ v4i64_r = __lasx_xvssrlrni_d_q(v4i64_a, v4i64_b, ui7); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.d.q( ++ ++ // __lasx_xvssrlrni_bu_h ++ // xd, xj, ui4 ++ // UV32QI, UV32QI, V32QI, USI ++ v32u8_r = __lasx_xvssrlrni_bu_h(v32u8_a, v32i8_b, ui4); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.bu.h( ++ ++ // __lasx_xvssrlrni_hu_w ++ // xd, xj, ui5 ++ // UV16HI, UV16HI, V16HI, USI ++ v16u16_r = __lasx_xvssrlrni_hu_w(v16u16_a, v16i16_b, ui5); // CHECK: call <16 x i16> 
@llvm.loongarch.lasx.xvssrlrni.hu.w( ++ ++ // __lasx_xvssrlrni_wu_d ++ // xd, xj, ui6 ++ // UV8SI, UV8SI, V8SI, USI ++ v8u32_r = __lasx_xvssrlrni_wu_d(v8u32_a, v8i32_b, ui6); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.wu.d( ++ ++ // __lasx_xvssrlrni_du_q ++ // xd, xj, ui7 ++ // UV4DI, UV4DI, V4DI, USI ++ v4u64_r = __lasx_xvssrlrni_du_q(v4u64_a, v4i64_b, ui7); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.du.q( ++ ++ // __lasx_xvsrani_b_h ++ // xd, xj, ui4 ++ // V32QI, V32QI, V32QI, USI ++ v32i8_r = __lasx_xvsrani_b_h(v32i8_a, v32i8_b, ui4); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsrani.b.h( ++ ++ // __lasx_xvsrani_h_w ++ // xd, xj, ui5 ++ // V16HI, V16HI, V16HI, USI ++ v16i16_r = __lasx_xvsrani_h_w(v16i16_a, v16i16_b, ui5); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrani.h.w( ++ ++ // __lasx_xvsrani_w_d ++ // xd, xj, ui6 ++ // V8SI, V8SI, V8SI, USI ++ v8i32_r = __lasx_xvsrani_w_d(v8i32_a, v8i32_b, ui6); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsrani.w.d( ++ ++ // __lasx_xvsrani_d_q ++ // xd, xj, ui7 ++ // V4DI, V4DI, V4DI, USI ++ v4i64_r = __lasx_xvsrani_d_q(v4i64_a, v4i64_b, ui7); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsrani.d.q( ++ ++ // __lasx_xvsrarni_b_h ++ // xd, xj, ui4 ++ // V32QI, V32QI, V32QI, USI ++ v32i8_r = __lasx_xvsrarni_b_h(v32i8_a, v32i8_b, ui4); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsrarni.b.h( ++ ++ // __lasx_xvsrarni_h_w ++ // xd, xj, ui5 ++ // V16HI, V16HI, V16HI, USI ++ v16i16_r = __lasx_xvsrarni_h_w(v16i16_a, v16i16_b, ui5); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrarni.h.w( ++ ++ // __lasx_xvsrarni_w_d ++ // xd, xj, ui6 ++ // V8SI, V8SI, V8SI, USI ++ v8i32_r = __lasx_xvsrarni_w_d(v8i32_a, v8i32_b, ui6); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsrarni.w.d( ++ ++ // __lasx_xvsrarni_d_q ++ // xd, xj, ui7 ++ // V4DI, V4DI, V4DI, USI ++ v4i64_r = __lasx_xvsrarni_d_q(v4i64_a, v4i64_b, ui7); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsrarni.d.q( ++ ++ // __lasx_xvssrani_b_h ++ // xd, xj, ui4 ++ // V32QI, V32QI, V32QI, USI ++ v32i8_r = __lasx_xvssrani_b_h(v32i8_a, v32i8_b, ui4); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssrani.b.h( ++ ++ // __lasx_xvssrani_h_w ++ // xd, xj, ui5 ++ // V16HI, V16HI, V16HI, USI ++ v16i16_r = __lasx_xvssrani_h_w(v16i16_a, v16i16_b, ui5); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssrani.h.w( ++ ++ // __lasx_xvssrani_w_d ++ // xd, xj, ui6 ++ // V8SI, V8SI, V8SI, USI ++ v8i32_r = __lasx_xvssrani_w_d(v8i32_a, v8i32_b, ui6); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssrani.w.d( ++ ++ // __lasx_xvssrani_d_q ++ // xd, xj, ui7 ++ // V4DI, V4DI, V4DI, USI ++ v4i64_r = __lasx_xvssrani_d_q(v4i64_a, v4i64_b, ui7); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvssrani.d.q( ++ ++ // __lasx_xvssrani_bu_h ++ // xd, xj, ui4 ++ // UV32QI, UV32QI, V32QI, USI ++ v32u8_r = __lasx_xvssrani_bu_h(v32u8_a, v32i8_b, ui4); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssrani.bu.h( ++ ++ // __lasx_xvssrani_hu_w ++ // xd, xj, ui5 ++ // UV16HI, UV16HI, V16HI, USI ++ v16u16_r = __lasx_xvssrani_hu_w(v16u16_a, v16i16_b, ui5); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssrani.hu.w( ++ ++ // __lasx_xvssrani_wu_d ++ // xd, xj, ui6 ++ // UV8SI, UV8SI, V8SI, USI ++ v8u32_r = __lasx_xvssrani_wu_d(v8u32_a, v8i32_b, ui6); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssrani.wu.d( ++ ++ // __lasx_xvssrani_du_q ++ // xd, xj, ui7 ++ // UV4DI, UV4DI, V4DI, USI ++ v4u64_r = __lasx_xvssrani_du_q(v4u64_a, v4i64_b, ui7); // CHECK: call <4 x i64> 
@llvm.loongarch.lasx.xvssrani.du.q( ++ ++ // __lasx_xvssrarni_b_h ++ // xd, xj, ui4 ++ // V32QI, V32QI, V32QI, USI ++ v32i8_r = __lasx_xvssrarni_b_h(v32i8_a, v32i8_b, ui4); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssrarni.b.h( ++ ++ // __lasx_xvssrarni_h_w ++ // xd, xj, ui5 ++ // V16HI, V16HI, V16HI, USI ++ v16i16_r = __lasx_xvssrarni_h_w(v16i16_a, v16i16_b, ui5); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssrarni.h.w( ++ ++ // __lasx_xvssrarni_w_d ++ // xd, xj, ui6 ++ // V8SI, V8SI, V8SI, USI ++ v8i32_r = __lasx_xvssrarni_w_d(v8i32_a, v8i32_b, ui6); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssrarni.w.d( ++ ++ // __lasx_xvssrarni_d_q ++ // xd, xj, ui7 ++ // V4DI, V4DI, V4DI, USI ++ v4i64_r = __lasx_xvssrarni_d_q(v4i64_a, v4i64_b, ui7); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvssrarni.d.q( ++ ++ // __lasx_xvssrarni_bu_h ++ // xd, xj, ui4 ++ // UV32QI, UV32QI, V32QI, USI ++ v32u8_r = __lasx_xvssrarni_bu_h(v32u8_a, v32i8_b, ui4); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssrarni.bu.h( ++ ++ // __lasx_xvssrarni_hu_w ++ // xd, xj, ui5 ++ // UV16HI, UV16HI, V16HI, USI ++ v16u16_r = __lasx_xvssrarni_hu_w(v16u16_a, v16i16_b, ui5); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssrarni.hu.w( ++ ++ // __lasx_xvssrarni_wu_d ++ // xd, xj, ui6 ++ // UV8SI, UV8SI, V8SI, USI ++ v8u32_r = __lasx_xvssrarni_wu_d(v8u32_a, v8i32_b, ui6); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssrarni.wu.d( ++ ++ // __lasx_xvssrarni_du_q ++ // xd, xj, ui7 ++ // UV4DI, UV4DI, V4DI, USI ++ v4u64_r = __lasx_xvssrarni_du_q(v4u64_a, v4i64_b, ui7); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvssrarni.du.q( ++ ++ // __lasx_xbnz_v ++ // rd, xj ++ // SI, UV32QI ++ i32_r = __lasx_xbnz_v(v32u8_a); // CHECK: call i32 @llvm.loongarch.lasx.xbnz.v( ++ ++ // __lasx_xbz_v ++ // rd, xj ++ // SI, UV32QI ++ i32_r = __lasx_xbz_v(v32u8_a); // CHECK: call i32 @llvm.loongarch.lasx.xbz.v( ++ ++ // __lasx_xbnz_b ++ // rd, xj ++ // SI, UV32QI ++ i32_r = __lasx_xbnz_b(v32u8_a); // CHECK: call i32 @llvm.loongarch.lasx.xbnz.b( ++ ++ // __lasx_xbnz_h ++ // rd, xj ++ // SI, UV16HI ++ i32_r = __lasx_xbnz_h(v16u16_a); // CHECK: call i32 @llvm.loongarch.lasx.xbnz.h( ++ ++ // __lasx_xbnz_w ++ // rd, xj ++ // SI, UV8SI ++ i32_r = __lasx_xbnz_w(v8u32_a); // CHECK: call i32 @llvm.loongarch.lasx.xbnz.w( ++ ++ // __lasx_xbnz_d ++ // rd, xj ++ // SI, UV4DI ++ i32_r = __lasx_xbnz_d(v4u64_a); // CHECK: call i32 @llvm.loongarch.lasx.xbnz.d( ++ ++ // __lasx_xbz_b ++ // rd, xj ++ // SI, UV32QI ++ i32_r = __lasx_xbz_b(v32u8_a); // CHECK: call i32 @llvm.loongarch.lasx.xbz.b( ++ ++ // __lasx_xbz_h ++ // rd, xj ++ // SI, UV16HI ++ i32_r = __lasx_xbz_h(v16u16_a); // CHECK: call i32 @llvm.loongarch.lasx.xbz.h( ++ ++ // __lasx_xbz_w ++ // rd, xj ++ // SI, UV8SI ++ i32_r = __lasx_xbz_w(v8u32_a); // CHECK: call i32 @llvm.loongarch.lasx.xbz.w( ++ ++ // __lasx_xbz_d ++ // rd, xj ++ // SI, UV4DI ++ i32_r = __lasx_xbz_d(v4u64_a); // CHECK: call i32 @llvm.loongarch.lasx.xbz.d( ++ ++ v32i8_r = __lasx_xvrepli_b(2); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvrepli.b( ++ ++ v16i16_r = __lasx_xvrepli_h(2); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvrepli.h( ++ ++ v8i32_r = __lasx_xvrepli_w(2); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvrepli.w( ++ ++ v4i64_r = __lasx_xvrepli_d(2); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvrepli.d( ++ ++ v4f64_r = __lasx_xvpickve_d_f(v4f64_a, 2); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvpickve.d.f( ++ ++ v8f32_r = __lasx_xvpickve_w_f(v8f32_a, 2); // CHECK: call <8 x float> 
@llvm.loongarch.lasx.xvpickve.w.f(
++}
+diff --git a/test/CodeGen/builtins-loongarch-lsx-error.c b/test/CodeGen/builtins-loongarch-lsx-error.c
+new file mode 100644
+index 000000000..f566a7362
+--- /dev/null
++++ b/test/CodeGen/builtins-loongarch-lsx-error.c
+@@ -0,0 +1,250 @@
++// REQUIRES: loongarch-registered-target
++// RUN: %clang_cc1 -triple loongarch64-unknown-linux-gnu -fsyntax-only %s \
++// RUN: -target-feature +lsx \
++// RUN: -verify -o - 2>&1
++
++#include <lsxintrin.h>
++
++void test() {
++ v16i8 v16i8_a = (v16i8){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
++ v16i8 v16i8_b = (v16i8){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
++ v16i8 v16i8_c = (v16i8){2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17};
++ v16i8 v16i8_r;
++ v8i16 v8i16_a = (v8i16){0, 1, 2, 3, 4, 5, 6, 7};
++ v8i16 v8i16_b = (v8i16){1, 2, 3, 4, 5, 6, 7, 8};
++ v8i16 v8i16_c = (v8i16){2, 3, 4, 5, 6, 7, 8, 9};
++ v8i16 v8i16_r;
++ v4i32 v4i32_a = (v4i32){0, 1, 2, 3};
++ v4i32 v4i32_b = (v4i32){1, 2, 3, 4};
++ v4i32 v4i32_c = (v4i32){2, 3, 4, 5};
++ v4i32 v4i32_r;
++ v2i64 v2i64_a = (v2i64){0, 1};
++ v2i64 v2i64_b = (v2i64){1, 2};
++ v2i64 v2i64_c = (v2i64){2, 3};
++ v2i64 v2i64_r;
++
++ v16u8 v16u8_a = (v16u8){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
++ v16u8 v16u8_b = (v16u8){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
++ v16u8 v16u8_c = (v16u8){2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17};
++ v16u8 v16u8_r;
++ v8u16 v8u16_a = (v8u16){0, 1, 2, 3, 4, 5, 6, 7};
++ v8u16 v8u16_b = (v8u16){1, 2, 3, 4, 5, 6, 7, 8};
++ v8u16 v8u16_c = (v8u16){2, 3, 4, 5, 6, 7, 8, 9};
++ v8u16 v8u16_r;
++ v4u32 v4u32_a = (v4u32){0, 1, 2, 3};
++ v4u32 v4u32_b = (v4u32){1, 2, 3, 4};
++ v4u32 v4u32_c = (v4u32){2, 3, 4, 5};
++ v4u32 v4u32_r;
++ v2u64 v2u64_a = (v2u64){0, 1};
++ v2u64 v2u64_b = (v2u64){1, 2};
++ v2u64 v2u64_c = (v2u64){2, 3};
++ v2u64 v2u64_r;
++
++ v4f32 v4f32_a = (v4f32){0.5, 1, 2, 3};
++ v4f32 v4f32_b = (v4f32){1.5, 2, 3, 4};
++ v4f32 v4f32_c = (v4f32){2.5, 3, 4, 5};
++ v4f32 v4f32_r;
++ v2f64 v2f64_a = (v2f64){0.5, 1};
++ v2f64 v2f64_b = (v2f64){1.5, 2};
++ v2f64 v2f64_c = (v2f64){2.5, 3};
++ v2f64 v2f64_r;
++
++ int i32_r;
++ int i32_a = 1;
++ int i32_b = 2;
++ unsigned int u32_r;
++ unsigned int u32_a = 1;
++ unsigned int u32_b = 2;
++ long long i64_r;
++ long long i64_a = 1;
++ long long i64_b = 2;
++ long long i64_c = 3;
++ unsigned long long u64_r;
++ unsigned long long u64_a = 1;
++ unsigned long long u64_b = 2;
++ unsigned long long u64_c = 3;
++
++ v16i8_r = __lsx_vslli_b(v16i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
++ v8i16_r = __lsx_vslli_h(v8i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
++ v4i32_r = __lsx_vslli_w(v4i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
++ v2i64_r = __lsx_vslli_d(v2i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
++ v16i8_r = __lsx_vsrai_b(v16i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
++ v8i16_r = __lsx_vsrai_h(v8i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
++ v4i32_r = __lsx_vsrai_w(v4i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
++ v2i64_r = __lsx_vsrai_d(v2i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
++ v16i8_r = __lsx_vsrari_b(v16i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 
7]}} ++ v8i16_r = __lsx_vsrari_h(v8i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v4i32_r = __lsx_vsrari_w(v4i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2i64_r = __lsx_vsrari_d(v2i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v16i8_r = __lsx_vsrli_b(v16i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v8i16_r = __lsx_vsrli_h(v8i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v4i32_r = __lsx_vsrli_w(v4i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2i64_r = __lsx_vsrli_d(v2i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v16i8_r = __lsx_vsrlri_b(v16i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v8i16_r = __lsx_vsrlri_h(v8i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v4i32_r = __lsx_vsrlri_w(v4i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2i64_r = __lsx_vsrlri_d(v2i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v16u8_r = __lsx_vbitclri_b(v16u8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v8u16_r = __lsx_vbitclri_h(v8u16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v4u32_r = __lsx_vbitclri_w(v4u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2u64_r = __lsx_vbitclri_d(v2u64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v16u8_r = __lsx_vbitseti_b(v16u8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v8u16_r = __lsx_vbitseti_h(v8u16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v4u32_r = __lsx_vbitseti_w(v4u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2u64_r = __lsx_vbitseti_d(v2u64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v16u8_r = __lsx_vbitrevi_b(v16u8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v8u16_r = __lsx_vbitrevi_h(v8u16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v4u32_r = __lsx_vbitrevi_w(v4u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2u64_r = __lsx_vbitrevi_d(v2u64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v16i8_r = __lsx_vaddi_bu(v16i8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i16_r = __lsx_vaddi_hu(v8i16_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i32_r = __lsx_vaddi_wu(v4i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2i64_r = __lsx_vaddi_du(v2i64_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16i8_r = __lsx_vsubi_bu(v16i8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i16_r = __lsx_vsubi_hu(v8i16_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i32_r = __lsx_vsubi_wu(v4i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2i64_r = 
__lsx_vsubi_du(v2i64_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16i8_r = __lsx_vmaxi_b(v16i8_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v8i16_r = __lsx_vmaxi_h(v8i16_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v4i32_r = __lsx_vmaxi_w(v4i32_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v2i64_r = __lsx_vmaxi_d(v2i64_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v16u8_r = __lsx_vmaxi_bu(v16u8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8u16_r = __lsx_vmaxi_hu(v8u16_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4u32_r = __lsx_vmaxi_wu(v4u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2u64_r = __lsx_vmaxi_du(v2u64_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16i8_r = __lsx_vmini_b(v16i8_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v8i16_r = __lsx_vmini_h(v8i16_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v4i32_r = __lsx_vmini_w(v4i32_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v2i64_r = __lsx_vmini_d(v2i64_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v16u8_r = __lsx_vmini_bu(v16u8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8u16_r = __lsx_vmini_hu(v8u16_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4u32_r = __lsx_vmini_wu(v4u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2u64_r = __lsx_vmini_du(v2u64_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16i8_r = __lsx_vseqi_b(v16i8_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v8i16_r = __lsx_vseqi_h(v8i16_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v4i32_r = __lsx_vseqi_w(v4i32_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v2i64_r = __lsx_vseqi_d(v2i64_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v16i8_r = __lsx_vslti_b(v16i8_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v8i16_r = __lsx_vslti_h(v8i16_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v4i32_r = __lsx_vslti_w(v4i32_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v2i64_r = __lsx_vslti_d(v2i64_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v16i8_r = __lsx_vslti_bu(v16u8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i16_r = __lsx_vslti_hu(v8u16_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i32_r = __lsx_vslti_wu(v4u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2i64_r = __lsx_vslti_du(v2u64_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16i8_r = __lsx_vslei_b(v16i8_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ 
v8i16_r = __lsx_vslei_h(v8i16_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v4i32_r = __lsx_vslei_w(v4i32_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v2i64_r = __lsx_vslei_d(v2i64_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v16i8_r = __lsx_vslei_bu(v16u8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i16_r = __lsx_vslei_hu(v8u16_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i32_r = __lsx_vslei_wu(v4u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2i64_r = __lsx_vslei_du(v2u64_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16i8_r = __lsx_vsat_b(v16i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v8i16_r = __lsx_vsat_h(v8i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v4i32_r = __lsx_vsat_w(v4i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2i64_r = __lsx_vsat_d(v2i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v16u8_r = __lsx_vsat_bu(v16u8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v8u16_r = __lsx_vsat_hu(v8u16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v4u32_r = __lsx_vsat_wu(v4u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2u64_r = __lsx_vsat_du(v2u64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v16i8_r = __lsx_vreplvei_b(v16i8_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8i16_r = __lsx_vreplvei_h(v8i16_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v4i32_r = __lsx_vreplvei_w(v4i32_a, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ v2i64_r = __lsx_vreplvei_d(v2i64_a, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} ++ v16u8_r = __lsx_vandi_b(v16u8_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v16u8_r = __lsx_vori_b(v16u8_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v16u8_r = __lsx_vnori_b(v16u8_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v16u8_r = __lsx_vxori_b(v16u8_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v16u8_r = __lsx_vbitseli_b(v16u8_a, v16u8_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v16i8_r = __lsx_vshuf4i_b(v16i8_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v8i16_r = __lsx_vshuf4i_h(v8i16_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v4i32_r = __lsx_vshuf4i_w(v4i32_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ i32_r = __lsx_vpickve2gr_b(v16i8_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ i32_r = __lsx_vpickve2gr_h(v8i16_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ i32_r = __lsx_vpickve2gr_w(v4i32_a, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ i64_r = 
__lsx_vpickve2gr_d(v2i64_a, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} ++ u32_r = __lsx_vpickve2gr_bu(v16i8_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ u32_r = __lsx_vpickve2gr_hu(v8i16_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ u32_r = __lsx_vpickve2gr_wu(v4i32_a, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ u64_r = __lsx_vpickve2gr_du(v2i64_a, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} ++ v16i8_r = __lsx_vinsgr2vr_b(v16i8_a, i32_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8i16_r = __lsx_vinsgr2vr_h(v8i16_a, i32_b, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v4i32_r = __lsx_vinsgr2vr_w(v4i32_a, i32_b, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ v2i64_r = __lsx_vinsgr2vr_d(v2i64_a, i32_b, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} ++ v8i16_r = __lsx_vsllwil_h_b(v16i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v4i32_r = __lsx_vsllwil_w_h(v8i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v2i64_r = __lsx_vsllwil_d_w(v4i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8u16_r = __lsx_vsllwil_hu_bu(v16u8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v4u32_r = __lsx_vsllwil_wu_hu(v8u16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v2u64_r = __lsx_vsllwil_du_wu(v4u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16i8_r = __lsx_vfrstpi_b(v16i8_a, v16i8_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i16_r = __lsx_vfrstpi_h(v8i16_a, v8i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2i64_r = __lsx_vshuf4i_d(v2i64_a, v2i64_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v16i8_r = __lsx_vbsrl_v(v16i8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16i8_r = __lsx_vbsll_v(v16i8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16i8_r = __lsx_vextrins_b(v16i8_a, v16i8_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v8i16_r = __lsx_vextrins_h(v8i16_a, v8i16_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v4i32_r = __lsx_vextrins_w(v4i32_a, v4i32_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v2i64_r = __lsx_vextrins_d(v2i64_a, v2i64_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ __lsx_vstelm_b(v16i8_a, &v16i8_b, 0, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ __lsx_vstelm_h(v8i16_a, &v8i16_b, 0, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ __lsx_vstelm_w(v4i32_a, &v4i32_b, 0, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ __lsx_vstelm_d(v2i64_a, &v2i64_b, 0, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} ++ v16i8_r = __lsx_vldrepl_b(&v16i8_a, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} ++ v8i16_r = 
__lsx_vldrepl_h(&v8i16_a, -1025); // expected-error {{argument value -1025 is outside the valid range [-1024, 1023]}} ++ v4i32_r = __lsx_vldrepl_w(&v4i32_a, -513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} ++ v2i64_r = __lsx_vldrepl_d(&v2i64_a, -257); // expected-error {{argument value -257 is outside the valid range [-256, 255]}} ++ v16i8_r = __lsx_vrotri_b(v16i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v8i16_r = __lsx_vrotri_h(v8i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v4i32_r = __lsx_vrotri_w(v4i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2i64_r = __lsx_vrotri_d(v2i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v16i8_r = __lsx_vsrlni_b_h(v16i8_a, v16i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8i16_r = __lsx_vsrlni_h_w(v8i16_a, v8i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i32_r = __lsx_vsrlni_w_d(v4i32_a, v4i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v2i64_r = __lsx_vsrlni_d_q(v2i64_a, v2i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v16i8_r = __lsx_vssrlni_b_h(v16i8_a, v16i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8i16_r = __lsx_vssrlni_h_w(v8i16_a, v8i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i32_r = __lsx_vssrlni_w_d(v4i32_a, v4i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v2i64_r = __lsx_vssrlni_d_q(v2i64_a, v2i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v16u8_r = __lsx_vssrlni_bu_h(v16u8_a, v16i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8u16_r = __lsx_vssrlni_hu_w(v8u16_a, v8i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4u32_r = __lsx_vssrlni_wu_d(v4u32_a, v4i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v2u64_r = __lsx_vssrlni_du_q(v2u64_a, v2i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v16i8_r = __lsx_vssrlrni_b_h(v16i8_a, v16i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8i16_r = __lsx_vssrlrni_h_w(v8i16_a, v8i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i32_r = __lsx_vssrlrni_w_d(v4i32_a, v4i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v2i64_r = __lsx_vssrlrni_d_q(v2i64_a, v2i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v16u8_r = __lsx_vssrlrni_bu_h(v16u8_a, v16i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8u16_r = __lsx_vssrlrni_hu_w(v8u16_a, v8i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4u32_r = __lsx_vssrlrni_wu_d(v4u32_a, v4i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v2u64_r = __lsx_vssrlrni_du_q(v2u64_a, v2i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v16i8_r = __lsx_vsrani_b_h(v16i8_a, v16i8_b, 16); // expected-error {{argument value 16 is outside the 
valid range [0, 15]}} ++ v8i16_r = __lsx_vsrani_h_w(v8i16_a, v8i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i32_r = __lsx_vsrani_w_d(v4i32_a, v4i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v2i64_r = __lsx_vsrani_d_q(v2i64_a, v2i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v16i8_r = __lsx_vsrarni_b_h(v16i8_a, v16i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8i16_r = __lsx_vsrarni_h_w(v8i16_a, v8i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i32_r = __lsx_vsrarni_w_d(v4i32_a, v4i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v2i64_r = __lsx_vsrarni_d_q(v2i64_a, v2i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v16i8_r = __lsx_vssrani_b_h(v16i8_a, v16i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8i16_r = __lsx_vssrani_h_w(v8i16_a, v8i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i32_r = __lsx_vssrani_w_d(v4i32_a, v4i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v2i64_r = __lsx_vssrani_d_q(v2i64_a, v2i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v16u8_r = __lsx_vssrani_bu_h(v16u8_a, v16i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8u16_r = __lsx_vssrani_hu_w(v8u16_a, v8i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4u32_r = __lsx_vssrani_wu_d(v4u32_a, v4i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v2u64_r = __lsx_vssrani_du_q(v2u64_a, v2i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v16i8_r = __lsx_vssrarni_b_h(v16i8_a, v16i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8i16_r = __lsx_vssrarni_h_w(v8i16_a, v8i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i32_r = __lsx_vssrarni_w_d(v4i32_a, v4i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v2i64_r = __lsx_vssrarni_d_q(v2i64_a, v2i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v16u8_r = __lsx_vssrarni_bu_h(v16u8_a, v16i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8u16_r = __lsx_vssrarni_hu_w(v8u16_a, v8i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4u32_r = __lsx_vssrarni_wu_d(v4u32_a, v4i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v2u64_r = __lsx_vssrarni_du_q(v2u64_a, v2i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v4i32_r = __lsx_vpermi_w(v4i32_a, v4i32_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v16i8_r = __lsx_vld(&v16i8_a, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} ++ __lsx_vst(v16i8_a, &v16i8_b, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} ++ v2i64_r = __lsx_vldi(-4097); // expected-error {{argument value -4097 is outside the valid range [-4096, 4095]}} ++} +diff --git 
a/test/CodeGen/builtins-loongarch-lsx.c b/test/CodeGen/builtins-loongarch-lsx.c
+new file mode 100644
+index 000000000..2b86c0b2e
+--- /dev/null
++++ b/test/CodeGen/builtins-loongarch-lsx.c
+@@ -0,0 +1,3645 @@
++// REQUIRES: loongarch-registered-target
++// RUN: %clang_cc1 -triple loongarch64-unknown-linux-gnu -emit-llvm %s \
++// RUN: -target-feature +lsx \
++// RUN: -o - | FileCheck %s
++
++#include <lsxintrin.h>
++
++#define ui1 0
++#define ui2 1
++#define ui3 4
++#define ui4 7
++#define ui5 25
++#define ui6 44
++#define ui7 100
++#define ui8 127 //200
++#define si5 -4
++#define si8 -100
++#define si9 0
++#define si10 0
++#define si11 0
++#define si12 0
++#define i10 500
++#define i13 4000
++#define mode 11
++#define idx1 1
++#define idx2 2
++#define idx3 4
++#define idx4 8
++
++void test(void) {
++ v16i8 v16i8_a = (v16i8){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
++ v16i8 v16i8_b = (v16i8){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
++ v16i8 v16i8_c = (v16i8){2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17};
++ v16i8 v16i8_r;
++ v8i16 v8i16_a = (v8i16){0, 1, 2, 3, 4, 5, 6, 7};
++ v8i16 v8i16_b = (v8i16){1, 2, 3, 4, 5, 6, 7, 8};
++ v8i16 v8i16_c = (v8i16){2, 3, 4, 5, 6, 7, 8, 9};
++ v8i16 v8i16_r;
++ v4i32 v4i32_a = (v4i32){0, 1, 2, 3};
++ v4i32 v4i32_b = (v4i32){1, 2, 3, 4};
++ v4i32 v4i32_c = (v4i32){2, 3, 4, 5};
++ v4i32 v4i32_r;
++ v2i64 v2i64_a = (v2i64){0, 1};
++ v2i64 v2i64_b = (v2i64){1, 2};
++ v2i64 v2i64_c = (v2i64){2, 3};
++ v2i64 v2i64_r;
++
++ v16u8 v16u8_a = (v16u8){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
++ v16u8 v16u8_b = (v16u8){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
++ v16u8 v16u8_c = (v16u8){2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17};
++ v16u8 v16u8_r;
++ v8u16 v8u16_a = (v8u16){0, 1, 2, 3, 4, 5, 6, 7};
++ v8u16 v8u16_b = (v8u16){1, 2, 3, 4, 5, 6, 7, 8};
++ v8u16 v8u16_c = (v8u16){2, 3, 4, 5, 6, 7, 8, 9};
++ v8u16 v8u16_r;
++ v4u32 v4u32_a = (v4u32){0, 1, 2, 3};
++ v4u32 v4u32_b = (v4u32){1, 2, 3, 4};
++ v4u32 v4u32_c = (v4u32){2, 3, 4, 5};
++ v4u32 v4u32_r;
++ v2u64 v2u64_a = (v2u64){0, 1};
++ v2u64 v2u64_b = (v2u64){1, 2};
++ v2u64 v2u64_c = (v2u64){2, 3};
++ v2u64 v2u64_r;
++
++ v4f32 v4f32_a = (v4f32){0.5, 1, 2, 3};
++ v4f32 v4f32_b = (v4f32){1.5, 2, 3, 4};
++ v4f32 v4f32_c = (v4f32){2.5, 3, 4, 5};
++ v4f32 v4f32_r;
++ v2f64 v2f64_a = (v2f64){0.5, 1};
++ v2f64 v2f64_b = (v2f64){1.5, 2};
++ v2f64 v2f64_c = (v2f64){2.5, 3};
++ v2f64 v2f64_r;
++
++ int i32_r;
++ int i32_a = 1;
++ int i32_b = 2;
++ unsigned int u32_r;
++ unsigned int u32_a = 1;
++ unsigned int u32_b = 2;
++ long long i64_r;
++ long long i64_a = 1;
++ long long i64_b = 2;
++ long long i64_c = 3;
++ long int i64_d = 0;
++ unsigned long long u64_r;
++ unsigned long long u64_a = 1;
++ unsigned long long u64_b = 2;
++ unsigned long long u64_c = 3;
++
++ // __lsx_vsll_b
++ // vd, vj, vk
++ // V16QI, V16QI, V16QI
++ v16i8_r = __lsx_vsll_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsll.b(
++
++ // __lsx_vsll_h
++ // vd, vj, vk
++ // V8HI, V8HI, V8HI
++ v8i16_r = __lsx_vsll_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsll.h(
++
++ // __lsx_vsll_w
++ // vd, vj, vk
++ // V4SI, V4SI, V4SI
++ v4i32_r = __lsx_vsll_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsll.w(
++
++ // __lsx_vsll_d
++ // vd, vj, vk
++ // V2DI, V2DI, V2DI
++ v2i64_r = __lsx_vsll_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsll.d(
++
++ // __lsx_vslli_b
++ // vd, vj, ui3
++ // V16QI, V16QI, UQI
++ 
v16i8_r = __lsx_vslli_b(v16i8_a, ui3); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vslli.b( ++ ++ // __lsx_vslli_h ++ // vd, vj, ui4 ++ // V8HI, V8HI, UQI ++ v8i16_r = __lsx_vslli_h(v8i16_a, ui4); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vslli.h( ++ ++ // __lsx_vslli_w ++ // vd, vj, ui5 ++ // V4SI, V4SI, UQI ++ v4i32_r = __lsx_vslli_w(v4i32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vslli.w( ++ ++ // __lsx_vslli_d ++ // vd, vj, ui6 ++ // V2DI, V2DI, UQI ++ v2i64_r = __lsx_vslli_d(v2i64_a, ui6); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vslli.d( ++ ++ // __lsx_vsra_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vsra_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsra.b( ++ ++ // __lsx_vsra_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vsra_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsra.h( ++ ++ // __lsx_vsra_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vsra_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsra.w( ++ ++ // __lsx_vsra_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vsra_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsra.d( ++ ++ // __lsx_vsrai_b ++ // vd, vj, ui3 ++ // V16QI, V16QI, UQI ++ v16i8_r = __lsx_vsrai_b(v16i8_a, ui3); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsrai.b( ++ ++ // __lsx_vsrai_h ++ // vd, vj, ui4 ++ // V8HI, V8HI, UQI ++ v8i16_r = __lsx_vsrai_h(v8i16_a, ui4); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsrai.h( ++ ++ // __lsx_vsrai_w ++ // vd, vj, ui5 ++ // V4SI, V4SI, UQI ++ v4i32_r = __lsx_vsrai_w(v4i32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsrai.w( ++ ++ // __lsx_vsrai_d ++ // vd, vj, ui6 ++ // V2DI, V2DI, UQI ++ v2i64_r = __lsx_vsrai_d(v2i64_a, ui6); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsrai.d( ++ ++ // __lsx_vsrar_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vsrar_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsrar.b( ++ ++ // __lsx_vsrar_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vsrar_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsrar.h( ++ ++ // __lsx_vsrar_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vsrar_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsrar.w( ++ ++ // __lsx_vsrar_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vsrar_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsrar.d( ++ ++ // __lsx_vsrari_b ++ // vd, vj, ui3 ++ // V16QI, V16QI, UQI ++ v16i8_r = __lsx_vsrari_b(v16i8_a, ui3); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsrari.b( ++ ++ // __lsx_vsrari_h ++ // vd, vj, ui4 ++ // V8HI, V8HI, UQI ++ v8i16_r = __lsx_vsrari_h(v8i16_a, ui4); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsrari.h( ++ ++ // __lsx_vsrari_w ++ // vd, vj, ui5 ++ // V4SI, V4SI, UQI ++ v4i32_r = __lsx_vsrari_w(v4i32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsrari.w( ++ ++ // __lsx_vsrari_d ++ // vd, vj, ui6 ++ // V2DI, V2DI, UQI ++ v2i64_r = __lsx_vsrari_d(v2i64_a, ui6); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsrari.d( ++ ++ // __lsx_vsrl_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vsrl_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsrl.b( ++ ++ // __lsx_vsrl_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vsrl_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsrl.h( ++ ++ // __lsx_vsrl_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = 
__lsx_vsrl_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsrl.w( ++ ++ // __lsx_vsrl_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vsrl_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsrl.d( ++ ++ // __lsx_vsrli_b ++ // vd, vj, ui3 ++ // V16QI, V16QI, UQI ++ v16i8_r = __lsx_vsrli_b(v16i8_a, ui3); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsrli.b( ++ ++ // __lsx_vsrli_h ++ // vd, vj, ui4 ++ // V8HI, V8HI, UQI ++ v8i16_r = __lsx_vsrli_h(v8i16_a, ui4); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsrli.h( ++ ++ // __lsx_vsrli_w ++ // vd, vj, ui5 ++ // V4SI, V4SI, UQI ++ v4i32_r = __lsx_vsrli_w(v4i32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsrli.w( ++ ++ // __lsx_vsrli_d ++ // vd, vj, ui6 ++ // V2DI, V2DI, UQI ++ v2i64_r = __lsx_vsrli_d(v2i64_a, ui6); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsrli.d( ++ ++ // __lsx_vsrlr_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vsrlr_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsrlr.b( ++ ++ // __lsx_vsrlr_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vsrlr_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsrlr.h( ++ ++ // __lsx_vsrlr_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vsrlr_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsrlr.w( ++ ++ // __lsx_vsrlr_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vsrlr_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsrlr.d( ++ ++ // __lsx_vsrlri_b ++ // vd, vj, ui3 ++ // V16QI, V16QI, UQI ++ v16i8_r = __lsx_vsrlri_b(v16i8_a, ui3); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsrlri.b( ++ ++ // __lsx_vsrlri_h ++ // vd, vj, ui4 ++ // V8HI, V8HI, UQI ++ v8i16_r = __lsx_vsrlri_h(v8i16_a, ui4); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsrlri.h( ++ ++ // __lsx_vsrlri_w ++ // vd, vj, ui5 ++ // V4SI, V4SI, UQI ++ v4i32_r = __lsx_vsrlri_w(v4i32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsrlri.w( ++ ++ // __lsx_vsrlri_d ++ // vd, vj, ui6 ++ // V2DI, V2DI, UQI ++ v2i64_r = __lsx_vsrlri_d(v2i64_a, ui6); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsrlri.d( ++ ++ // __lsx_vbitclr_b ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vbitclr_b(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vbitclr.b( ++ ++ // __lsx_vbitclr_h ++ // vd, vj, vk ++ // UV8HI, UV8HI, UV8HI ++ v8u16_r = __lsx_vbitclr_h(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vbitclr.h( ++ ++ // __lsx_vbitclr_w ++ // vd, vj, vk ++ // UV4SI, UV4SI, UV4SI ++ v4u32_r = __lsx_vbitclr_w(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vbitclr.w( ++ ++ // __lsx_vbitclr_d ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vbitclr_d(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vbitclr.d( ++ ++ // __lsx_vbitclri_b ++ // vd, vj, ui3 ++ // UV16QI, UV16QI, UQI ++ v16u8_r = __lsx_vbitclri_b(v16u8_a, ui3); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vbitclri.b( ++ ++ // __lsx_vbitclri_h ++ // vd, vj, ui4 ++ // UV8HI, UV8HI, UQI ++ v8u16_r = __lsx_vbitclri_h(v8u16_a, ui4); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vbitclri.h( ++ ++ // __lsx_vbitclri_w ++ // vd, vj, ui5 ++ // UV4SI, UV4SI, UQI ++ v4u32_r = __lsx_vbitclri_w(v4u32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vbitclri.w( ++ ++ // __lsx_vbitclri_d ++ // vd, vj, ui6 ++ // UV2DI, UV2DI, UQI ++ v2u64_r = __lsx_vbitclri_d(v2u64_a, ui6); // CHECK: call <2 x i64> 
@llvm.loongarch.lsx.vbitclri.d( ++ ++ // __lsx_vbitset_b ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vbitset_b(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vbitset.b( ++ ++ // __lsx_vbitset_h ++ // vd, vj, vk ++ // UV8HI, UV8HI, UV8HI ++ v8u16_r = __lsx_vbitset_h(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vbitset.h( ++ ++ // __lsx_vbitset_w ++ // vd, vj, vk ++ // UV4SI, UV4SI, UV4SI ++ v4u32_r = __lsx_vbitset_w(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vbitset.w( ++ ++ // __lsx_vbitset_d ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vbitset_d(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vbitset.d( ++ ++ // __lsx_vbitseti_b ++ // vd, vj, ui3 ++ // UV16QI, UV16QI, UQI ++ v16u8_r = __lsx_vbitseti_b(v16u8_a, ui3); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vbitseti.b( ++ ++ // __lsx_vbitseti_h ++ // vd, vj, ui4 ++ // UV8HI, UV8HI, UQI ++ v8u16_r = __lsx_vbitseti_h(v8u16_a, ui4); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vbitseti.h( ++ ++ // __lsx_vbitseti_w ++ // vd, vj, ui5 ++ // UV4SI, UV4SI, UQI ++ v4u32_r = __lsx_vbitseti_w(v4u32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vbitseti.w( ++ ++ // __lsx_vbitseti_d ++ // vd, vj, ui6 ++ // UV2DI, UV2DI, UQI ++ v2u64_r = __lsx_vbitseti_d(v2u64_a, ui6); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vbitseti.d( ++ ++ // __lsx_vbitrev_b ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vbitrev_b(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vbitrev.b( ++ ++ // __lsx_vbitrev_h ++ // vd, vj, vk ++ // UV8HI, UV8HI, UV8HI ++ v8u16_r = __lsx_vbitrev_h(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vbitrev.h( ++ ++ // __lsx_vbitrev_w ++ // vd, vj, vk ++ // UV4SI, UV4SI, UV4SI ++ v4u32_r = __lsx_vbitrev_w(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vbitrev.w( ++ ++ // __lsx_vbitrev_d ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vbitrev_d(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vbitrev.d( ++ ++ // __lsx_vbitrevi_b ++ // vd, vj, ui3 ++ // UV16QI, UV16QI, UQI ++ v16u8_r = __lsx_vbitrevi_b(v16u8_a, ui3); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vbitrevi.b( ++ ++ // __lsx_vbitrevi_h ++ // vd, vj, ui4 ++ // UV8HI, UV8HI, UQI ++ v8u16_r = __lsx_vbitrevi_h(v8u16_a, ui4); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vbitrevi.h( ++ ++ // __lsx_vbitrevi_w ++ // vd, vj, ui5 ++ // UV4SI, UV4SI, UQI ++ v4u32_r = __lsx_vbitrevi_w(v4u32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vbitrevi.w( ++ ++ // __lsx_vbitrevi_d ++ // vd, vj, ui6 ++ // UV2DI, UV2DI, UQI ++ v2u64_r = __lsx_vbitrevi_d(v2u64_a, ui6); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vbitrevi.d( ++ ++ // __lsx_vadd_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vadd_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vadd.b( ++ ++ // __lsx_vadd_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vadd_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vadd.h( ++ ++ // __lsx_vadd_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vadd_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vadd.w( ++ ++ // __lsx_vadd_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vadd_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vadd.d( ++ ++ // __lsx_vaddi_bu ++ // vd, vj, ui5 ++ // V16QI, V16QI, UQI ++ v16i8_r = __lsx_vaddi_bu(v16i8_a, ui5); // CHECK: call <16 x i8> 
@llvm.loongarch.lsx.vaddi.bu( ++ ++ // __lsx_vaddi_hu ++ // vd, vj, ui5 ++ // V8HI, V8HI, UQI ++ v8i16_r = __lsx_vaddi_hu(v8i16_a, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vaddi.hu( ++ ++ // __lsx_vaddi_wu ++ // vd, vj, ui5 ++ // V4SI, V4SI, UQI ++ v4i32_r = __lsx_vaddi_wu(v4i32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vaddi.wu( ++ ++ // __lsx_vaddi_du ++ // vd, vj, ui5 ++ // V2DI, V2DI, UQI ++ v2i64_r = __lsx_vaddi_du(v2i64_a, ui5); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vaddi.du( ++ ++ // __lsx_vsub_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vsub_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsub.b( ++ ++ // __lsx_vsub_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vsub_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsub.h( ++ ++ // __lsx_vsub_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vsub_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsub.w( ++ ++ // __lsx_vsub_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vsub_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsub.d( ++ ++ // __lsx_vsubi_bu ++ // vd, vj, ui5 ++ // V16QI, V16QI, UQI ++ v16i8_r = __lsx_vsubi_bu(v16i8_a, ui5); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsubi.bu( ++ ++ // __lsx_vsubi_hu ++ // vd, vj, ui5 ++ // V8HI, V8HI, UQI ++ v8i16_r = __lsx_vsubi_hu(v8i16_a, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsubi.hu( ++ ++ // __lsx_vsubi_wu ++ // vd, vj, ui5 ++ // V4SI, V4SI, UQI ++ v4i32_r = __lsx_vsubi_wu(v4i32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsubi.wu( ++ ++ // __lsx_vsubi_du ++ // vd, vj, ui5 ++ // V2DI, V2DI, UQI ++ v2i64_r = __lsx_vsubi_du(v2i64_a, ui5); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsubi.du( ++ ++ // __lsx_vmax_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vmax_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmax.b( ++ ++ // __lsx_vmax_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vmax_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmax.h( ++ ++ // __lsx_vmax_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vmax_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmax.w( ++ ++ // __lsx_vmax_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vmax_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmax.d( ++ ++ // __lsx_vmaxi_b ++ // vd, vj, si5 ++ // V16QI, V16QI, QI ++ v16i8_r = __lsx_vmaxi_b(v16i8_a, si5); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmaxi.b( ++ ++ // __lsx_vmaxi_h ++ // vd, vj, si5 ++ // V8HI, V8HI, QI ++ v8i16_r = __lsx_vmaxi_h(v8i16_a, si5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmaxi.h( ++ ++ // __lsx_vmaxi_w ++ // vd, vj, si5 ++ // V4SI, V4SI, QI ++ v4i32_r = __lsx_vmaxi_w(v4i32_a, si5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmaxi.w( ++ ++ // __lsx_vmaxi_d ++ // vd, vj, si5 ++ // V2DI, V2DI, QI ++ v2i64_r = __lsx_vmaxi_d(v2i64_a, si5); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaxi.d( ++ ++ // __lsx_vmax_bu ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vmax_bu(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmax.bu( ++ ++ // __lsx_vmax_hu ++ // vd, vj, vk ++ // UV8HI, UV8HI, UV8HI ++ v8u16_r = __lsx_vmax_hu(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmax.hu( ++ ++ // __lsx_vmax_wu ++ // vd, vj, vk ++ // UV4SI, UV4SI, UV4SI ++ v4u32_r = __lsx_vmax_wu(v4u32_a, v4u32_b); // CHECK: call <4 x 
i32> @llvm.loongarch.lsx.vmax.wu( ++ ++ // __lsx_vmax_du ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vmax_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmax.du( ++ ++ // __lsx_vmaxi_bu ++ // vd, vj, ui5 ++ // UV16QI, UV16QI, UQI ++ v16u8_r = __lsx_vmaxi_bu(v16u8_a, ui5); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmaxi.bu( ++ ++ // __lsx_vmaxi_hu ++ // vd, vj, ui5 ++ // UV8HI, UV8HI, UQI ++ v8u16_r = __lsx_vmaxi_hu(v8u16_a, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmaxi.hu( ++ ++ // __lsx_vmaxi_wu ++ // vd, vj, ui5 ++ // UV4SI, UV4SI, UQI ++ v4u32_r = __lsx_vmaxi_wu(v4u32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmaxi.wu( ++ ++ // __lsx_vmaxi_du ++ // vd, vj, ui5 ++ // UV2DI, UV2DI, UQI ++ v2u64_r = __lsx_vmaxi_du(v2u64_a, ui5); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaxi.du( ++ ++ // __lsx_vmin_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vmin_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmin.b( ++ ++ // __lsx_vmin_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vmin_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmin.h( ++ ++ // __lsx_vmin_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vmin_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmin.w( ++ ++ // __lsx_vmin_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vmin_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmin.d( ++ ++ // __lsx_vmini_b ++ // vd, vj, si5 ++ // V16QI, V16QI, QI ++ v16i8_r = __lsx_vmini_b(v16i8_a, si5); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmini.b( ++ ++ // __lsx_vmini_h ++ // vd, vj, si5 ++ // V8HI, V8HI, QI ++ v8i16_r = __lsx_vmini_h(v8i16_a, si5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmini.h( ++ ++ // __lsx_vmini_w ++ // vd, vj, si5 ++ // V4SI, V4SI, QI ++ v4i32_r = __lsx_vmini_w(v4i32_a, si5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmini.w( ++ ++ // __lsx_vmini_d ++ // vd, vj, si5 ++ // V2DI, V2DI, QI ++ v2i64_r = __lsx_vmini_d(v2i64_a, si5); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmini.d( ++ ++ // __lsx_vmin_bu ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vmin_bu(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmin.bu( ++ ++ // __lsx_vmin_hu ++ // vd, vj, vk ++ // UV8HI, UV8HI, UV8HI ++ v8u16_r = __lsx_vmin_hu(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmin.hu( ++ ++ // __lsx_vmin_wu ++ // vd, vj, vk ++ // UV4SI, UV4SI, UV4SI ++ v4u32_r = __lsx_vmin_wu(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmin.wu( ++ ++ // __lsx_vmin_du ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vmin_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmin.du( ++ ++ // __lsx_vmini_bu ++ // vd, vj, ui5 ++ // UV16QI, UV16QI, UQI ++ v16u8_r = __lsx_vmini_bu(v16u8_a, ui5); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmini.bu( ++ ++ // __lsx_vmini_hu ++ // vd, vj, ui5 ++ // UV8HI, UV8HI, UQI ++ v8u16_r = __lsx_vmini_hu(v8u16_a, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmini.hu( ++ ++ // __lsx_vmini_wu ++ // vd, vj, ui5 ++ // UV4SI, UV4SI, UQI ++ v4u32_r = __lsx_vmini_wu(v4u32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmini.wu( ++ ++ // __lsx_vmini_du ++ // vd, vj, ui5 ++ // UV2DI, UV2DI, UQI ++ v2u64_r = __lsx_vmini_du(v2u64_a, ui5); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmini.du( ++ ++ // __lsx_vseq_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = 
__lsx_vseq_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vseq.b( ++ ++ // __lsx_vseq_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vseq_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vseq.h( ++ ++ // __lsx_vseq_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vseq_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vseq.w( ++ ++ // __lsx_vseq_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vseq_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vseq.d( ++ ++ // __lsx_vseqi_b ++ // vd, vj, si5 ++ // V16QI, V16QI, QI ++ v16i8_r = __lsx_vseqi_b(v16i8_a, si5); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vseqi.b( ++ ++ // __lsx_vseqi_h ++ // vd, vj, si5 ++ // V8HI, V8HI, QI ++ v8i16_r = __lsx_vseqi_h(v8i16_a, si5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vseqi.h( ++ ++ // __lsx_vseqi_w ++ // vd, vj, si5 ++ // V4SI, V4SI, QI ++ v4i32_r = __lsx_vseqi_w(v4i32_a, si5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vseqi.w( ++ ++ // __lsx_vseqi_d ++ // vd, vj, si5 ++ // V2DI, V2DI, QI ++ v2i64_r = __lsx_vseqi_d(v2i64_a, si5); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vseqi.d( ++ ++ // __lsx_vslti_b ++ // vd, vj, si5 ++ // V16QI, V16QI, QI ++ v16i8_r = __lsx_vslti_b(v16i8_a, si5); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vslti.b( ++ ++ // __lsx_vslt_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vslt_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vslt.b( ++ ++ // __lsx_vslt_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vslt_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vslt.h( ++ ++ // __lsx_vslt_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vslt_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vslt.w( ++ ++ // __lsx_vslt_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vslt_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vslt.d( ++ ++ // __lsx_vslti_h ++ // vd, vj, si5 ++ // V8HI, V8HI, QI ++ v8i16_r = __lsx_vslti_h(v8i16_a, si5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vslti.h( ++ ++ // __lsx_vslti_w ++ // vd, vj, si5 ++ // V4SI, V4SI, QI ++ v4i32_r = __lsx_vslti_w(v4i32_a, si5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vslti.w( ++ ++ // __lsx_vslti_d ++ // vd, vj, si5 ++ // V2DI, V2DI, QI ++ v2i64_r = __lsx_vslti_d(v2i64_a, si5); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vslti.d( ++ ++ // __lsx_vslt_bu ++ // vd, vj, vk ++ // V16QI, UV16QI, UV16QI ++ v16i8_r = __lsx_vslt_bu(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vslt.bu( ++ ++ // __lsx_vslt_hu ++ // vd, vj, vk ++ // V8HI, UV8HI, UV8HI ++ v8i16_r = __lsx_vslt_hu(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vslt.hu( ++ ++ // __lsx_vslt_wu ++ // vd, vj, vk ++ // V4SI, UV4SI, UV4SI ++ v4i32_r = __lsx_vslt_wu(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vslt.wu( ++ ++ // __lsx_vslt_du ++ // vd, vj, vk ++ // V2DI, UV2DI, UV2DI ++ v2i64_r = __lsx_vslt_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vslt.du( ++ ++ // __lsx_vslti_bu ++ // vd, vj, ui5 ++ // V16QI, UV16QI, UQI ++ v16i8_r = __lsx_vslti_bu(v16u8_a, ui5); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vslti.bu( ++ ++ // __lsx_vslti_hu ++ // vd, vj, ui5 ++ // V8HI, UV8HI, UQI ++ v8i16_r = __lsx_vslti_hu(v8u16_a, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vslti.hu( ++ ++ // __lsx_vslti_wu ++ // vd, vj, ui5 ++ // V4SI, UV4SI, UQI ++ v4i32_r = 
__lsx_vslti_wu(v4u32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vslti.wu( ++ ++ // __lsx_vslti_du ++ // vd, vj, ui5 ++ // V2DI, UV2DI, UQI ++ v2i64_r = __lsx_vslti_du(v2u64_a, ui5); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vslti.du( ++ ++ // __lsx_vsle_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vsle_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsle.b( ++ ++ // __lsx_vsle_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vsle_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsle.h( ++ ++ // __lsx_vsle_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vsle_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsle.w( ++ ++ // __lsx_vsle_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vsle_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsle.d( ++ ++ // __lsx_vslei_b ++ // vd, vj, si5 ++ // V16QI, V16QI, QI ++ v16i8_r = __lsx_vslei_b(v16i8_a, si5); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vslei.b( ++ ++ // __lsx_vslei_h ++ // vd, vj, si5 ++ // V8HI, V8HI, QI ++ v8i16_r = __lsx_vslei_h(v8i16_a, si5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vslei.h( ++ ++ // __lsx_vslei_w ++ // vd, vj, si5 ++ // V4SI, V4SI, QI ++ v4i32_r = __lsx_vslei_w(v4i32_a, si5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vslei.w( ++ ++ // __lsx_vslei_d ++ // vd, vj, si5 ++ // V2DI, V2DI, QI ++ v2i64_r = __lsx_vslei_d(v2i64_a, si5); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vslei.d( ++ ++ // __lsx_vsle_bu ++ // vd, vj, vk ++ // V16QI, UV16QI, UV16QI ++ v16i8_r = __lsx_vsle_bu(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsle.bu( ++ ++ // __lsx_vsle_hu ++ // vd, vj, vk ++ // V8HI, UV8HI, UV8HI ++ v8i16_r = __lsx_vsle_hu(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsle.hu( ++ ++ // __lsx_vsle_wu ++ // vd, vj, vk ++ // V4SI, UV4SI, UV4SI ++ v4i32_r = __lsx_vsle_wu(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsle.wu( ++ ++ // __lsx_vsle_du ++ // vd, vj, vk ++ // V2DI, UV2DI, UV2DI ++ v2i64_r = __lsx_vsle_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsle.du( ++ ++ // __lsx_vslei_bu ++ // vd, vj, ui5 ++ // V16QI, UV16QI, UQI ++ v16i8_r = __lsx_vslei_bu(v16u8_a, ui5); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vslei.bu( ++ ++ // __lsx_vslei_hu ++ // vd, vj, ui5 ++ // V8HI, UV8HI, UQI ++ v8i16_r = __lsx_vslei_hu(v8u16_a, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vslei.hu( ++ ++ // __lsx_vslei_wu ++ // vd, vj, ui5 ++ // V4SI, UV4SI, UQI ++ v4i32_r = __lsx_vslei_wu(v4u32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vslei.wu( ++ ++ // __lsx_vslei_du ++ // vd, vj, ui5 ++ // V2DI, UV2DI, UQI ++ v2i64_r = __lsx_vslei_du(v2u64_a, ui5); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vslei.du( ++ ++ // __lsx_vsat_b ++ // vd, vj, ui3 ++ // V16QI, V16QI, UQI ++ v16i8_r = __lsx_vsat_b(v16i8_a, ui3); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsat.b( ++ ++ // __lsx_vsat_h ++ // vd, vj, ui4 ++ // V8HI, V8HI, UQI ++ v8i16_r = __lsx_vsat_h(v8i16_a, ui4); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsat.h( ++ ++ // __lsx_vsat_w ++ // vd, vj, ui5 ++ // V4SI, V4SI, UQI ++ v4i32_r = __lsx_vsat_w(v4i32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsat.w( ++ ++ // __lsx_vsat_d ++ // vd, vj, ui6 ++ // V2DI, V2DI, UQI ++ v2i64_r = __lsx_vsat_d(v2i64_a, ui6); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsat.d( ++ ++ // __lsx_vsat_bu ++ // vd, vj, ui3 ++ // UV16QI, UV16QI, UQI ++ v16u8_r = 
__lsx_vsat_bu(v16u8_a, ui3); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsat.bu( ++ ++ // __lsx_vsat_hu ++ // vd, vj, ui4 ++ // UV8HI, UV8HI, UQI ++ v8u16_r = __lsx_vsat_hu(v8u16_a, ui4); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsat.hu( ++ ++ // __lsx_vsat_wu ++ // vd, vj, ui5 ++ // UV4SI, UV4SI, UQI ++ v4u32_r = __lsx_vsat_wu(v4u32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsat.wu( ++ ++ // __lsx_vsat_du ++ // vd, vj, ui6 ++ // UV2DI, UV2DI, UQI ++ v2u64_r = __lsx_vsat_du(v2u64_a, ui6); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsat.du( ++ ++ // __lsx_vadda_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vadda_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vadda.b( ++ ++ // __lsx_vadda_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vadda_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vadda.h( ++ ++ // __lsx_vadda_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vadda_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vadda.w( ++ ++ // __lsx_vadda_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vadda_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vadda.d( ++ ++ // __lsx_vsadd_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vsadd_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsadd.b( ++ ++ // __lsx_vsadd_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vsadd_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsadd.h( ++ ++ // __lsx_vsadd_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vsadd_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsadd.w( ++ ++ // __lsx_vsadd_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vsadd_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsadd.d( ++ ++ // __lsx_vsadd_bu ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vsadd_bu(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsadd.bu( ++ ++ // __lsx_vsadd_hu ++ // vd, vj, vk ++ // UV8HI, UV8HI, UV8HI ++ v8u16_r = __lsx_vsadd_hu(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsadd.hu( ++ ++ // __lsx_vsadd_wu ++ // vd, vj, vk ++ // UV4SI, UV4SI, UV4SI ++ v4u32_r = __lsx_vsadd_wu(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsadd.wu( ++ ++ // __lsx_vsadd_du ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vsadd_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsadd.du( ++ ++ // __lsx_vavg_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vavg_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vavg.b( ++ ++ // __lsx_vavg_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vavg_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vavg.h( ++ ++ // __lsx_vavg_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vavg_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vavg.w( ++ ++ // __lsx_vavg_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vavg_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vavg.d( ++ ++ // __lsx_vavg_bu ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vavg_bu(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vavg.bu( ++ ++ // __lsx_vavg_hu ++ // vd, vj, vk ++ // UV8HI, UV8HI, UV8HI ++ v8u16_r = __lsx_vavg_hu(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vavg.hu( ++ ++ // 
__lsx_vavg_wu ++ // vd, vj, vk ++ // UV4SI, UV4SI, UV4SI ++ v4u32_r = __lsx_vavg_wu(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vavg.wu( ++ ++ // __lsx_vavg_du ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vavg_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vavg.du( ++ ++ // __lsx_vavgr_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vavgr_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vavgr.b( ++ ++ // __lsx_vavgr_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vavgr_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vavgr.h( ++ ++ // __lsx_vavgr_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vavgr_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vavgr.w( ++ ++ // __lsx_vavgr_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vavgr_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vavgr.d( ++ ++ // __lsx_vavgr_bu ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vavgr_bu(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vavgr.bu( ++ ++ // __lsx_vavgr_hu ++ // vd, vj, vk ++ // UV8HI, UV8HI, UV8HI ++ v8u16_r = __lsx_vavgr_hu(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vavgr.hu( ++ ++ // __lsx_vavgr_wu ++ // vd, vj, vk ++ // UV4SI, UV4SI, UV4SI ++ v4u32_r = __lsx_vavgr_wu(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vavgr.wu( ++ ++ // __lsx_vavgr_du ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vavgr_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vavgr.du( ++ ++ // __lsx_vssub_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vssub_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssub.b( ++ ++ // __lsx_vssub_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vssub_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssub.h( ++ ++ // __lsx_vssub_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vssub_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssub.w( ++ ++ // __lsx_vssub_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vssub_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vssub.d( ++ ++ // __lsx_vssub_bu ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vssub_bu(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssub.bu( ++ ++ // __lsx_vssub_hu ++ // vd, vj, vk ++ // UV8HI, UV8HI, UV8HI ++ v8u16_r = __lsx_vssub_hu(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssub.hu( ++ ++ // __lsx_vssub_wu ++ // vd, vj, vk ++ // UV4SI, UV4SI, UV4SI ++ v4u32_r = __lsx_vssub_wu(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssub.wu( ++ ++ // __lsx_vssub_du ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vssub_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vssub.du( ++ ++ // __lsx_vabsd_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vabsd_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vabsd.b( ++ ++ // __lsx_vabsd_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vabsd_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vabsd.h( ++ ++ // __lsx_vabsd_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vabsd_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vabsd.w( ++ ++ // __lsx_vabsd_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ 
v2i64_r = __lsx_vabsd_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vabsd.d( ++ ++ // __lsx_vabsd_bu ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vabsd_bu(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vabsd.bu( ++ ++ // __lsx_vabsd_hu ++ // vd, vj, vk ++ // UV8HI, UV8HI, UV8HI ++ v8u16_r = __lsx_vabsd_hu(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vabsd.hu( ++ ++ // __lsx_vabsd_wu ++ // vd, vj, vk ++ // UV4SI, UV4SI, UV4SI ++ v4u32_r = __lsx_vabsd_wu(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vabsd.wu( ++ ++ // __lsx_vabsd_du ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vabsd_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vabsd.du( ++ ++ // __lsx_vmul_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vmul_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmul.b( ++ ++ // __lsx_vmul_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vmul_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmul.h( ++ ++ // __lsx_vmul_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vmul_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmul.w( ++ ++ // __lsx_vmul_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vmul_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmul.d( ++ ++ // __lsx_vmadd_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vmadd_b(v16i8_a, v16i8_b, v16i8_c); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmadd.b( ++ ++ // __lsx_vmadd_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vmadd_h(v8i16_a, v8i16_b, v8i16_c); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmadd.h( ++ ++ // __lsx_vmadd_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vmadd_w(v4i32_a, v4i32_b, v4i32_c); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmadd.w( ++ ++ // __lsx_vmadd_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vmadd_d(v2i64_a, v2i64_b, v2i64_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmadd.d( ++ ++ // __lsx_vmsub_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vmsub_b(v16i8_a, v16i8_b, v16i8_c); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmsub.b( ++ ++ // __lsx_vmsub_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vmsub_h(v8i16_a, v8i16_b, v8i16_c); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmsub.h( ++ ++ // __lsx_vmsub_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vmsub_w(v4i32_a, v4i32_b, v4i32_c); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmsub.w( ++ ++ // __lsx_vmsub_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vmsub_d(v2i64_a, v2i64_b, v2i64_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmsub.d( ++ ++ // __lsx_vdiv_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vdiv_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vdiv.b( ++ ++ // __lsx_vdiv_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vdiv_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vdiv.h( ++ ++ // __lsx_vdiv_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vdiv_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vdiv.w( ++ ++ // __lsx_vdiv_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vdiv_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vdiv.d( ++ ++ // __lsx_vdiv_bu ++ // vd, vj, vk ++ // UV16QI, 
UV16QI, UV16QI ++ v16u8_r = __lsx_vdiv_bu(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vdiv.bu( ++ ++ // __lsx_vdiv_hu ++ // vd, vj, vk ++ // UV8HI, UV8HI, UV8HI ++ v8u16_r = __lsx_vdiv_hu(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vdiv.hu( ++ ++ // __lsx_vdiv_wu ++ // vd, vj, vk ++ // UV4SI, UV4SI, UV4SI ++ v4u32_r = __lsx_vdiv_wu(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vdiv.wu( ++ ++ // __lsx_vdiv_du ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vdiv_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vdiv.du( ++ ++ // __lsx_vhaddw_h_b ++ // vd, vj, vk ++ // V8HI, V16QI, V16QI ++ v8i16_r = __lsx_vhaddw_h_b(v16i8_a, v16i8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vhaddw.h.b( ++ ++ // __lsx_vhaddw_w_h ++ // vd, vj, vk ++ // V4SI, V8HI, V8HI ++ v4i32_r = __lsx_vhaddw_w_h(v8i16_a, v8i16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vhaddw.w.h( ++ ++ // __lsx_vhaddw_d_w ++ // vd, vj, vk ++ // V2DI, V4SI, V4SI ++ v2i64_r = __lsx_vhaddw_d_w(v4i32_a, v4i32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vhaddw.d.w( ++ ++ // __lsx_vhaddw_hu_bu ++ // vd, vj, vk ++ // UV8HI, UV16QI, UV16QI ++ v8u16_r = __lsx_vhaddw_hu_bu(v16u8_a, v16u8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vhaddw.hu.bu( ++ ++ // __lsx_vhaddw_wu_hu ++ // vd, vj, vk ++ // UV4SI, UV8HI, UV8HI ++ v4u32_r = __lsx_vhaddw_wu_hu(v8u16_a, v8u16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vhaddw.wu.hu( ++ ++ // __lsx_vhaddw_du_wu ++ // vd, vj, vk ++ // UV2DI, UV4SI, UV4SI ++ v2u64_r = __lsx_vhaddw_du_wu(v4u32_a, v4u32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vhaddw.du.wu( ++ ++ // __lsx_vhsubw_h_b ++ // vd, vj, vk ++ // V8HI, V16QI, V16QI ++ v8i16_r = __lsx_vhsubw_h_b(v16i8_a, v16i8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vhsubw.h.b( ++ ++ // __lsx_vhsubw_w_h ++ // vd, vj, vk ++ // V4SI, V8HI, V8HI ++ v4i32_r = __lsx_vhsubw_w_h(v8i16_a, v8i16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vhsubw.w.h( ++ ++ // __lsx_vhsubw_d_w ++ // vd, vj, vk ++ // V2DI, V4SI, V4SI ++ v2i64_r = __lsx_vhsubw_d_w(v4i32_a, v4i32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vhsubw.d.w( ++ ++ // __lsx_vhsubw_hu_bu ++ // vd, vj, vk ++ // V8HI, UV16QI, UV16QI ++ v8i16_r = __lsx_vhsubw_hu_bu(v16u8_a, v16u8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vhsubw.hu.bu( ++ ++ // __lsx_vhsubw_wu_hu ++ // vd, vj, vk ++ // V4SI, UV8HI, UV8HI ++ v4i32_r = __lsx_vhsubw_wu_hu(v8u16_a, v8u16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vhsubw.wu.hu( ++ ++ // __lsx_vhsubw_du_wu ++ // vd, vj, vk ++ // V2DI, UV4SI, UV4SI ++ v2i64_r = __lsx_vhsubw_du_wu(v4u32_a, v4u32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vhsubw.du.wu( ++ ++ // __lsx_vmod_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vmod_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmod.b( ++ ++ // __lsx_vmod_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vmod_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmod.h( ++ ++ // __lsx_vmod_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vmod_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmod.w( ++ ++ // __lsx_vmod_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vmod_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmod.d( ++ ++ // __lsx_vmod_bu ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vmod_bu(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmod.bu( ++ ++ 
// __lsx_vmod_hu ++ // vd, vj, vk ++ // UV8HI, UV8HI, UV8HI ++ v8u16_r = __lsx_vmod_hu(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmod.hu( ++ ++ // __lsx_vmod_wu ++ // vd, vj, vk ++ // UV4SI, UV4SI, UV4SI ++ v4u32_r = __lsx_vmod_wu(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmod.wu( ++ ++ // __lsx_vmod_du ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vmod_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmod.du( ++ ++ // __lsx_vreplve_b ++ // vd, vj, rk ++ // V16QI, V16QI, SI ++ v16i8_r = __lsx_vreplve_b(v16i8_a, i32_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vreplve.b( ++ ++ // __lsx_vreplve_h ++ // vd, vj, rk ++ // V8HI, V8HI, SI ++ v8i16_r = __lsx_vreplve_h(v8i16_a, i32_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vreplve.h( ++ ++ // __lsx_vreplve_w ++ // vd, vj, rk ++ // V4SI, V4SI, SI ++ v4i32_r = __lsx_vreplve_w(v4i32_a, i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vreplve.w( ++ ++ // __lsx_vreplve_d ++ // vd, vj, rk ++ // V2DI, V2DI, SI ++ v2i64_r = __lsx_vreplve_d(v2i64_a, i32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vreplve.d( ++ ++ // __lsx_vreplvei_b ++ // vd, vj, ui4 ++ // V16QI, V16QI, UQI ++ v16i8_r = __lsx_vreplvei_b(v16i8_a, ui4); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vreplvei.b( ++ ++ // __lsx_vreplvei_h ++ // vd, vj, ui3 ++ // V8HI, V8HI, UQI ++ v8i16_r = __lsx_vreplvei_h(v8i16_a, ui3); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vreplvei.h( ++ ++ // __lsx_vreplvei_w ++ // vd, vj, ui2 ++ // V4SI, V4SI, UQI ++ v4i32_r = __lsx_vreplvei_w(v4i32_a, ui2); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vreplvei.w( ++ ++ // __lsx_vreplvei_d ++ // vd, vj, ui1 ++ // V2DI, V2DI, UQI ++ v2i64_r = __lsx_vreplvei_d(v2i64_a, ui1); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vreplvei.d( ++ ++ // __lsx_vpickev_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vpickev_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vpickev.b( ++ ++ // __lsx_vpickev_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vpickev_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vpickev.h( ++ ++ // __lsx_vpickev_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vpickev_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vpickev.w( ++ ++ // __lsx_vpickev_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vpickev_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vpickev.d( ++ ++ // __lsx_vpickod_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vpickod_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vpickod.b( ++ ++ // __lsx_vpickod_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vpickod_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vpickod.h( ++ ++ // __lsx_vpickod_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vpickod_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vpickod.w( ++ ++ // __lsx_vpickod_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vpickod_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vpickod.d( ++ ++ // __lsx_vilvh_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vilvh_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vilvh.b( ++ ++ // __lsx_vilvh_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vilvh_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vilvh.h( ++ ++ // __lsx_vilvh_w ++ // vd, vj, vk 
++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vilvh_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vilvh.w( ++ ++ // __lsx_vilvh_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vilvh_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vilvh.d( ++ ++ // __lsx_vilvl_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vilvl_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vilvl.b( ++ ++ // __lsx_vilvl_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vilvl_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vilvl.h( ++ ++ // __lsx_vilvl_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vilvl_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vilvl.w( ++ ++ // __lsx_vilvl_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vilvl_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vilvl.d( ++ ++ // __lsx_vpackev_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vpackev_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vpackev.b( ++ ++ // __lsx_vpackev_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vpackev_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vpackev.h( ++ ++ // __lsx_vpackev_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vpackev_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vpackev.w( ++ ++ // __lsx_vpackev_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vpackev_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vpackev.d( ++ ++ // __lsx_vpackod_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vpackod_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vpackod.b( ++ ++ // __lsx_vpackod_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vpackod_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vpackod.h( ++ ++ // __lsx_vpackod_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vpackod_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vpackod.w( ++ ++ // __lsx_vpackod_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vpackod_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vpackod.d( ++ ++ // __lsx_vshuf_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vshuf_h(v8i16_a, v8i16_b, v8i16_c); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vshuf.h( ++ ++ // __lsx_vshuf_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vshuf_w(v4i32_a, v4i32_b, v4i32_c); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vshuf.w( ++ ++ // __lsx_vshuf_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vshuf_d(v2i64_a, v2i64_b, v2i64_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vshuf.d( ++ ++ // __lsx_vand_v ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vand_v(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vand.v( ++ ++ // __lsx_vandi_b ++ // vd, vj, ui8 ++ // UV16QI, UV16QI, UQI ++ v16u8_r = __lsx_vandi_b(v16u8_a, ui8); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vandi.b( ++ ++ // __lsx_vor_v ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vor_v(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vor.v( ++ ++ // __lsx_vori_b ++ // vd, vj, ui8 ++ // UV16QI, UV16QI, UQI ++ v16u8_r = __lsx_vori_b(v16u8_a, ui8); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vori.b( ++ ++ // __lsx_vnor_v ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI 
++ v16u8_r = __lsx_vnor_v(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vnor.v( ++ ++ // __lsx_vnori_b ++ // vd, vj, ui8 ++ // UV16QI, UV16QI, UQI ++ v16u8_r = __lsx_vnori_b(v16u8_a, ui8); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vnori.b( ++ ++ // __lsx_vxor_v ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vxor_v(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vxor.v( ++ ++ // __lsx_vxori_b ++ // vd, vj, ui8 ++ // UV16QI, UV16QI, UQI ++ v16u8_r = __lsx_vxori_b(v16u8_a, ui8); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vxori.b( ++ ++ // __lsx_vbitsel_v ++ // vd, vj, vk, va ++ // UV16QI, UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vbitsel_v(v16u8_a, v16u8_b, v16u8_c); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vbitsel.v( ++ ++ // __lsx_vbitseli_b ++ // vd, vj, ui8 ++ // UV16QI, UV16QI, UV16QI, UQI ++ v16u8_r = __lsx_vbitseli_b(v16u8_a, v16u8_b, ui8); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vbitseli.b( ++ ++ // __lsx_vshuf4i_b ++ // vd, vj, ui8 ++ // V16QI, V16QI, UQI ++ v16i8_r = __lsx_vshuf4i_b(v16i8_a, ui8); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vshuf4i.b( ++ ++ // __lsx_vshuf4i_h ++ // vd, vj, ui8 ++ // V8HI, V8HI, UQI ++ v8i16_r = __lsx_vshuf4i_h(v8i16_a, ui8); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vshuf4i.h( ++ ++ // __lsx_vshuf4i_w ++ // vd, vj, ui8 ++ // V4SI, V4SI, UQI ++ v4i32_r = __lsx_vshuf4i_w(v4i32_a, ui8); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vshuf4i.w( ++ ++ // __lsx_vreplgr2vr_b ++ // vd, rj ++ // V16QI, SI ++ v16i8_r = __lsx_vreplgr2vr_b(i32_a); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vreplgr2vr.b( ++ ++ // __lsx_vreplgr2vr_h ++ // vd, rj ++ // V8HI, SI ++ v8i16_r = __lsx_vreplgr2vr_h(i32_a); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vreplgr2vr.h( ++ ++ // __lsx_vreplgr2vr_w ++ // vd, rj ++ // V4SI, SI ++ v4i32_r = __lsx_vreplgr2vr_w(i32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vreplgr2vr.w( ++ ++ // __lsx_vreplgr2vr_d ++ // vd, rj ++ // V2DI, DI ++ v2i64_r = __lsx_vreplgr2vr_d(i64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vreplgr2vr.d( ++ ++ // __lsx_vpcnt_b ++ // vd, vj ++ // V16QI, V16QI ++ v16i8_r = __lsx_vpcnt_b(v16i8_a); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vpcnt.b( ++ ++ // __lsx_vpcnt_h ++ // vd, vj ++ // V8HI, V8HI ++ v8i16_r = __lsx_vpcnt_h(v8i16_a); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vpcnt.h( ++ ++ // __lsx_vpcnt_w ++ // vd, vj ++ // V4SI, V4SI ++ v4i32_r = __lsx_vpcnt_w(v4i32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vpcnt.w( ++ ++ // __lsx_vpcnt_d ++ // vd, vj ++ // V2DI, V2DI ++ v2i64_r = __lsx_vpcnt_d(v2i64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vpcnt.d( ++ ++ // __lsx_vclo_b ++ // vd, vj ++ // V16QI, V16QI ++ v16i8_r = __lsx_vclo_b(v16i8_a); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vclo.b( ++ ++ // __lsx_vclo_h ++ // vd, vj ++ // V8HI, V8HI ++ v8i16_r = __lsx_vclo_h(v8i16_a); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vclo.h( ++ ++ // __lsx_vclo_w ++ // vd, vj ++ // V4SI, V4SI ++ v4i32_r = __lsx_vclo_w(v4i32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vclo.w( ++ ++ // __lsx_vclo_d ++ // vd, vj ++ // V2DI, V2DI ++ v2i64_r = __lsx_vclo_d(v2i64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vclo.d( ++ ++ // __lsx_vclz_b ++ // vd, vj ++ // V16QI, V16QI ++ v16i8_r = __lsx_vclz_b(v16i8_a); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vclz.b( ++ ++ // __lsx_vclz_h ++ // vd, vj ++ // V8HI, V8HI ++ v8i16_r = __lsx_vclz_h(v8i16_a); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vclz.h( ++ ++ // __lsx_vclz_w ++ // vd, 
vj ++ // V4SI, V4SI ++ v4i32_r = __lsx_vclz_w(v4i32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vclz.w( ++ ++ // __lsx_vclz_d ++ // vd, vj ++ // V2DI, V2DI ++ v2i64_r = __lsx_vclz_d(v2i64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vclz.d( ++ ++ // __lsx_vpickve2gr_b ++ // rd, vj, ui4 ++ // SI, V16QI, UQI ++ i32_r = __lsx_vpickve2gr_b(v16i8_a, ui4); // CHECK: call i32 @llvm.loongarch.lsx.vpickve2gr.b( ++ ++ // __lsx_vpickve2gr_h ++ // rd, vj, ui3 ++ // SI, V8HI, UQI ++ i32_r = __lsx_vpickve2gr_h(v8i16_a, ui3); // CHECK: call i32 @llvm.loongarch.lsx.vpickve2gr.h( ++ ++ // __lsx_vpickve2gr_w ++ // rd, vj, ui2 ++ // SI, V4SI, UQI ++ i32_r = __lsx_vpickve2gr_w(v4i32_a, ui2); // CHECK: call i32 @llvm.loongarch.lsx.vpickve2gr.w( ++ ++ // __lsx_vpickve2gr_d ++ // rd, vj, ui1 ++ // DI, V2DI, UQI ++ i64_r = __lsx_vpickve2gr_d(v2i64_a, ui1); // CHECK: call i64 @llvm.loongarch.lsx.vpickve2gr.d( ++ ++ // __lsx_vpickve2gr_bu ++ // rd, vj, ui4 ++ // USI, V16QI, UQI ++ u32_r = __lsx_vpickve2gr_bu(v16i8_a, ui4); // CHECK: call i32 @llvm.loongarch.lsx.vpickve2gr.bu( ++ ++ // __lsx_vpickve2gr_hu ++ // rd, vj, ui3 ++ // USI, V8HI, UQI ++ u32_r = __lsx_vpickve2gr_hu(v8i16_a, ui3); // CHECK: call i32 @llvm.loongarch.lsx.vpickve2gr.hu( ++ ++ // __lsx_vpickve2gr_wu ++ // rd, vj, ui2 ++ // USI, V4SI, UQI ++ u32_r = __lsx_vpickve2gr_wu(v4i32_a, ui2); // CHECK: call i32 @llvm.loongarch.lsx.vpickve2gr.wu( ++ ++ // __lsx_vpickve2gr_du ++ // rd, vj, ui1 ++ // UDI, V2DI, UQI ++ u64_r = __lsx_vpickve2gr_du(v2i64_a, ui1); // CHECK: call i64 @llvm.loongarch.lsx.vpickve2gr.du( ++ ++ // __lsx_vinsgr2vr_b ++ // vd, rj, ui4 ++ // V16QI, V16QI, SI, UQI ++ v16i8_r = __lsx_vinsgr2vr_b(v16i8_a, i32_b, ui4); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vinsgr2vr.b( ++ ++ // __lsx_vinsgr2vr_h ++ // vd, rj, ui3 ++ // V8HI, V8HI, SI, UQI ++ v8i16_r = __lsx_vinsgr2vr_h(v8i16_a, i32_b, ui3); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vinsgr2vr.h( ++ ++ // __lsx_vinsgr2vr_w ++ // vd, rj, ui2 ++ // V4SI, V4SI, SI, UQI ++ v4i32_r = __lsx_vinsgr2vr_w(v4i32_a, i32_b, ui2); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vinsgr2vr.w( ++ ++ // __lsx_vinsgr2vr_d ++ // vd, rj, ui1 ++ // V2DI, V2DI, SI, UQI ++ v2i64_r = __lsx_vinsgr2vr_d(v2i64_a, i32_b, ui1); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vinsgr2vr.d( ++ ++ // __lsx_vfcmp_caf_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_caf_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.caf.s( ++ ++ // __lsx_vfcmp_caf_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_caf_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.caf.d( ++ ++ // __lsx_vfcmp_cor_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_cor_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.cor.s( ++ ++ // __lsx_vfcmp_cor_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_cor_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.cor.d( ++ ++ // __lsx_vfcmp_cun_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_cun_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.cun.s( ++ ++ // __lsx_vfcmp_cun_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_cun_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.cun.d( ++ ++ // __lsx_vfcmp_cune_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_cune_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.cune.s( ++ ++ // 
__lsx_vfcmp_cune_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_cune_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.cune.d( ++ ++ // __lsx_vfcmp_cueq_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_cueq_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.cueq.s( ++ ++ // __lsx_vfcmp_cueq_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_cueq_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.cueq.d( ++ ++ // __lsx_vfcmp_ceq_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_ceq_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.ceq.s( ++ ++ // __lsx_vfcmp_ceq_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_ceq_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.ceq.d( ++ ++ // __lsx_vfcmp_cne_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_cne_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.cne.s( ++ ++ // __lsx_vfcmp_cne_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_cne_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.cne.d( ++ ++ // __lsx_vfcmp_clt_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_clt_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.clt.s( ++ ++ // __lsx_vfcmp_clt_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_clt_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.clt.d( ++ ++ // __lsx_vfcmp_cult_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_cult_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.cult.s( ++ ++ // __lsx_vfcmp_cult_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_cult_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.cult.d( ++ ++ // __lsx_vfcmp_cle_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_cle_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.cle.s( ++ ++ // __lsx_vfcmp_cle_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_cle_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.cle.d( ++ ++ // __lsx_vfcmp_cule_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_cule_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.cule.s( ++ ++ // __lsx_vfcmp_cule_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_cule_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.cule.d( ++ ++ // __lsx_vfcmp_saf_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_saf_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.saf.s( ++ ++ // __lsx_vfcmp_saf_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_saf_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.saf.d( ++ ++ // __lsx_vfcmp_sor_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_sor_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.sor.s( ++ ++ // __lsx_vfcmp_sor_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_sor_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.sor.d( ++ ++ // __lsx_vfcmp_sun_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_sun_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.sun.s( ++ ++ // __lsx_vfcmp_sun_d ++ // vd, vj, 
vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_sun_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.sun.d( ++ ++ // __lsx_vfcmp_sune_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_sune_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.sune.s( ++ ++ // __lsx_vfcmp_sune_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_sune_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.sune.d( ++ ++ // __lsx_vfcmp_sueq_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_sueq_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.sueq.s( ++ ++ // __lsx_vfcmp_sueq_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_sueq_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.sueq.d( ++ ++ // __lsx_vfcmp_seq_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_seq_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.seq.s( ++ ++ // __lsx_vfcmp_seq_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_seq_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.seq.d( ++ ++ // __lsx_vfcmp_sne_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_sne_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.sne.s( ++ ++ // __lsx_vfcmp_sne_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_sne_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.sne.d( ++ ++ // __lsx_vfcmp_slt_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_slt_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.slt.s( ++ ++ // __lsx_vfcmp_slt_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_slt_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.slt.d( ++ ++ // __lsx_vfcmp_sult_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_sult_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.sult.s( ++ ++ // __lsx_vfcmp_sult_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_sult_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.sult.d( ++ ++ // __lsx_vfcmp_sle_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_sle_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.sle.s( ++ ++ // __lsx_vfcmp_sle_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_sle_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.sle.d( ++ ++ // __lsx_vfcmp_sule_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_sule_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.sule.s( ++ ++ // __lsx_vfcmp_sule_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_sule_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.sule.d( ++ ++ // __lsx_vfadd_s ++ // vd, vj, vk ++ // V4SF, V4SF, V4SF ++ v4f32_r = __lsx_vfadd_s(v4f32_a, v4f32_b); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfadd.s( ++ // __lsx_vfadd_d ++ // vd, vj, vk ++ // V2DF, V2DF, V2DF ++ v2f64_r = __lsx_vfadd_d(v2f64_a, v2f64_b); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfadd.d( ++ ++ // __lsx_vfsub_s ++ // vd, vj, vk ++ // V4SF, V4SF, V4SF ++ v4f32_r = __lsx_vfsub_s(v4f32_a, v4f32_b); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfsub.s( ++ ++ // __lsx_vfsub_d ++ // vd, vj, vk ++ // V2DF, V2DF, V2DF ++ v2f64_r = __lsx_vfsub_d(v2f64_a, 
v2f64_b); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfsub.d( ++ ++ // __lsx_vfmul_s ++ // vd, vj, vk ++ // V4SF, V4SF, V4SF ++ v4f32_r = __lsx_vfmul_s(v4f32_a, v4f32_b); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfmul.s( ++ ++ // __lsx_vfmul_d ++ // vd, vj, vk ++ // V2DF, V2DF, V2DF ++ v2f64_r = __lsx_vfmul_d(v2f64_a, v2f64_b); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfmul.d( ++ ++ // __lsx_vfdiv_s ++ // vd, vj, vk ++ // V4SF, V4SF, V4SF ++ v4f32_r = __lsx_vfdiv_s(v4f32_a, v4f32_b); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfdiv.s( ++ ++ // __lsx_vfdiv_d ++ // vd, vj, vk ++ // V2DF, V2DF, V2DF ++ v2f64_r = __lsx_vfdiv_d(v2f64_a, v2f64_b); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfdiv.d( ++ ++ // __lsx_vfcvt_h_s ++ // vd, vj, vk ++ // V8HI, V4SF, V4SF ++ v8i16_r = __lsx_vfcvt_h_s(v4f32_a, v4f32_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vfcvt.h.s( ++ ++ // __lsx_vfcvt_s_d ++ // vd, vj, vk ++ // V4SF, V2DF, V2DF ++ v4f32_r = __lsx_vfcvt_s_d(v2f64_a, v2f64_b); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfcvt.s.d( ++ ++ // __lsx_vfmin_s ++ // vd, vj, vk ++ // V4SF, V4SF, V4SF ++ v4f32_r = __lsx_vfmin_s(v4f32_a, v4f32_b); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfmin.s( ++ ++ // __lsx_vfmin_d ++ // vd, vj, vk ++ // V2DF, V2DF, V2DF ++ v2f64_r = __lsx_vfmin_d(v2f64_a, v2f64_b); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfmin.d( ++ ++ // __lsx_vfmina_s ++ // vd, vj, vk ++ // V4SF, V4SF, V4SF ++ v4f32_r = __lsx_vfmina_s(v4f32_a, v4f32_b); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfmina.s( ++ ++ // __lsx_vfmina_d ++ // vd, vj, vk ++ // V2DF, V2DF, V2DF ++ v2f64_r = __lsx_vfmina_d(v2f64_a, v2f64_b); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfmina.d( ++ ++ // __lsx_vfmax_s ++ // vd, vj, vk ++ // V4SF, V4SF, V4SF ++ v4f32_r = __lsx_vfmax_s(v4f32_a, v4f32_b); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfmax.s( ++ ++ // __lsx_vfmax_d ++ // vd, vj, vk ++ // V2DF, V2DF, V2DF ++ v2f64_r = __lsx_vfmax_d(v2f64_a, v2f64_b); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfmax.d( ++ ++ // __lsx_vfmaxa_s ++ // vd, vj, vk ++ // V4SF, V4SF, V4SF ++ v4f32_r = __lsx_vfmaxa_s(v4f32_a, v4f32_b); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfmaxa.s( ++ ++ // __lsx_vfmaxa_d ++ // vd, vj, vk ++ // V2DF, V2DF, V2DF ++ v2f64_r = __lsx_vfmaxa_d(v2f64_a, v2f64_b); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfmaxa.d( ++ ++ // __lsx_vfclass_s ++ // vd, vj ++ // V4SI, V4SF ++ v4i32_r = __lsx_vfclass_s(v4f32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfclass.s( ++ ++ // __lsx_vfclass_d ++ // vd, vj ++ // V2DI, V2DF ++ v2i64_r = __lsx_vfclass_d(v2f64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfclass.d( ++ ++ // __lsx_vfsqrt_s ++ // vd, vj ++ // V4SF, V4SF ++ v4f32_r = __lsx_vfsqrt_s(v4f32_a); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfsqrt.s( ++ ++ // __lsx_vfsqrt_d ++ // vd, vj ++ // V2DF, V2DF ++ v2f64_r = __lsx_vfsqrt_d(v2f64_a); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfsqrt.d( ++ ++ // __lsx_vfrecip_s ++ // vd, vj ++ // V4SF, V4SF ++ v4f32_r = __lsx_vfrecip_s(v4f32_a); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfrecip.s( ++ ++ // __lsx_vfrecip_d ++ // vd, vj ++ // V2DF, V2DF ++ v2f64_r = __lsx_vfrecip_d(v2f64_a); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfrecip.d( ++ ++ // __lsx_vfrint_s ++ // vd, vj ++ // V4SF, V4SF ++ v4f32_r = __lsx_vfrint_s(v4f32_a); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfrint.s( ++ ++ // __lsx_vfrint_d ++ // vd, vj ++ // V2DF, V2DF ++ v2f64_r = __lsx_vfrint_d(v2f64_a); // 
CHECK: call <2 x double> @llvm.loongarch.lsx.vfrint.d( ++ ++ // __lsx_vfrsqrt_s ++ // vd, vj ++ // V4SF, V4SF ++ v4f32_r = __lsx_vfrsqrt_s(v4f32_a); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfrsqrt.s( ++ ++ // __lsx_vfrsqrt_d ++ // vd, vj ++ // V2DF, V2DF ++ v2f64_r = __lsx_vfrsqrt_d(v2f64_a); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfrsqrt.d( ++ ++ // __lsx_vflogb_s ++ // vd, vj ++ // V4SF, V4SF ++ v4f32_r = __lsx_vflogb_s(v4f32_a); // CHECK: call <4 x float> @llvm.loongarch.lsx.vflogb.s( ++ ++ // __lsx_vflogb_d ++ // vd, vj ++ // V2DF, V2DF ++ v2f64_r = __lsx_vflogb_d(v2f64_a); // CHECK: call <2 x double> @llvm.loongarch.lsx.vflogb.d( ++ ++ // __lsx_vfcvth_s_h ++ // vd, vj ++ // V4SF, V8HI ++ v4f32_r = __lsx_vfcvth_s_h(v8i16_a); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfcvth.s.h( ++ ++ // __lsx_vfcvth_d_s ++ // vd, vj ++ // V2DF, V4SF ++ v2f64_r = __lsx_vfcvth_d_s(v4f32_a); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfcvth.d.s( ++ ++ //gcc build fail ++ ++ // __lsx_vfcvtl_s_h ++ // vd, vj ++ // V4SF, V8HI ++ v4f32_r = __lsx_vfcvtl_s_h(v8i16_a); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfcvtl.s.h( ++ ++ // __lsx_vfcvtl_d_s ++ // vd, vj ++ // V2DF, V4SF ++ v2f64_r = __lsx_vfcvtl_d_s(v4f32_a); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfcvtl.d.s( ++ ++ // __lsx_vftint_w_s ++ // vd, vj ++ // V4SI, V4SF ++ v4i32_r = __lsx_vftint_w_s(v4f32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vftint.w.s( ++ ++ // __lsx_vftint_l_d ++ // vd, vj ++ // V2DI, V2DF ++ v2i64_r = __lsx_vftint_l_d(v2f64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftint.l.d( ++ ++ // __lsx_vftint_wu_s ++ // vd, vj ++ // UV4SI, V4SF ++ v4u32_r = __lsx_vftint_wu_s(v4f32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vftint.wu.s( ++ ++ // __lsx_vftint_lu_d ++ // vd, vj ++ // UV2DI, V2DF ++ v2u64_r = __lsx_vftint_lu_d(v2f64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftint.lu.d( ++ ++ // __lsx_vftintrz_w_s ++ // vd, vj ++ // V4SI, V4SF ++ v4i32_r = __lsx_vftintrz_w_s(v4f32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vftintrz.w.s( ++ ++ // __lsx_vftintrz_l_d ++ // vd, vj ++ // V2DI, V2DF ++ v2i64_r = __lsx_vftintrz_l_d(v2f64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintrz.l.d( ++ ++ // __lsx_vftintrz_wu_s ++ // vd, vj ++ // UV4SI, V4SF ++ v4u32_r = __lsx_vftintrz_wu_s(v4f32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vftintrz.wu.s( ++ ++ // __lsx_vftintrz_lu_d ++ // vd, vj ++ // UV2DI, V2DF ++ v2u64_r = __lsx_vftintrz_lu_d(v2f64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintrz.lu.d( ++ ++ // __lsx_vffint_s_w ++ // vd, vj ++ // V4SF, V4SI ++ v4f32_r = __lsx_vffint_s_w(v4i32_a); // CHECK: call <4 x float> @llvm.loongarch.lsx.vffint.s.w( ++ ++ // __lsx_vffint_d_l ++ // vd, vj ++ // V2DF, V2DI ++ v2f64_r = __lsx_vffint_d_l(v2i64_a); // CHECK: call <2 x double> @llvm.loongarch.lsx.vffint.d.l( ++ ++ // __lsx_vffint_s_wu ++ // vd, vj ++ // V4SF, UV4SI ++ v4f32_r = __lsx_vffint_s_wu(v4u32_a); // CHECK: call <4 x float> @llvm.loongarch.lsx.vffint.s.wu( ++ ++ // __lsx_vffint_d_lu ++ // vd, vj ++ // V2DF, UV2DI ++ v2f64_r = __lsx_vffint_d_lu(v2u64_a); // CHECK: call <2 x double> @llvm.loongarch.lsx.vffint.d.lu( ++ ++ // __lsx_vandn_v ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vandn_v(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vandn.v( ++ ++ // __lsx_vneg_b ++ // vd, vj ++ // V16QI, V16QI ++ v16i8_r = __lsx_vneg_b(v16i8_a); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vneg.b( ++ ++ // __lsx_vneg_h ++ // vd, vj ++ // 
V8HI, V8HI ++ v8i16_r = __lsx_vneg_h(v8i16_a); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vneg.h( ++ ++ // __lsx_vneg_w ++ // vd, vj ++ // V4SI, V4SI ++ v4i32_r = __lsx_vneg_w(v4i32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vneg.w( ++ ++ // __lsx_vneg_d ++ // vd, vj ++ // V2DI, V2DI ++ v2i64_r = __lsx_vneg_d(v2i64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vneg.d( ++ ++ // __lsx_vmuh_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vmuh_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmuh.b( ++ ++ // __lsx_vmuh_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vmuh_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmuh.h( ++ ++ // __lsx_vmuh_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vmuh_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmuh.w( ++ ++ // __lsx_vmuh_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vmuh_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmuh.d( ++ ++ // __lsx_vmuh_bu ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vmuh_bu(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmuh.bu( ++ ++ // __lsx_vmuh_hu ++ // vd, vj, vk ++ // UV8HI, UV8HI, UV8HI ++ v8u16_r = __lsx_vmuh_hu(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmuh.hu( ++ ++ // __lsx_vmuh_wu ++ // vd, vj, vk ++ // UV4SI, UV4SI, UV4SI ++ v4u32_r = __lsx_vmuh_wu(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmuh.wu( ++ ++ // __lsx_vmuh_du ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vmuh_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmuh.du( ++ ++ // __lsx_vsllwil_h_b ++ // vd, vj, ui3 ++ // V8HI, V16QI, UQI ++ v8i16_r = __lsx_vsllwil_h_b(v16i8_a, ui3); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsllwil.h.b( ++ ++ // __lsx_vsllwil_w_h ++ // vd, vj, ui4 ++ // V4SI, V8HI, UQI ++ v4i32_r = __lsx_vsllwil_w_h(v8i16_a, ui4); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsllwil.w.h( ++ ++ // __lsx_vsllwil_d_w ++ // vd, vj, ui5 ++ // V2DI, V4SI, UQI ++ v2i64_r = __lsx_vsllwil_d_w(v4i32_a, ui5); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsllwil.d.w( ++ ++ // __lsx_vsllwil_hu_bu ++ // vd, vj, ui3 ++ // UV8HI, UV16QI, UQI ++ v8u16_r = __lsx_vsllwil_hu_bu(v16u8_a, ui3); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsllwil.hu.bu( ++ ++ // __lsx_vsllwil_wu_hu ++ // vd, vj, ui4 ++ // UV4SI, UV8HI, UQI ++ v4u32_r = __lsx_vsllwil_wu_hu(v8u16_a, ui4); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsllwil.wu.hu( ++ ++ // __lsx_vsllwil_du_wu ++ // vd, vj, ui5 ++ // UV2DI, UV4SI, UQI ++ v2u64_r = __lsx_vsllwil_du_wu(v4u32_a, ui5); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsllwil.du.wu( ++ ++ // __lsx_vsran_b_h ++ // vd, vj, vk ++ // V16QI, V8HI, V8HI ++ v16i8_r = __lsx_vsran_b_h(v8i16_a, v8i16_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsran.b.h( ++ ++ // __lsx_vsran_h_w ++ // vd, vj, vk ++ // V8HI, V4SI, V4SI ++ v8i16_r = __lsx_vsran_h_w(v4i32_a, v4i32_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsran.h.w( ++ ++ // __lsx_vsran_w_d ++ // vd, vj, vk ++ // V4SI, V2DI, V2DI ++ v4i32_r = __lsx_vsran_w_d(v2i64_a, v2i64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsran.w.d( ++ ++ // __lsx_vssran_b_h ++ // vd, vj, vk ++ // V16QI, V8HI, V8HI ++ v16i8_r = __lsx_vssran_b_h(v8i16_a, v8i16_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssran.b.h( ++ ++ // __lsx_vssran_h_w ++ // vd, vj, vk ++ // V8HI, V4SI, V4SI ++ v8i16_r = __lsx_vssran_h_w(v4i32_a, v4i32_b); // CHECK: call 
<8 x i16> @llvm.loongarch.lsx.vssran.h.w( ++ ++ // __lsx_vssran_w_d ++ // vd, vj, vk ++ // V4SI, V2DI, V2DI ++ v4i32_r = __lsx_vssran_w_d(v2i64_a, v2i64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssran.w.d( ++ ++ // __lsx_vssran_bu_h ++ // vd, vj, vk ++ // UV16QI, UV8HI, UV8HI ++ v16u8_r = __lsx_vssran_bu_h(v8u16_a, v8u16_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssran.bu.h( ++ ++ // __lsx_vssran_hu_w ++ // vd, vj, vk ++ // UV8HI, UV4SI, UV4SI ++ v8u16_r = __lsx_vssran_hu_w(v4u32_a, v4u32_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssran.hu.w( ++ ++ // __lsx_vssran_wu_d ++ // vd, vj, vk ++ // UV4SI, UV2DI, UV2DI ++ v4u32_r = __lsx_vssran_wu_d(v2u64_a, v2u64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssran.wu.d( ++ ++ // __lsx_vsrarn_b_h ++ // vd, vj, vk ++ // V16QI, V8HI, V8HI ++ v16i8_r = __lsx_vsrarn_b_h(v8i16_a, v8i16_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsrarn.b.h( ++ ++ // __lsx_vsrarn_h_w ++ // vd, vj, vk ++ // V8HI, V4SI, V4SI ++ v8i16_r = __lsx_vsrarn_h_w(v4i32_a, v4i32_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsrarn.h.w( ++ ++ // __lsx_vsrarn_w_d ++ // vd, vj, vk ++ // V4SI, V2DI, V2DI ++ v4i32_r = __lsx_vsrarn_w_d(v2i64_a, v2i64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsrarn.w.d( ++ ++ // __lsx_vssrarn_b_h ++ // vd, vj, vk ++ // V16QI, V8HI, V8HI ++ v16i8_r = __lsx_vssrarn_b_h(v8i16_a, v8i16_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssrarn.b.h( ++ ++ // __lsx_vssrarn_h_w ++ // vd, vj, vk ++ // V8HI, V4SI, V4SI ++ v8i16_r = __lsx_vssrarn_h_w(v4i32_a, v4i32_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssrarn.h.w( ++ ++ // __lsx_vssrarn_w_d ++ // vd, vj, vk ++ // V4SI, V2DI, V2DI ++ v4i32_r = __lsx_vssrarn_w_d(v2i64_a, v2i64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrarn.w.d( ++ ++ // __lsx_vssrarn_bu_h ++ // vd, vj, vk ++ // UV16QI, UV8HI, UV8HI ++ v16u8_r = __lsx_vssrarn_bu_h(v8u16_a, v8u16_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssrarn.bu.h( ++ ++ // __lsx_vssrarn_hu_w ++ // vd, vj, vk ++ // UV8HI, UV4SI, UV4SI ++ v8u16_r = __lsx_vssrarn_hu_w(v4u32_a, v4u32_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssrarn.hu.w( ++ ++ // __lsx_vssrarn_wu_d ++ // vd, vj, vk ++ // UV4SI, UV2DI, UV2DI ++ v4u32_r = __lsx_vssrarn_wu_d(v2u64_a, v2u64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrarn.wu.d( ++ ++ // __lsx_vsrln_b_h ++ // vd, vj, vk ++ // V16QI, V8HI, V8HI ++ v16i8_r = __lsx_vsrln_b_h(v8i16_a, v8i16_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsrln.b.h( ++ ++ // __lsx_vsrln_h_w ++ // vd, vj, vk ++ // V8HI, V4SI, V4SI ++ v8i16_r = __lsx_vsrln_h_w(v4i32_a, v4i32_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsrln.h.w( ++ ++ // __lsx_vsrln_w_d ++ // vd, vj, vk ++ // V4SI, V2DI, V2DI ++ v4i32_r = __lsx_vsrln_w_d(v2i64_a, v2i64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsrln.w.d( ++ ++ // __lsx_vssrln_bu_h ++ // vd, vj, vk ++ // UV16QI, UV8HI, UV8HI ++ v16u8_r = __lsx_vssrln_bu_h(v8u16_a, v8u16_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssrln.bu.h( ++ ++ // __lsx_vssrln_hu_w ++ // vd, vj, vk ++ // UV8HI, UV4SI, UV4SI ++ v8u16_r = __lsx_vssrln_hu_w(v4u32_a, v4u32_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssrln.hu.w( ++ ++ // __lsx_vssrln_wu_d ++ // vd, vj, vk ++ // UV4SI, UV2DI, UV2DI ++ v4u32_r = __lsx_vssrln_wu_d(v2u64_a, v2u64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrln.wu.d( ++ ++ // __lsx_vsrlrn_b_h ++ // vd, vj, vk ++ // V16QI, V8HI, V8HI ++ v16i8_r = __lsx_vsrlrn_b_h(v8i16_a, v8i16_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsrlrn.b.h( ++ 
++ // __lsx_vsrlrn_h_w ++ // vd, vj, vk ++ // V8HI, V4SI, V4SI ++ v8i16_r = __lsx_vsrlrn_h_w(v4i32_a, v4i32_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsrlrn.h.w( ++ ++ // __lsx_vsrlrn_w_d ++ // vd, vj, vk ++ // V4SI, V2DI, V2DI ++ v4i32_r = __lsx_vsrlrn_w_d(v2i64_a, v2i64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsrlrn.w.d( ++ ++ // __lsx_vssrlrn_bu_h ++ // vd, vj, vk ++ // UV16QI, UV8HI, UV8HI ++ v16u8_r = __lsx_vssrlrn_bu_h(v8u16_a, v8u16_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssrlrn.bu.h( ++ ++ // __lsx_vssrlrn_hu_w ++ // vd, vj, vk ++ // UV8HI, UV4SI, UV4SI ++ v8u16_r = __lsx_vssrlrn_hu_w(v4u32_a, v4u32_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssrlrn.hu.w( ++ ++ // __lsx_vssrlrn_wu_d ++ // vd, vj, vk ++ // UV4SI, UV2DI, UV2DI ++ v4u32_r = __lsx_vssrlrn_wu_d(v2u64_a, v2u64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrlrn.wu.d( ++ ++ // __lsx_vfrstpi_b ++ // vd, vj, ui5 ++ // V16QI, V16QI, V16QI, UQI ++ v16i8_r = __lsx_vfrstpi_b(v16i8_a, v16i8_b, ui5); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vfrstpi.b( ++ ++ // __lsx_vfrstpi_h ++ // vd, vj, ui5 ++ // V8HI, V8HI, V8HI, UQI ++ v8i16_r = __lsx_vfrstpi_h(v8i16_a, v8i16_b, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vfrstpi.h( ++ ++ // __lsx_vfrstp_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vfrstp_b(v16i8_a, v16i8_b, v16i8_c); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vfrstp.b( ++ ++ // __lsx_vfrstp_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vfrstp_h(v8i16_a, v8i16_b, v8i16_c); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vfrstp.h( ++ ++ // __lsx_vshuf4i_d ++ // vd, vj, ui8 ++ // V2DI, V2DI, V2DI, USI ++ v2i64_r = __lsx_vshuf4i_d(v2i64_a, v2i64_b, ui8); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vshuf4i.d( ++ ++ // __lsx_vbsrl_v ++ // vd, vj, ui5 ++ // V16QI, V16QI, UQI ++ v16i8_r = __lsx_vbsrl_v(v16i8_a, ui5); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vbsrl.v( ++ ++ // __lsx_vbsll_v ++ // vd, vj, ui5 ++ // V16QI, V16QI, UQI ++ v16i8_r = __lsx_vbsll_v(v16i8_a, ui5); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vbsll.v( ++ ++ // __lsx_vextrins_b ++ // vd, vj, ui8 ++ // V16QI, V16QI, V16QI, UQI ++ v16i8_r = __lsx_vextrins_b(v16i8_a, v16i8_b, ui8); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vextrins.b( ++ ++ // __lsx_vextrins_h ++ // vd, vj, ui8 ++ // V8HI, V8HI, V8HI, UQI ++ v8i16_r = __lsx_vextrins_h(v8i16_a, v8i16_b, ui8); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vextrins.h( ++ ++ // __lsx_vextrins_w ++ // vd, vj, ui8 ++ // V4SI, V4SI, V4SI, UQI ++ v4i32_r = __lsx_vextrins_w(v4i32_a, v4i32_b, ui8); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vextrins.w( ++ ++ // __lsx_vextrins_d ++ // vd, vj, ui8 ++ // V2DI, V2DI, V2DI, UQI ++ v2i64_r = __lsx_vextrins_d(v2i64_a, v2i64_b, ui8); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vextrins.d( ++ ++ // __lsx_vmskltz_b ++ // vd, vj ++ // V16QI, V16QI ++ v16i8_r = __lsx_vmskltz_b(v16i8_a); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmskltz.b( ++ ++ // __lsx_vmskltz_h ++ // vd, vj ++ // V8HI, V8HI ++ v8i16_r = __lsx_vmskltz_h(v8i16_a); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmskltz.h( ++ ++ // __lsx_vmskltz_w ++ // vd, vj ++ // V4SI, V4SI ++ v4i32_r = __lsx_vmskltz_w(v4i32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmskltz.w( ++ ++ // __lsx_vmskltz_d ++ // vd, vj ++ // V2DI, V2DI ++ v2i64_r = __lsx_vmskltz_d(v2i64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmskltz.d( ++ ++ // __lsx_vsigncov_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = 
__lsx_vsigncov_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsigncov.b( ++ ++ // __lsx_vsigncov_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vsigncov_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsigncov.h( ++ ++ // __lsx_vsigncov_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vsigncov_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsigncov.w( ++ ++ // __lsx_vsigncov_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vsigncov_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsigncov.d( ++ ++ // __lsx_vfmadd_s ++ // vd, vj, vk, va ++ // V4SF, V4SF, V4SF, V4SF ++ v4f32_r = __lsx_vfmadd_s(v4f32_a, v4f32_b, v4f32_c); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfmadd.s( ++ ++ // __lsx_vfmadd_d ++ // vd, vj, vk, va ++ // V2DF, V2DF, V2DF, V2DF ++ v2f64_r = __lsx_vfmadd_d(v2f64_a, v2f64_b, v2f64_c); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfmadd.d( ++ ++ // __lsx_vfmsub_s ++ // vd, vj, vk, va ++ // V4SF, V4SF, V4SF, V4SF ++ v4f32_r = __lsx_vfmsub_s(v4f32_a, v4f32_b, v4f32_c); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfmsub.s( ++ ++ // __lsx_vfmsub_d ++ // vd, vj, vk, va ++ // V2DF, V2DF, V2DF, V2DF ++ v2f64_r = __lsx_vfmsub_d(v2f64_a, v2f64_b, v2f64_c); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfmsub.d( ++ ++ // __lsx_vfnmadd_s ++ // vd, vj, vk, va ++ // V4SF, V4SF, V4SF, V4SF ++ v4f32_r = __lsx_vfnmadd_s(v4f32_a, v4f32_b, v4f32_c); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfnmadd.s( ++ ++ // __lsx_vfnmadd_d ++ // vd, vj, vk, va ++ // V2DF, V2DF, V2DF, V2DF ++ v2f64_r = __lsx_vfnmadd_d(v2f64_a, v2f64_b, v2f64_c); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfnmadd.d( ++ ++ // __lsx_vfnmsub_s ++ // vd, vj, vk, va ++ // V4SF, V4SF, V4SF, V4SF ++ v4f32_r = __lsx_vfnmsub_s(v4f32_a, v4f32_b, v4f32_c); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfnmsub.s( ++ ++ // __lsx_vfnmsub_d ++ // vd, vj, vk, va ++ // V2DF, V2DF, V2DF, V2DF ++ v2f64_r = __lsx_vfnmsub_d(v2f64_a, v2f64_b, v2f64_c); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfnmsub.d( ++ ++ // __lsx_vftintrne_w_s ++ // vd, vj ++ // V4SI, V4SF ++ v4i32_r = __lsx_vftintrne_w_s(v4f32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vftintrne.w.s( ++ ++ // __lsx_vftintrne_l_d ++ // vd, vj ++ // V2DI, V2DF ++ v2i64_r = __lsx_vftintrne_l_d(v2f64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintrne.l.d( ++ ++ // __lsx_vftintrp_w_s ++ // vd, vj ++ // V4SI, V4SF ++ v4i32_r = __lsx_vftintrp_w_s(v4f32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.s( ++ ++ // __lsx_vftintrp_l_d ++ // vd, vj ++ // V2DI, V2DF ++ v2i64_r = __lsx_vftintrp_l_d(v2f64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintrp.l.d( ++ ++ // __lsx_vftintrm_w_s ++ // vd, vj ++ // V4SI, V4SF ++ v4i32_r = __lsx_vftintrm_w_s(v4f32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.s( ++ ++ // __lsx_vftintrm_l_d ++ // vd, vj ++ // V2DI, V2DF ++ v2i64_r = __lsx_vftintrm_l_d(v2f64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintrm.l.d( ++ ++ // __lsx_vftint_w_d ++ // vd, vj, vk ++ // V4SI, V2DF, V2DF ++ v4i32_r = __lsx_vftint_w_d(v2f64_a, v2f64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vftint.w.d( ++ ++ // __lsx_vffint_s_l ++ // vd, vj, vk ++ // V4SF, V2DI, V2DI ++ v4f32_r = __lsx_vffint_s_l(v2i64_a, v2i64_b); // CHECK: call <4 x float> @llvm.loongarch.lsx.vffint.s.l( ++ ++ // __lsx_vftintrz_w_d ++ // vd, vj, vk ++ // V4SI, V2DF, V2DF ++ v4i32_r = __lsx_vftintrz_w_d(v2f64_a, v2f64_b); // 
CHECK: call <4 x i32> @llvm.loongarch.lsx.vftintrz.w.d( ++ ++ // __lsx_vftintrp_w_d ++ // vd, vj, vk ++ // V4SI, V2DF, V2DF ++ v4i32_r = __lsx_vftintrp_w_d(v2f64_a, v2f64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.d( ++ ++ // __lsx_vftintrm_w_d ++ // vd, vj, vk ++ // V4SI, V2DF, V2DF ++ v4i32_r = __lsx_vftintrm_w_d(v2f64_a, v2f64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.d( ++ ++ // __lsx_vftintrne_w_d ++ // vd, vj, vk ++ // V4SI, V2DF, V2DF ++ v4i32_r = __lsx_vftintrne_w_d(v2f64_a, v2f64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vftintrne.w.d( ++ ++ // __lsx_vftintl_l_s ++ // vd, vj ++ // V2DI, V4SF ++ v2i64_r = __lsx_vftintl_l_s(v4f32_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintl.l.s( ++ ++ // __lsx_vftinth_l_s ++ // vd, vj ++ // V2DI, V4SF ++ v2i64_r = __lsx_vftinth_l_s(v4f32_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftinth.l.s( ++ ++ // __lsx_vffinth_d_w ++ // vd, vj ++ // V2DF, V4SI ++ v2f64_r = __lsx_vffinth_d_w(v4i32_a); // CHECK: call <2 x double> @llvm.loongarch.lsx.vffinth.d.w( ++ ++ // __lsx_vffintl_d_w ++ // vd, vj ++ // V2DF, V4SI ++ v2f64_r = __lsx_vffintl_d_w(v4i32_a); // CHECK: call <2 x double> @llvm.loongarch.lsx.vffintl.d.w( ++ ++ // __lsx_vftintrzl_l_s ++ // vd, vj ++ // V2DI, V4SF ++ v2i64_r = __lsx_vftintrzl_l_s(v4f32_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintrzl.l.s( ++ ++ // __lsx_vftintrzh_l_s ++ // vd, vj ++ // V2DI, V4SF ++ v2i64_r = __lsx_vftintrzh_l_s(v4f32_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintrzh.l.s( ++ ++ // __lsx_vftintrpl_l_s ++ // vd, vj ++ // V2DI, V4SF ++ v2i64_r = __lsx_vftintrpl_l_s(v4f32_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintrpl.l.s( ++ ++ // __lsx_vftintrph_l_s ++ // vd, vj ++ // V2DI, V4SF ++ v2i64_r = __lsx_vftintrph_l_s(v4f32_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintrph.l.s( ++ ++ // __lsx_vftintrml_l_s ++ // vd, vj ++ // V2DI, V4SF ++ v2i64_r = __lsx_vftintrml_l_s(v4f32_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintrml.l.s( ++ ++ // __lsx_vftintrmh_l_s ++ // vd, vj ++ // V2DI, V4SF ++ v2i64_r = __lsx_vftintrmh_l_s(v4f32_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintrmh.l.s( ++ ++ // __lsx_vftintrnel_l_s ++ // vd, vj ++ // V2DI, V4SF ++ v2i64_r = __lsx_vftintrnel_l_s(v4f32_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintrnel.l.s( ++ ++ // __lsx_vftintrneh_l_s ++ // vd, vj ++ // V2DI, V4SF ++ v2i64_r = __lsx_vftintrneh_l_s(v4f32_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintrneh.l.s( ++ ++ // __lsx_vfrintrne_s ++ // vd, vj ++ // V4SF, V4SF ++ v4f32_r = __lsx_vfrintrne_s(v4f32_a); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfrintrne.s( ++ ++ // __lsx_vfrintrne_d ++ // vd, vj ++ // V2DF, V2DF ++ v2f64_r = __lsx_vfrintrne_d(v2f64_a); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfrintrne.d( ++ ++ // __lsx_vfrintrz_s ++ // vd, vj ++ // V4SF, V4SF ++ v4f32_r = __lsx_vfrintrz_s(v4f32_a); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfrintrz.s( ++ ++ // __lsx_vfrintrz_d ++ // vd, vj ++ // V2DF, V2DF ++ v2f64_r = __lsx_vfrintrz_d(v2f64_a); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfrintrz.d( ++ ++ // __lsx_vfrintrp_s ++ // vd, vj ++ // V4SF, V4SF ++ v4f32_r = __lsx_vfrintrp_s(v4f32_a); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfrintrp.s( ++ ++ // __lsx_vfrintrp_d ++ // vd, vj ++ // V2DF, V2DF ++ v2f64_r = __lsx_vfrintrp_d(v2f64_a); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfrintrp.d( ++ ++ // __lsx_vfrintrm_s ++ // vd, vj ++ // V4SF, V4SF ++ v4f32_r = 
__lsx_vfrintrm_s(v4f32_a); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfrintrm.s( ++ ++ // __lsx_vfrintrm_d ++ // vd, vj ++ // V2DF, V2DF ++ v2f64_r = __lsx_vfrintrm_d(v2f64_a); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfrintrm.d( ++ ++ // __lsx_vstelm_b ++ // vd, rj, si8, idx ++ // VOID, V16QI, CVPOINTER, SI, UQI ++ __lsx_vstelm_b(v16i8_a, &v16i8_b, 0, idx4); // CHECK: call void @llvm.loongarch.lsx.vstelm.b( ++ // __lsx_vstelm_h ++ // vd, rj, si8, idx ++ // VOID, V8HI, CVPOINTER, SI, UQI ++ __lsx_vstelm_h(v8i16_a, &v8i16_b, 0, idx3); // CHECK: call void @llvm.loongarch.lsx.vstelm.h( ++ ++ // __lsx_vstelm_w ++ // vd, rj, si8, idx ++ // VOID, V4SI, CVPOINTER, SI, UQI ++ __lsx_vstelm_w(v4i32_a, &v4i32_b, 0, idx2); // CHECK: call void @llvm.loongarch.lsx.vstelm.w( ++ ++ // __lsx_vstelm_d ++ // vd, rj, si8, idx ++ // VOID, V2DI, CVPOINTER, SI, UQI ++ __lsx_vstelm_d(v2i64_a, &v2i64_b, 0, idx1); // CHECK: call void @llvm.loongarch.lsx.vstelm.d( ++ ++ // __lsx_vaddwev_d_w ++ // vd, vj, vk ++ // V2DI, V4SI, V4SI ++ v2i64_r = __lsx_vaddwev_d_w(v4i32_a, v4i32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.w( ++ ++ // __lsx_vaddwev_w_h ++ // vd, vj, vk ++ // V4SI, V8HI, V8HI ++ v4i32_r = __lsx_vaddwev_w_h(v8i16_a, v8i16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.h( ++ ++ // __lsx_vaddwev_h_b ++ // vd, vj, vk ++ // V8HI, V16QI, V16QI ++ v8i16_r = __lsx_vaddwev_h_b(v16i8_a, v16i8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.b( ++ ++ // __lsx_vaddwod_d_w ++ // vd, vj, vk ++ // V2DI, V4SI, V4SI ++ v2i64_r = __lsx_vaddwod_d_w(v4i32_a, v4i32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.w( ++ ++ // __lsx_vaddwod_w_h ++ // vd, vj, vk ++ // V4SI, V8HI, V8HI ++ v4i32_r = __lsx_vaddwod_w_h(v8i16_a, v8i16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.h( ++ ++ // __lsx_vaddwod_h_b ++ // vd, vj, vk ++ // V8HI, V16QI, V16QI ++ v8i16_r = __lsx_vaddwod_h_b(v16i8_a, v16i8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.b( ++ ++ // __lsx_vaddwev_d_wu ++ // vd, vj, vk ++ // V2DI, UV4SI, UV4SI ++ v2i64_r = __lsx_vaddwev_d_wu(v4u32_a, v4u32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu( ++ ++ // __lsx_vaddwev_w_hu ++ // vd, vj, vk ++ // V4SI, UV8HI, UV8HI ++ v4i32_r = __lsx_vaddwev_w_hu(v8u16_a, v8u16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu( ++ ++ // __lsx_vaddwev_h_bu ++ // vd, vj, vk ++ // V8HI, UV16QI, UV16QI ++ v8i16_r = __lsx_vaddwev_h_bu(v16u8_a, v16u8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu( ++ ++ // __lsx_vaddwod_d_wu ++ // vd, vj, vk ++ // V2DI, UV4SI, UV4SI ++ v2i64_r = __lsx_vaddwod_d_wu(v4u32_a, v4u32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu( ++ ++ // __lsx_vaddwod_w_hu ++ // vd, vj, vk ++ // V4SI, UV8HI, UV8HI ++ v4i32_r = __lsx_vaddwod_w_hu(v8u16_a, v8u16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu( ++ ++ // __lsx_vaddwod_h_bu ++ // vd, vj, vk ++ // V8HI, UV16QI, UV16QI ++ v8i16_r = __lsx_vaddwod_h_bu(v16u8_a, v16u8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu( ++ ++ // __lsx_vaddwev_d_wu_w ++ // vd, vj, vk ++ // V2DI, UV4SI, V4SI ++ v2i64_r = __lsx_vaddwev_d_wu_w(v4u32_a, v4i32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu.w( ++ ++ // __lsx_vaddwev_w_hu_h ++ // vd, vj, vk ++ // V4SI, UV8HI, V8HI ++ v4i32_r = __lsx_vaddwev_w_hu_h(v8u16_a, v8i16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu.h( ++ ++ // __lsx_vaddwev_h_bu_b ++ // vd, vj, vk ++ // V8HI, UV16QI, V16QI ++ 
v8i16_r = __lsx_vaddwev_h_bu_b(v16u8_a, v16i8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu.b( ++ ++ // __lsx_vaddwod_d_wu_w ++ // vd, vj, vk ++ // V2DI, UV4SI, V4SI ++ v2i64_r = __lsx_vaddwod_d_wu_w(v4u32_a, v4i32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu.w( ++ ++ // __lsx_vaddwod_w_hu_h ++ // vd, vj, vk ++ // V4SI, UV8HI, V8HI ++ v4i32_r = __lsx_vaddwod_w_hu_h(v8u16_a, v8i16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu.h( ++ ++ // __lsx_vaddwod_h_bu_b ++ // vd, vj, vk ++ // V8HI, UV16QI, V16QI ++ v8i16_r = __lsx_vaddwod_h_bu_b(v16u8_a, v16i8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu.b( ++ ++ // __lsx_vsubwev_d_w ++ // vd, vj, vk ++ // V2DI, V4SI, V4SI ++ v2i64_r = __lsx_vsubwev_d_w(v4i32_a, v4i32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.w( ++ ++ // __lsx_vsubwev_w_h ++ // vd, vj, vk ++ // V4SI, V8HI, V8HI ++ v4i32_r = __lsx_vsubwev_w_h(v8i16_a, v8i16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsubwev.w.h( ++ ++ // __lsx_vsubwev_h_b ++ // vd, vj, vk ++ // V8HI, V16QI, V16QI ++ v8i16_r = __lsx_vsubwev_h_b(v16i8_a, v16i8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.b( ++ ++ // __lsx_vsubwod_d_w ++ // vd, vj, vk ++ // V2DI, V4SI, V4SI ++ v2i64_r = __lsx_vsubwod_d_w(v4i32_a, v4i32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.w( ++ ++ // __lsx_vsubwod_w_h ++ // vd, vj, vk ++ // V4SI, V8HI, V8HI ++ v4i32_r = __lsx_vsubwod_w_h(v8i16_a, v8i16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsubwod.w.h( ++ ++ // __lsx_vsubwod_h_b ++ // vd, vj, vk ++ // V8HI, V16QI, V16QI ++ v8i16_r = __lsx_vsubwod_h_b(v16i8_a, v16i8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.b( ++ ++ // __lsx_vsubwev_d_wu ++ // vd, vj, vk ++ // V2DI, UV4SI, UV4SI ++ v2i64_r = __lsx_vsubwev_d_wu(v4u32_a, v4u32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.wu( ++ ++ // __lsx_vsubwev_w_hu ++ // vd, vj, vk ++ // V4SI, UV8HI, UV8HI ++ v4i32_r = __lsx_vsubwev_w_hu(v8u16_a, v8u16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsubwev.w.hu( ++ ++ // __lsx_vsubwev_h_bu ++ // vd, vj, vk ++ // V8HI, UV16QI, UV16QI ++ v8i16_r = __lsx_vsubwev_h_bu(v16u8_a, v16u8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.bu( ++ ++ // __lsx_vsubwod_d_wu ++ // vd, vj, vk ++ // V2DI, UV4SI, UV4SI ++ v2i64_r = __lsx_vsubwod_d_wu(v4u32_a, v4u32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.wu( ++ ++ // __lsx_vsubwod_w_hu ++ // vd, vj, vk ++ // V4SI, UV8HI, UV8HI ++ v4i32_r = __lsx_vsubwod_w_hu(v8u16_a, v8u16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsubwod.w.hu( ++ ++ // __lsx_vsubwod_h_bu ++ // vd, vj, vk ++ // V8HI, UV16QI, UV16QI ++ v8i16_r = __lsx_vsubwod_h_bu(v16u8_a, v16u8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.bu( ++ ++ // __lsx_vaddwev_q_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vaddwev_q_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.d( ++ ++ // __lsx_vaddwod_q_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vaddwod_q_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.d( ++ ++ // __lsx_vaddwev_q_du ++ // vd, vj, vk ++ // V2DI, UV2DI, UV2DI ++ v2i64_r = __lsx_vaddwev_q_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du( ++ ++ // __lsx_vaddwod_q_du ++ // vd, vj, vk ++ // V2DI, UV2DI, UV2DI ++ v2i64_r = __lsx_vaddwod_q_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du( ++ ++ // __lsx_vsubwev_q_d ++ // vd, 
vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vsubwev_q_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.d( ++ ++ // __lsx_vsubwod_q_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vsubwod_q_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.d( ++ ++ // __lsx_vsubwev_q_du ++ // vd, vj, vk ++ // V2DI, UV2DI, UV2DI ++ v2i64_r = __lsx_vsubwev_q_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.du( ++ ++ // __lsx_vsubwod_q_du ++ // vd, vj, vk ++ // V2DI, UV2DI, UV2DI ++ v2i64_r = __lsx_vsubwod_q_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.du( ++ ++ // __lsx_vaddwev_q_du_d ++ // vd, vj, vk ++ // V2DI, UV2DI, V2DI ++ v2i64_r = __lsx_vaddwev_q_du_d(v2u64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du.d( ++ ++ // __lsx_vaddwod_q_du_d ++ // vd, vj, vk ++ // V2DI, UV2DI, V2DI ++ v2i64_r = __lsx_vaddwod_q_du_d(v2u64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du.d( ++ ++ // __lsx_vmulwev_d_w ++ // vd, vj, vk ++ // V2DI, V4SI, V4SI ++ v2i64_r = __lsx_vmulwev_d_w(v4i32_a, v4i32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.w( ++ ++ // __lsx_vmulwev_w_h ++ // vd, vj, vk ++ // V4SI, V8HI, V8HI ++ v4i32_r = __lsx_vmulwev_w_h(v8i16_a, v8i16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.h( ++ ++ // __lsx_vmulwev_h_b ++ // vd, vj, vk ++ // V8HI, V16QI, V16QI ++ v8i16_r = __lsx_vmulwev_h_b(v16i8_a, v16i8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.b( ++ ++ // __lsx_vmulwod_d_w ++ // vd, vj, vk ++ // V2DI, V4SI, V4SI ++ v2i64_r = __lsx_vmulwod_d_w(v4i32_a, v4i32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.w( ++ ++ // __lsx_vmulwod_w_h ++ // vd, vj, vk ++ // V4SI, V8HI, V8HI ++ v4i32_r = __lsx_vmulwod_w_h(v8i16_a, v8i16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.h( ++ ++ // __lsx_vmulwod_h_b ++ // vd, vj, vk ++ // V8HI, V16QI, V16QI ++ v8i16_r = __lsx_vmulwod_h_b(v16i8_a, v16i8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.b( ++ ++ // __lsx_vmulwev_d_wu ++ // vd, vj, vk ++ // V2DI, UV4SI, UV4SI ++ v2i64_r = __lsx_vmulwev_d_wu(v4u32_a, v4u32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu( ++ ++ // __lsx_vmulwev_w_hu ++ // vd, vj, vk ++ // V4SI, UV8HI, UV8HI ++ v4i32_r = __lsx_vmulwev_w_hu(v8u16_a, v8u16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu( ++ ++ // __lsx_vmulwev_h_bu ++ // vd, vj, vk ++ // V8HI, UV16QI, UV16QI ++ v8i16_r = __lsx_vmulwev_h_bu(v16u8_a, v16u8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu( ++ ++ // __lsx_vmulwod_d_wu ++ // vd, vj, vk ++ // V2DI, UV4SI, UV4SI ++ v2i64_r = __lsx_vmulwod_d_wu(v4u32_a, v4u32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu( ++ ++ // __lsx_vmulwod_w_hu ++ // vd, vj, vk ++ // V4SI, UV8HI, UV8HI ++ v4i32_r = __lsx_vmulwod_w_hu(v8u16_a, v8u16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu( ++ ++ // __lsx_vmulwod_h_bu ++ // vd, vj, vk ++ // V8HI, UV16QI, UV16QI ++ v8i16_r = __lsx_vmulwod_h_bu(v16u8_a, v16u8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu( ++ ++ // __lsx_vmulwev_d_wu_w ++ // vd, vj, vk ++ // V2DI, UV4SI, V4SI ++ v2i64_r = __lsx_vmulwev_d_wu_w(v4u32_a, v4i32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu.w( ++ ++ // __lsx_vmulwev_w_hu_h ++ // vd, vj, vk ++ // V4SI, UV8HI, V8HI ++ v4i32_r = __lsx_vmulwev_w_hu_h(v8u16_a, v8i16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu.h( ++ 
++ // __lsx_vmulwev_h_bu_b ++ // vd, vj, vk ++ // V8HI, UV16QI, V16QI ++ v8i16_r = __lsx_vmulwev_h_bu_b(v16u8_a, v16i8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu.b( ++ ++ // __lsx_vmulwod_d_wu_w ++ // vd, vj, vk ++ // V2DI, UV4SI, V4SI ++ v2i64_r = __lsx_vmulwod_d_wu_w(v4u32_a, v4i32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu.w( ++ ++ // __lsx_vmulwod_w_hu_h ++ // vd, vj, vk ++ // V4SI, UV8HI, V8HI ++ v4i32_r = __lsx_vmulwod_w_hu_h(v8u16_a, v8i16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu.h( ++ ++ // __lsx_vmulwod_h_bu_b ++ // vd, vj, vk ++ // V8HI, UV16QI, V16QI ++ v8i16_r = __lsx_vmulwod_h_bu_b(v16u8_a, v16i8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu.b( ++ ++ // __lsx_vmulwev_q_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vmulwev_q_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.d( ++ ++ // __lsx_vmulwod_q_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vmulwod_q_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.d( ++ ++ // __lsx_vmulwev_q_du ++ // vd, vj, vk ++ // V2DI, UV2DI, UV2DI ++ v2i64_r = __lsx_vmulwev_q_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du( ++ ++ // __lsx_vmulwod_q_du ++ // vd, vj, vk ++ // V2DI, UV2DI, UV2DI ++ v2i64_r = __lsx_vmulwod_q_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du( ++ ++ // __lsx_vmulwev_q_du_d ++ // vd, vj, vk ++ // V2DI, UV2DI, V2DI ++ v2i64_r = __lsx_vmulwev_q_du_d(v2u64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du.d( ++ ++ // __lsx_vmulwod_q_du_d ++ // vd, vj, vk ++ // V2DI, UV2DI, V2DI ++ v2i64_r = __lsx_vmulwod_q_du_d(v2u64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du.d( ++ ++ // __lsx_vhaddw_q_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vhaddw_q_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vhaddw.q.d( ++ ++ // __lsx_vhaddw_qu_du ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vhaddw_qu_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vhaddw.qu.du( ++ ++ // __lsx_vhsubw_q_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vhsubw_q_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vhsubw.q.d( ++ ++ // __lsx_vhsubw_qu_du ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vhsubw_qu_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vhsubw.qu.du( ++ ++ // __lsx_vmaddwev_d_w ++ // vd, vj, vk ++ // V2DI, V2DI, V4SI, V4SI ++ v2i64_r = __lsx_vmaddwev_d_w(v2i64_a, v4i32_b, v4i32_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.w( ++ ++ // __lsx_vmaddwev_w_h ++ // vd, vj, vk ++ // V4SI, V4SI, V8HI, V8HI ++ v4i32_r = __lsx_vmaddwev_w_h(v4i32_a, v8i16_b, v8i16_c); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.h( ++ ++ // __lsx_vmaddwev_h_b ++ // vd, vj, vk ++ // V8HI, V8HI, V16QI, V16QI ++ v8i16_r = __lsx_vmaddwev_h_b(v8i16_a, v16i8_b, v16i8_c); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.b( ++ ++ // __lsx_vmaddwev_d_wu ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV4SI, UV4SI ++ v2u64_r = __lsx_vmaddwev_d_wu(v2u64_a, v4u32_b, v4u32_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu( ++ ++ // __lsx_vmaddwev_w_hu ++ // vd, vj, vk ++ // UV4SI, UV4SI, UV8HI, UV8HI ++ v4u32_r = __lsx_vmaddwev_w_hu(v4u32_a, v8u16_b, v8u16_c); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu( ++ ++ // __lsx_vmaddwev_h_bu ++ // vd, vj, vk 
++ // UV8HI, UV8HI, UV16QI, UV16QI ++ v8u16_r = __lsx_vmaddwev_h_bu(v8u16_a, v16u8_b, v16u8_c); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu( ++ ++ // __lsx_vmaddwod_d_w ++ // vd, vj, vk ++ // V2DI, V2DI, V4SI, V4SI ++ v2i64_r = __lsx_vmaddwod_d_w(v2i64_a, v4i32_b, v4i32_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.w( ++ ++ // __lsx_vmaddwod_w_h ++ // vd, vj, vk ++ // V4SI, V4SI, V8HI, V8HI ++ v4i32_r = __lsx_vmaddwod_w_h(v4i32_a, v8i16_b, v8i16_c); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.h( ++ ++ // __lsx_vmaddwod_h_b ++ // vd, vj, vk ++ // V8HI, V8HI, V16QI, V16QI ++ v8i16_r = __lsx_vmaddwod_h_b(v8i16_a, v16i8_b, v16i8_c); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.b( ++ ++ // __lsx_vmaddwod_d_wu ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV4SI, UV4SI ++ v2u64_r = __lsx_vmaddwod_d_wu(v2u64_a, v4u32_b, v4u32_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu( ++ ++ // __lsx_vmaddwod_w_hu ++ // vd, vj, vk ++ // UV4SI, UV4SI, UV8HI, UV8HI ++ v4u32_r = __lsx_vmaddwod_w_hu(v4u32_a, v8u16_b, v8u16_c); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu( ++ ++ // __lsx_vmaddwod_h_bu ++ // vd, vj, vk ++ // UV8HI, UV8HI, UV16QI, UV16QI ++ v8u16_r = __lsx_vmaddwod_h_bu(v8u16_a, v16u8_b, v16u8_c); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu( ++ ++ // __lsx_vmaddwev_d_wu_w ++ // vd, vj, vk ++ // V2DI, V2DI, UV4SI, V4SI ++ v2i64_r = __lsx_vmaddwev_d_wu_w(v2i64_a, v4u32_b, v4i32_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu.w( ++ ++ // __lsx_vmaddwev_w_hu_h ++ // vd, vj, vk ++ // V4SI, V4SI, UV8HI, V8HI ++ v4i32_r = __lsx_vmaddwev_w_hu_h(v4i32_a, v8u16_b, v8i16_c); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu.h( ++ ++ // __lsx_vmaddwev_h_bu_b ++ // vd, vj, vk ++ // V8HI, V8HI, UV16QI, V16QI ++ v8i16_r = __lsx_vmaddwev_h_bu_b(v8i16_a, v16u8_b, v16i8_c); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu.b( ++ ++ // __lsx_vmaddwod_d_wu_w ++ // vd, vj, vk ++ // V2DI, V2DI, UV4SI, V4SI ++ v2i64_r = __lsx_vmaddwod_d_wu_w(v2i64_a, v4u32_b, v4i32_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu.w( ++ ++ // __lsx_vmaddwod_w_hu_h ++ // vd, vj, vk ++ // V4SI, V4SI, UV8HI, V8HI ++ v4i32_r = __lsx_vmaddwod_w_hu_h(v4i32_a, v8u16_b, v8i16_c); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu.h( ++ ++ // __lsx_vmaddwod_h_bu_b ++ // vd, vj, vk ++ // V8HI, V8HI, UV16QI, V16QI ++ v8i16_r = __lsx_vmaddwod_h_bu_b(v8i16_a, v16u8_b, v16i8_c); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu.b( ++ ++ // __lsx_vmaddwev_q_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vmaddwev_q_d(v2i64_a, v2i64_b, v2i64_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.d( ++ ++ // __lsx_vmaddwod_q_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vmaddwod_q_d(v2i64_a, v2i64_b, v2i64_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.d( ++ ++ // __lsx_vmaddwev_q_du ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vmaddwev_q_du(v2u64_a, v2u64_b, v2u64_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du( ++ ++ // __lsx_vmaddwod_q_du ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vmaddwod_q_du(v2u64_a, v2u64_b, v2u64_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du( ++ ++ // __lsx_vmaddwev_q_du_d ++ // vd, vj, vk ++ // V2DI, V2DI, UV2DI, V2DI ++ v2i64_r = __lsx_vmaddwev_q_du_d(v2i64_a, v2u64_b, v2i64_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du.d( ++ 
++ // __lsx_vmaddwod_q_du_d ++ // vd, vj, vk ++ // V2DI, V2DI, UV2DI, V2DI ++ v2i64_r = __lsx_vmaddwod_q_du_d(v2i64_a, v2u64_b, v2i64_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du.d( ++ ++ // __lsx_vrotr_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vrotr_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vrotr.b( ++ ++ // __lsx_vrotr_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vrotr_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vrotr.h( ++ ++ // __lsx_vrotr_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vrotr_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vrotr.w( ++ ++ // __lsx_vrotr_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vrotr_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vrotr.d( ++ ++ // __lsx_vadd_q ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vadd_q(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vadd.q( ++ ++ // __lsx_vsub_q ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vsub_q(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsub.q( ++ ++ // __lsx_vldrepl_b ++ // vd, rj, si12 ++ // V16QI, CVPOINTER, SI ++ v16i8_r = __lsx_vldrepl_b(&v16i8_a, si12); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vldrepl.b( ++ ++ // __lsx_vldrepl_h ++ // vd, rj, si11 ++ // V8HI, CVPOINTER, SI ++ v8i16_r = __lsx_vldrepl_h(&v8i16_a, si11); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vldrepl.h( ++ ++ // __lsx_vldrepl_w ++ // vd, rj, si10 ++ // V4SI, CVPOINTER, SI ++ v4i32_r = __lsx_vldrepl_w(&v4i32_a, si10); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vldrepl.w( ++ ++ // __lsx_vldrepl_d ++ // vd, rj, si9 ++ // V2DI, CVPOINTER, SI ++ v2i64_r = __lsx_vldrepl_d(&v2i64_a, si9); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vldrepl.d( ++ ++ // __lsx_vmskgez_b ++ // vd, vj ++ // V16QI, V16QI ++ v16i8_r = __lsx_vmskgez_b(v16i8_a); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmskgez.b( ++ ++ // __lsx_vmsknz_b ++ // vd, vj ++ // V16QI, V16QI ++ v16i8_r = __lsx_vmsknz_b(v16i8_a); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmsknz.b( ++ ++ // __lsx_vexth_h_b ++ // vd, vj ++ // V8HI, V16QI ++ v8i16_r = __lsx_vexth_h_b(v16i8_a); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vexth.h.b( ++ ++ // __lsx_vexth_w_h ++ // vd, vj ++ // V4SI, V8HI ++ v4i32_r = __lsx_vexth_w_h(v8i16_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vexth.w.h( ++ ++ // __lsx_vexth_d_w ++ // vd, vj ++ // V2DI, V4SI ++ v2i64_r = __lsx_vexth_d_w(v4i32_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vexth.d.w( ++ ++ // __lsx_vexth_q_d ++ // vd, vj ++ // V2DI, V2DI ++ v2i64_r = __lsx_vexth_q_d(v2i64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vexth.q.d( ++ ++ // __lsx_vexth_hu_bu ++ // vd, vj ++ // UV8HI, UV16QI ++ v8u16_r = __lsx_vexth_hu_bu(v16u8_a); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vexth.hu.bu( ++ ++ // __lsx_vexth_wu_hu ++ // vd, vj ++ // UV4SI, UV8HI ++ v4u32_r = __lsx_vexth_wu_hu(v8u16_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vexth.wu.hu( ++ ++ // __lsx_vexth_du_wu ++ // vd, vj ++ // UV2DI, UV4SI ++ v2u64_r = __lsx_vexth_du_wu(v4u32_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vexth.du.wu( ++ ++ // __lsx_vexth_qu_du ++ // vd, vj ++ // UV2DI, UV2DI ++ v2u64_r = __lsx_vexth_qu_du(v2u64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vexth.qu.du( ++ ++ // __lsx_vrotri_b ++ // vd, vj, ui3 ++ // V16QI, V16QI, UQI ++ v16i8_r = __lsx_vrotri_b(v16i8_a, ui3); // CHECK: call <16 x i8> 
@llvm.loongarch.lsx.vrotri.b( ++ ++ // __lsx_vrotri_h ++ // vd, vj, ui4 ++ // V8HI, V8HI, UQI ++ v8i16_r = __lsx_vrotri_h(v8i16_a, ui4); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vrotri.h( ++ ++ // __lsx_vrotri_w ++ // vd, vj, ui5 ++ // V4SI, V4SI, UQI ++ v4i32_r = __lsx_vrotri_w(v4i32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vrotri.w( ++ ++ // __lsx_vrotri_d ++ // vd, vj, ui6 ++ // V2DI, V2DI, UQI ++ v2i64_r = __lsx_vrotri_d(v2i64_a, ui6); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vrotri.d( ++ ++ // __lsx_vextl_q_d ++ // vd, vj ++ // V2DI, V2DI ++ v2i64_r = __lsx_vextl_q_d(v2i64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vextl.q.d( ++ ++ // __lsx_vsrlni_b_h ++ // vd, vj, ui4 ++ // V16QI, V16QI, V16QI, USI ++ v16i8_r = __lsx_vsrlni_b_h(v16i8_a, v16i8_b, ui4); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsrlni.b.h( ++ ++ // __lsx_vsrlni_h_w ++ // vd, vj, ui5 ++ // V8HI, V8HI, V8HI, USI ++ v8i16_r = __lsx_vsrlni_h_w(v8i16_a, v8i16_b, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsrlni.h.w( ++ ++ // __lsx_vsrlni_w_d ++ // vd, vj, ui6 ++ // V4SI, V4SI, V4SI, USI ++ v4i32_r = __lsx_vsrlni_w_d(v4i32_a, v4i32_b, ui6); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsrlni.w.d( ++ ++ // __lsx_vsrlni_d_q ++ // vd, vj, ui7 ++ // V2DI, V2DI, V2DI, USI ++ v2i64_r = __lsx_vsrlni_d_q(v2i64_a, v2i64_b, ui7); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsrlni.d.q( ++ ++ // __lsx_vssrlni_b_h ++ // vd, vj, ui4 ++ // V16QI, V16QI, V16QI, USI ++ v16i8_r = __lsx_vssrlni_b_h(v16i8_a, v16i8_b, ui4); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssrlni.b.h( ++ ++ // __lsx_vssrlni_h_w ++ // vd, vj, ui5 ++ // V8HI, V8HI, V8HI, USI ++ v8i16_r = __lsx_vssrlni_h_w(v8i16_a, v8i16_b, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssrlni.h.w( ++ ++ // __lsx_vssrlni_w_d ++ // vd, vj, ui6 ++ // V4SI, V4SI, V4SI, USI ++ v4i32_r = __lsx_vssrlni_w_d(v4i32_a, v4i32_b, ui6); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrlni.w.d( ++ ++ // __lsx_vssrlni_d_q ++ // vd, vj, ui7 ++ // V2DI, V2DI, V2DI, USI ++ v2i64_r = __lsx_vssrlni_d_q(v2i64_a, v2i64_b, ui7); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vssrlni.d.q( ++ ++ // __lsx_vssrlni_bu_h ++ // vd, vj, ui4 ++ // UV16QI, UV16QI, V16QI, USI ++ v16u8_r = __lsx_vssrlni_bu_h(v16u8_a, v16i8_b, ui4); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssrlni.bu.h( ++ ++ // __lsx_vssrlni_hu_w ++ // vd, vj, ui5 ++ // UV8HI, UV8HI, V8HI, USI ++ v8u16_r = __lsx_vssrlni_hu_w(v8u16_a, v8i16_b, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssrlni.hu.w( ++ ++ // __lsx_vssrlni_wu_d ++ // vd, vj, ui6 ++ // UV4SI, UV4SI, V4SI, USI ++ v4u32_r = __lsx_vssrlni_wu_d(v4u32_a, v4i32_b, ui6); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrlni.wu.d( ++ ++ // __lsx_vssrlni_du_q ++ // vd, vj, ui7 ++ // UV2DI, UV2DI, V2DI, USI ++ v2u64_r = __lsx_vssrlni_du_q(v2u64_a, v2i64_b, ui7); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vssrlni.du.q( ++ ++ // __lsx_vssrlrni_b_h ++ // vd, vj, ui4 ++ // V16QI, V16QI, V16QI, USI ++ v16i8_r = __lsx_vssrlrni_b_h(v16i8_a, v16i8_b, ui4); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssrlrni.b.h( ++ ++ // __lsx_vssrlrni_h_w ++ // vd, vj, ui5 ++ // V8HI, V8HI, V8HI, USI ++ v8i16_r = __lsx_vssrlrni_h_w(v8i16_a, v8i16_b, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssrlrni.h.w( ++ ++ // __lsx_vssrlrni_w_d ++ // vd, vj, ui6 ++ // V4SI, V4SI, V4SI, USI ++ v4i32_r = __lsx_vssrlrni_w_d(v4i32_a, v4i32_b, ui6); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrlrni.w.d( ++ ++ // __lsx_vssrlrni_d_q ++ // vd, vj, ui7 ++ // V2DI, V2DI, V2DI, 
USI ++ v2i64_r = __lsx_vssrlrni_d_q(v2i64_a, v2i64_b, ui7); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vssrlrni.d.q( ++ ++ // __lsx_vssrlrni_bu_h ++ // vd, vj, ui4 ++ // UV16QI, UV16QI, V16QI, USI ++ v16u8_r = __lsx_vssrlrni_bu_h(v16u8_a, v16i8_b, ui4); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssrlrni.bu.h( ++ ++ // __lsx_vssrlrni_hu_w ++ // vd, vj, ui5 ++ // UV8HI, UV8HI, V8HI, USI ++ v8u16_r = __lsx_vssrlrni_hu_w(v8u16_a, v8i16_b, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssrlrni.hu.w( ++ ++ // __lsx_vssrlrni_wu_d ++ // vd, vj, ui6 ++ // UV4SI, UV4SI, V4SI, USI ++ v4u32_r = __lsx_vssrlrni_wu_d(v4u32_a, v4i32_b, ui6); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrlrni.wu.d( ++ ++ // __lsx_vssrlrni_du_q ++ // vd, vj, ui7 ++ // UV2DI, UV2DI, V2DI, USI ++ v2u64_r = __lsx_vssrlrni_du_q(v2u64_a, v2i64_b, ui7); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vssrlrni.du.q( ++ ++ // __lsx_vsrani_b_h ++ // vd, vj, ui4 ++ // V16QI, V16QI, V16QI, USI ++ v16i8_r = __lsx_vsrani_b_h(v16i8_a, v16i8_b, ui4); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsrani.b.h( ++ ++ // __lsx_vsrani_h_w ++ // vd, vj, ui5 ++ // V8HI, V8HI, V8HI, USI ++ v8i16_r = __lsx_vsrani_h_w(v8i16_a, v8i16_b, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsrani.h.w( ++ ++ // __lsx_vsrani_w_d ++ // vd, vj, ui6 ++ // V4SI, V4SI, V4SI, USI ++ v4i32_r = __lsx_vsrani_w_d(v4i32_a, v4i32_b, ui6); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsrani.w.d( ++ ++ // __lsx_vsrani_d_q ++ // vd, vj, ui7 ++ // V2DI, V2DI, V2DI, USI ++ v2i64_r = __lsx_vsrani_d_q(v2i64_a, v2i64_b, ui7); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsrani.d.q( ++ ++ // __lsx_vsrarni_b_h ++ // vd, vj, ui4 ++ // V16QI, V16QI, V16QI, USI ++ v16i8_r = __lsx_vsrarni_b_h(v16i8_a, v16i8_b, ui4); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsrarni.b.h( ++ ++ // __lsx_vsrarni_h_w ++ // vd, vj, ui5 ++ // V8HI, V8HI, V8HI, USI ++ v8i16_r = __lsx_vsrarni_h_w(v8i16_a, v8i16_b, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsrarni.h.w( ++ ++ // __lsx_vsrarni_w_d ++ // vd, vj, ui6 ++ // V4SI, V4SI, V4SI, USI ++ v4i32_r = __lsx_vsrarni_w_d(v4i32_a, v4i32_b, ui6); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsrarni.w.d( ++ ++ // __lsx_vsrarni_d_q ++ // vd, vj, ui7 ++ // V2DI, V2DI, V2DI, USI ++ v2i64_r = __lsx_vsrarni_d_q(v2i64_a, v2i64_b, ui7); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsrarni.d.q( ++ ++ // __lsx_vssrani_b_h ++ // vd, vj, ui4 ++ // V16QI, V16QI, V16QI, USI ++ v16i8_r = __lsx_vssrani_b_h(v16i8_a, v16i8_b, ui4); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssrani.b.h( ++ ++ // __lsx_vssrani_h_w ++ // vd, vj, ui5 ++ // V8HI, V8HI, V8HI, USI ++ v8i16_r = __lsx_vssrani_h_w(v8i16_a, v8i16_b, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssrani.h.w( ++ ++ // __lsx_vssrani_w_d ++ // vd, vj, ui6 ++ // V4SI, V4SI, V4SI, USI ++ v4i32_r = __lsx_vssrani_w_d(v4i32_a, v4i32_b, ui6); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrani.w.d( ++ ++ // __lsx_vssrani_d_q ++ // vd, vj, ui7 ++ // V2DI, V2DI, V2DI, USI ++ v2i64_r = __lsx_vssrani_d_q(v2i64_a, v2i64_b, ui7); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vssrani.d.q( ++ ++ // __lsx_vssrani_bu_h ++ // vd, vj, ui4 ++ // UV16QI, UV16QI, V16QI, USI ++ v16u8_r = __lsx_vssrani_bu_h(v16u8_a, v16i8_b, ui4); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssrani.bu.h( ++ ++ // __lsx_vssrani_hu_w ++ // vd, vj, ui5 ++ // UV8HI, UV8HI, V8HI, USI ++ v8u16_r = __lsx_vssrani_hu_w(v8u16_a, v8i16_b, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssrani.hu.w( ++ ++ // __lsx_vssrani_wu_d ++ // vd, vj, 
ui6 ++ // UV4SI, UV4SI, V4SI, USI ++ v4u32_r = __lsx_vssrani_wu_d(v4u32_a, v4i32_b, ui6); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrani.wu.d( ++ ++ // __lsx_vssrani_du_q ++ // vd, vj, ui7 ++ // UV2DI, UV2DI, V2DI, USI ++ v2u64_r = __lsx_vssrani_du_q(v2u64_a, v2i64_b, ui7); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vssrani.du.q( ++ ++ // __lsx_vssrarni_b_h ++ // vd, vj, ui4 ++ // V16QI, V16QI, V16QI, USI ++ v16i8_r = __lsx_vssrarni_b_h(v16i8_a, v16i8_b, ui4); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssrarni.b.h( ++ ++ // __lsx_vssrarni_h_w ++ // vd, vj, ui5 ++ // V8HI, V8HI, V8HI, USI ++ v8i16_r = __lsx_vssrarni_h_w(v8i16_a, v8i16_b, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssrarni.h.w( ++ ++ // __lsx_vssrarni_w_d ++ // vd, vj, ui6 ++ // V4SI, V4SI, V4SI, USI ++ v4i32_r = __lsx_vssrarni_w_d(v4i32_a, v4i32_b, ui6); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrarni.w.d( ++ ++ // __lsx_vssrarni_d_q ++ // vd, vj, ui7 ++ // V2DI, V2DI, V2DI, USI ++ v2i64_r = __lsx_vssrarni_d_q(v2i64_a, v2i64_b, ui7); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vssrarni.d.q( ++ ++ // __lsx_vssrarni_bu_h ++ // vd, vj, ui4 ++ // UV16QI, UV16QI, V16QI, USI ++ v16u8_r = __lsx_vssrarni_bu_h(v16u8_a, v16i8_b, ui4); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssrarni.bu.h( ++ ++ // __lsx_vssrarni_hu_w ++ // vd, vj, ui5 ++ // UV8HI, UV8HI, V8HI, USI ++ v8u16_r = __lsx_vssrarni_hu_w(v8u16_a, v8i16_b, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssrarni.hu.w( ++ ++ // __lsx_vssrarni_wu_d ++ // vd, vj, ui6 ++ // UV4SI, UV4SI, V4SI, USI ++ v4u32_r = __lsx_vssrarni_wu_d(v4u32_a, v4i32_b, ui6); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrarni.wu.d( ++ ++ // __lsx_vssrarni_du_q ++ // vd, vj, ui7 ++ // UV2DI, UV2DI, V2DI, USI ++ v2u64_r = __lsx_vssrarni_du_q(v2u64_a, v2i64_b, ui7); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vssrarni.du.q( ++ ++ // __lsx_vpermi_w ++ // vd, vj, ui8 ++ // V4SI, V4SI, V4SI, USI ++ v4i32_r = __lsx_vpermi_w(v4i32_a, v4i32_b, ui8); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vpermi.w( ++ ++ // __lsx_vld ++ // vd, rj, si12 ++ // V16QI, CVPOINTER, SI ++ v16i8_r = __lsx_vld(&v16i8_a, si12); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vld( ++ ++ // __lsx_vst ++ // vd, rj, si12 ++ // VOID, V16QI, CVPOINTER, SI ++ __lsx_vst(v16i8_a, &v16i8_b, 0); // CHECK: call void @llvm.loongarch.lsx.vst( ++ ++ // __lsx_vssrlrn_b_h ++ // vd, vj, vk ++ // V16QI, V8HI, V8HI ++ v16i8_r = __lsx_vssrlrn_b_h(v8i16_a, v8i16_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssrlrn.b.h( ++ ++ // __lsx_vssrlrn_h_w ++ // vd, vj, vk ++ // V8HI, V4SI, V4SI ++ v8i16_r = __lsx_vssrlrn_h_w(v4i32_a, v4i32_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssrlrn.h.w( ++ ++ // __lsx_vssrlrn_w_d ++ // vd, vj, vk ++ // V4SI, V2DI, V2DI ++ v4i32_r = __lsx_vssrlrn_w_d(v2i64_a, v2i64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrlrn.w.d( ++ ++ // __lsx_vssrln_b_h ++ // vd, vj, vk ++ // V16QI, V8HI, V8HI ++ v16i8_r = __lsx_vssrln_b_h(v8i16_a, v8i16_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssrln.b.h( ++ ++ // __lsx_vssrln_h_w ++ // vd, vj, vk ++ // V8HI, V4SI, V4SI ++ v8i16_r = __lsx_vssrln_h_w(v4i32_a, v4i32_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssrln.h.w( ++ ++ // __lsx_vssrln_w_d ++ // vd, vj, vk ++ // V4SI, V2DI, V2DI ++ v4i32_r = __lsx_vssrln_w_d(v2i64_a, v2i64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrln.w.d( ++ ++ // __lsx_vorn_v ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vorn_v(v16i8_a, v16i8_b); // CHECK: call <16 x i8> 
@llvm.loongarch.lsx.vorn.v( ++ ++ // __lsx_vldi ++ // vd, i13 ++ // V2DI, HI ++ v2i64_r = __lsx_vldi(i13); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vldi( ++ ++ // __lsx_vshuf_b ++ // vd, vj, vk, va ++ // V16QI, V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vshuf_b(v16i8_a, v16i8_b, v16i8_c); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vshuf.b( ++ ++ // __lsx_vldx ++ // vd, rj, rk ++ // V16QI, CVPOINTER, DI ++ v16i8_r = __lsx_vldx(&v16i8_a, i64_d); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vldx( ++ ++ // __lsx_vstx ++ // vd, rj, rk ++ // VOID, V16QI, CVPOINTER, DI ++ __lsx_vstx(v16i8_a, &v16i8_b, i64_d); // CHECK: call void @llvm.loongarch.lsx.vstx( ++ ++ // __lsx_vextl_qu_du ++ // vd, vj ++ // UV2DI, UV2DI ++ v2u64_r = __lsx_vextl_qu_du(v2u64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vextl.qu.du( ++ ++ // __lsx_bnz_v ++ // rd, vj ++ // SI, UV16QI ++ i32_r = __lsx_bnz_v(v16u8_a); // CHECK: call i32 @llvm.loongarch.lsx.bnz.v( ++ ++ // __lsx_bz_v ++ // rd, vj ++ // SI, UV16QI ++ i32_r = __lsx_bz_v(v16u8_a); // CHECK: call i32 @llvm.loongarch.lsx.bz.v( ++ ++ // __lsx_bnz_b ++ // rd, vj ++ // SI, UV16QI ++ i32_r = __lsx_bnz_b(v16u8_a); // CHECK: call i32 @llvm.loongarch.lsx.bnz.b( ++ ++ // __lsx_bnz_h ++ // rd, vj ++ // SI, UV8HI ++ i32_r = __lsx_bnz_h(v8u16_a); // CHECK: call i32 @llvm.loongarch.lsx.bnz.h( ++ ++ // __lsx_bnz_w ++ // rd, vj ++ // SI, UV4SI ++ i32_r = __lsx_bnz_w(v4u32_a); // CHECK: call i32 @llvm.loongarch.lsx.bnz.w( ++ ++ // __lsx_bnz_d ++ // rd, vj ++ // SI, UV2DI ++ i32_r = __lsx_bnz_d(v2u64_a); // CHECK: call i32 @llvm.loongarch.lsx.bnz.d( ++ ++ // __lsx_bz_b ++ // rd, vj ++ // SI, UV16QI ++ i32_r = __lsx_bz_b(v16u8_a); // CHECK: call i32 @llvm.loongarch.lsx.bz.b( ++ ++ // __lsx_bz_h ++ // rd, vj ++ // SI, UV8HI ++ i32_r = __lsx_bz_h(v8u16_a); // CHECK: call i32 @llvm.loongarch.lsx.bz.h( ++ ++ // __lsx_bz_w ++ // rd, vj ++ // SI, UV4SI ++ i32_r = __lsx_bz_w(v4u32_a); // CHECK: call i32 @llvm.loongarch.lsx.bz.w( ++ ++ // __lsx_bz_d ++ // rd, vj ++ // SI, UV2DI ++ i32_r = __lsx_bz_d(v2u64_a); // CHECK: call i32 @llvm.loongarch.lsx.bz.d( ++ ++ v16i8_r = __lsx_vsrlrni_b_h(v16i8_a, v16i8_b, 2); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsrlrni.b.h( ++ ++ v8i16_r = __lsx_vsrlrni_h_w(v8i16_a, v8i16_b, 2); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsrlrni.h.w( ++ ++ v4i32_r = __lsx_vsrlrni_w_d(v4i32_a, v4i32_b, 2); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsrlrni.w.d( ++ ++ v2i64_r = __lsx_vsrlrni_d_q(v2i64_a, v2i64_b, 2); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsrlrni.d.q( ++ ++ v16i8_r = __lsx_vrepli_b(2); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vrepli.b( ++ ++ v8i16_r = __lsx_vrepli_h(2); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vrepli.h( ++ ++ v4i32_r = __lsx_vrepli_w(2); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vrepli.w( ++ ++ v2i64_r = __lsx_vrepli_d(2); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vrepli.d( ++} +diff --git a/test/CodeGen/loongarch-inline-asm-modifiers.c b/test/CodeGen/loongarch-inline-asm-modifiers.c +new file mode 100644 +index 000000000..412eca2bd +--- /dev/null ++++ b/test/CodeGen/loongarch-inline-asm-modifiers.c +@@ -0,0 +1,50 @@ ++// RUN: %clang -target loongarch64-unknown-linux-gnu -S -o - -emit-llvm %s \ ++// RUN: | FileCheck %s ++ ++// This checks that the frontend will accept inline asm operand modifiers ++ ++int printf(const char*, ...); ++ ++typedef long long v2i64 __attribute__ ((vector_size(16), aligned(16))); ++typedef long long v4i64 __attribute__ ((vector_size(32), aligned(32))); ++ ++// CHECK: %{{[0-9]+}} 
= call i32 asm "ld.w $0,$1;\0A", "=r,*m"(ptr elementtype(i32) getelementptr inbounds (i32, ptr @b, i64 4)) ++// CHECK: %{{[0-9]+}} = call i32 asm "ld.w $0,${1:D};\0A", "=r,*m"(ptr elementtype(i32) getelementptr inbounds (i32, ptr @b, i64 4)) ++// CHECK: %{{[0-9]+}} = call <2 x i64> asm "vldi ${0:w},1", "=f" ++// CHECK: %{{[0-9]+}} = call <4 x i64> asm "xldi ${0:u},1", "=f" ++int b[8] = {0,1,2,3,4,5,6,7}; ++int main() ++{ ++ int i; ++ v2i64 v2i64_r; ++ v4i64 v4i64_r; ++ ++ // The first word. Notice, no 'D' ++ {asm ( ++ "ld.w %0,%1;\n" ++ : "=r" (i) ++ : "m" (*(b+4)));} ++ ++ printf("%d\n",i); ++ ++ // The second word ++ {asm ( ++ "ld.w %0,%D1;\n" ++ : "=r" (i) ++ : "m" (*(b+4)) ++ );} ++ ++ // LSX registers ++ { asm("vldi %w0,1" ++ : "=f"(v2i64_r)); } ++ ++ printf("%d\n", i); ++ ++ // LASX registers ++ { asm("xldi %u0,1" ++ : "=f"(v4i64_r)); } ++ ++ printf("%d\n",i); ++ ++ return 1; ++} +diff --git a/test/CodeGen/loongarch-inline-asm.c b/test/CodeGen/loongarch-inline-asm.c +new file mode 100644 +index 000000000..1f995ac79 +--- /dev/null ++++ b/test/CodeGen/loongarch-inline-asm.c +@@ -0,0 +1,31 @@ ++// REQUIRES: loongarch-registered-target ++// RUN: %clang_cc1 -triple loongarch64-linux-gnu -emit-llvm -o - %s | FileCheck %s ++ ++int data; ++ ++void m () { ++ asm("ld.w $r1, %0" :: "m"(data)); ++ // CHECK: call void asm sideeffect "ld.w $$r1, $0", "*m"(ptr elementtype(i32) @data) ++} ++ ++void ZC () { ++ asm("ll.w $r1, %0" :: "ZC"(data)); ++ // CHECK: call void asm sideeffect "ll.w $$r1, $0", "*^ZC"(ptr elementtype(i32) @data) ++} ++ ++void ZB () { ++ asm("amadd_db.w $zero, $r1, %0" :: "ZB"(data)); ++ // CHECK: call void asm sideeffect "amadd_db.w $$zero, $$r1, $0", "*^ZB"(ptr elementtype(i32) @data) ++} ++ ++void R () { ++ asm("ld.w $r1, %0" :: "R"(data)); ++ // CHECK: call void asm sideeffect "ld.w $$r1, $0", "*R"(ptr elementtype(i32) @data) ++} ++ ++int *p; ++void preld () { ++ asm("preld 0, %0, 2" :: "r"(p)); ++ // CHECK: %0 = load ptr, ptr @p, align 8 ++ // CHECK: call void asm sideeffect "preld 0, $0, 2", "r"(ptr %0) ++} +diff --git a/test/CodeGenCXX/LoongArch/abi-lp64d-struct-inherit.cpp b/test/CodeGenCXX/LoongArch/abi-lp64d-struct-inherit.cpp +new file mode 100644 +index 000000000..dc5ffaf08 +--- /dev/null ++++ b/test/CodeGenCXX/LoongArch/abi-lp64d-struct-inherit.cpp +@@ -0,0 +1,95 @@ ++// RUN: %clang_cc1 -triple loongarch64 -target-abi lp64 \ ++// RUN: -emit-llvm %s -o - | FileCheck %s ++ ++#include ++ ++/// Ensure that fields inherited from a parent struct are treated in the same ++/// way as fields directly in the child for the purposes of LoongArch ABI rules. 
++
++struct parent1_int32_s {
++  int32_t i1;
++};
++
++struct child1_int32_s : parent1_int32_s {
++  int32_t i2;
++};
++
++// CHECK-LABEL: define{{.*}} i64 @_Z30int32_int32_struct_inheritance14child1_int32_s(i64 %a.coerce)
++struct child1_int32_s int32_int32_struct_inheritance(struct child1_int32_s a) {
++  return a;
++}
++
++struct parent2_int32_s {
++  int32_t i1;
++};
++
++struct child2_float_s : parent2_int32_s {
++  float f1;
++};
++
++// CHECK-LABEL: define{{.*}} { i32, float } @_Z30int32_float_struct_inheritance14child2_float_s(i32 %0, float %1)
++struct child2_float_s int32_float_struct_inheritance(struct child2_float_s a) {
++  return a;
++}
++
++struct parent3_float_s {
++  float f1;
++};
++
++struct child3_int64_s : parent3_float_s {
++  int64_t i1;
++};
++
++// CHECK-LABEL: define{{.*}} { float, i64 } @_Z30float_int64_struct_inheritance14child3_int64_s(float %0, i64 %1)
++struct child3_int64_s float_int64_struct_inheritance(struct child3_int64_s a) {
++  return a;
++}
++
++struct parent4_double_s {
++  double d1;
++};
++
++struct child4_double_s : parent4_double_s {
++  double d1;
++};
++
++// CHECK-LABEL: define{{.*}} { double, double } @_Z32double_double_struct_inheritance15child4_double_s(double %0, double %1)
++struct child4_double_s double_double_struct_inheritance(struct child4_double_s a) {
++  return a;
++}
++
++/// When virtual inheritance is used, the resulting struct isn't eligible for
++/// passing in registers.
++
++struct parent5_virtual_s {
++  int32_t i1;
++};
++
++struct child5_virtual_s : virtual parent5_virtual_s {
++  float f1;
++};
++
++// CHECK-LABEL: define{{.*}} void @_ZN16child5_virtual_sC1EOS_(ptr{{.*}} %this, ptr{{.*}} dereferenceable(12) %0)
++struct child5_virtual_s int32_float_virtual_struct_inheritance(struct child5_virtual_s a) {
++  return a;
++}
++
++/// Check for correct lowering in the presence of diamond inheritance.
++ ++struct parent6_float_s { ++ float f1; ++}; ++ ++struct child6a_s : parent6_float_s { ++}; ++ ++struct child6b_s : parent6_float_s { ++}; ++ ++struct grandchild_6_s : child6a_s, child6b_s { ++}; ++ ++// CHECK-LABEL: define{{.*}} { float, float } @_Z38float_float_diamond_struct_inheritance14grandchild_6_s(float %0, float %1) ++struct grandchild_6_s float_float_diamond_struct_inheritance(struct grandchild_6_s a) { ++ return a; ++} +diff --git a/test/Driver/baremetal.cpp b/test/Driver/baremetal.cpp +index 7c11fe671..56eb5b708 100644 +--- a/test/Driver/baremetal.cpp ++++ b/test/Driver/baremetal.cpp +@@ -105,7 +105,7 @@ + // CHECK-SYSROOT-INC-NOT: "-internal-isystem" "include" + + // RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ +-// RUN: -target aarch64-none-elf \ ++// RUN: -target aarch64-none-elf --sysroot= \ + // RUN: | FileCheck --check-prefix=CHECK-AARCH64-NO-HOST-INC %s + // Verify that the bare metal driver does not include any host system paths: + // CHECK-AARCH64-NO-HOST-INC: InstalledDir: [[INSTALLEDDIR:.+]] +diff --git a/test/Driver/hexagon-toolchain-linux.c b/test/Driver/hexagon-toolchain-linux.c +index 05ae17339..986c2dd61 100644 +--- a/test/Driver/hexagon-toolchain-linux.c ++++ b/test/Driver/hexagon-toolchain-linux.c +@@ -100,7 +100,7 @@ + // ----------------------------------------------------------------------------- + // internal-isystem for linux with and without musl + // ----------------------------------------------------------------------------- +-// RUN: %clang -### -target hexagon-unknown-linux-musl \ ++// RUN: %clang -### -target hexagon-unknown-linux-musl --sysroot= \ + // RUN: -ccc-install-dir %S/Inputs/hexagon_tree/Tools/bin \ + // RUN: -resource-dir=%S/Inputs/resource_dir \ + // RUN: %s 2>&1 \ +@@ -110,7 +110,7 @@ + // CHECK008-SAME: {{^}} "-internal-isystem" "[[RESOURCE]]/include" + // CHECK008-SAME: {{^}} "-internal-externc-isystem" "[[INSTALLED_DIR]]/../target/hexagon/include" + +-// RUN: %clang -### -target hexagon-unknown-linux \ ++// RUN: %clang -### -target hexagon-unknown-linux --sysroot= \ + // RUN: -ccc-install-dir %S/Inputs/hexagon_tree/Tools/bin \ + // RUN: -resource-dir=%S/Inputs/resource_dir \ + // RUN: %s 2>&1 \ +diff --git a/test/Driver/loongarch-alignment-feature.c b/test/Driver/loongarch-alignment-feature.c +new file mode 100644 +index 000000000..2270ff536 +--- /dev/null ++++ b/test/Driver/loongarch-alignment-feature.c +@@ -0,0 +1,8 @@ ++// RUN: %clang -target loongarch64-unknown-linux-gnu -mno-strict-align -### %s 2> %t ++// RUN: FileCheck --check-prefix=CHECK-UNALIGNED < %t %s ++ ++// RUN: %clang -target loongarch64-unknown-linux-gnu -mstrict-align -### %s 2> %t ++// RUN: FileCheck --check-prefix=CHECK-ALIGNED < %t %s ++ ++// CHECK-UNALIGNED: "-target-feature" "+unaligned-access" ++// CHECK-ALIGNED: "-target-feature" "-unaligned-access" +diff --git a/test/Driver/loongarch-march.c b/test/Driver/loongarch-march.c +new file mode 100644 +index 000000000..196862229 +--- /dev/null ++++ b/test/Driver/loongarch-march.c +@@ -0,0 +1,15 @@ ++/// This test checks the valid cpu model which is supported by LoongArch. 
++ ++// RUN: %clang --target=loongarch64 -march=la264 -emit-llvm -### %s 2> %t ++// | FileCheck -check-prefix=LA264 %t %s ++// RUN: %clang --target=loongarch64 -march=la364 -emit-llvm -### %s 2> %t ++// | FileCheck -check-prefix=LA364 %t %s ++// RUN: %clang --target=loongarch64 -march=la464 -emit-llvm -### %s 2> %t ++// | FileCheck -check-prefix=LA464 %t %s ++// RUN: %clang --target=loongarch64 -march=xxx -emit-llvm -### %s 2> %t ++// | FileCheck -check-prefix=INVALID %t %s ++ ++// LA264: "-target-cpu la264" "-target-abi lp64" ++// LA364: "-target-cpu la364" "-target-abi lp64" ++// LA464: "-target-cpu la464" "-target-abi lp64" ++// INVALID: error: unknown target CPU 'xxx' +diff --git a/test/Preprocessor/init-loongarch.c b/test/Preprocessor/init-loongarch.c +new file mode 100644 +index 000000000..a32853d17 +--- /dev/null ++++ b/test/Preprocessor/init-loongarch.c +@@ -0,0 +1,10 @@ ++// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - | grep loongarch | FileCheck %s ++ ++// CHECK: #define __loongarch64 1 ++// CHECK-NEXT: #define __loongarch__ 1 ++// CHECK-NEXT: #define __loongarch_double_float 1 ++// CHECK-NEXT: #define __loongarch_fpr 64 ++// CHECK-NEXT: #define __loongarch_frlen 64 ++// CHECK-NEXT: #define __loongarch_grlen 64 ++// CHECK-NEXT: #define __loongarch_hard_float 1 ++// CHECK-NEXT: #define __loongarch_lp64 1 +diff --git a/unittests/Interpreter/ExceptionTests/InterpreterExceptionTest.cpp b/unittests/Interpreter/ExceptionTests/InterpreterExceptionTest.cpp +index 75928d912..3350ee3fe 100644 +--- a/unittests/Interpreter/ExceptionTests/InterpreterExceptionTest.cpp ++++ b/unittests/Interpreter/ExceptionTests/InterpreterExceptionTest.cpp +@@ -104,6 +104,11 @@ extern "C" int throw_exception() { + if (Triple.isPPC()) + return; + ++ // FIXME: LoongArch64 fails due to `Symbols not found: ++ // [DW.ref.__gxx_personality_v0]` ++ if (Triple.isLoongArch64()) ++ return; ++ + // FIXME: ARM fails due to `Not implemented relocation type!` + if (Triple.isARM()) + return; diff --git a/llvm-compat.spec b/llvm-compat.spec index dce47e1..b8b1bb8 100644 --- a/llvm-compat.spec +++ b/llvm-compat.spec @@ -1,3 +1,4 @@ +%define anolis_release .0.1 %global maj_ver 15 %global min_ver 0 %global patch_ver 7 @@ -17,7 +18,7 @@ Name: llvm-compat Version: %{maj_ver}.%{min_ver}.%{patch_ver} -Release: %{baserelease}%{?dist} +Release: %{baserelease}%{anolis_release}%{?dist} Summary: The Low Level Virtual Machine License: NCSA @@ -27,10 +28,14 @@ Source1: https://github.com/llvm/llvm-project/releases/download/llvmorg-%{versio Source2: https://github.com/llvm/llvm-project/releases/download/llvmorg-%{version}/cmake-%{version}.src.tar.xz # LLVM Patches: +Patch1: 0001-Support-LoongArch.patch +Patch2: 0002-Add-LoongArch-Support-for-ObjectYAML.patch +Patch3: 0003-LoongArch-support-compiler-rt-and-fix-some-issues.patch # Clang Patches: Patch101: 0001-PATCH-clang-Reorganize-gtest-integration.patch Patch102: 0002-PATCH-clang-Make-funwind-tables-the-default-on-all-a.patch +Patch103: 0103-Support-LoongArch.patch # Not Upstream Patch115: 0001-clang-Don-t-install-static-libraries.patch @@ -84,7 +89,7 @@ mv clang-%{version}.src clang %build -%ifarch s390 %ix86 +%ifarch s390 %ix86 loongarch64 # Decrease debuginfo verbosity to reduce memory consumption during final library linking %global optflags %(echo %{optflags} | sed 's/-g /-g1 /') %endif @@ -102,12 +107,12 @@ pushd llvm-build %else -DCMAKE_BUILD_TYPE=RelWithDebInfo \ %endif -%ifarch s390 %ix86 +%ifarch s390 %ix86 loongarch64 -DCMAKE_C_FLAGS_RELWITHDEBINFO="%{optflags} 
-DNDEBUG" \ -DCMAKE_CXX_FLAGS_RELWITHDEBINFO="%{optflags} -DNDEBUG" \ %endif \ - -DLLVM_TARGETS_TO_BUILD="X86;AMDGPU;PowerPC;NVPTX;SystemZ;AArch64;ARM;Mips;BPF;WebAssembly" \ + -DLLVM_TARGETS_TO_BUILD="X86;AMDGPU;PowerPC;NVPTX;SystemZ;AArch64;ARM;Mips;BPF;WebAssembly;LoongArch" \ -DLLVM_ENABLE_LIBCXX:BOOL=OFF \ -DLLVM_ENABLE_ZLIB:BOOL=ON \ -DLLVM_ENABLE_FFI:BOOL=ON \ @@ -162,6 +167,9 @@ done %endif %changelog +* Mon Dec 23 2024 Chen Li - 15.0.7-1.0.1 +- Add support for LoongArch + * Fri Apr 28 2023 Tom Stellard - 15.0.7-1 - 15.0.7 Release -- Gitee